[CPU] Enable bf16 RNN primitives (#4942)
This commit is contained in:
parent
8bb73273f1
commit
39e1a21c42
@ -5,12 +5,17 @@
|
|||||||
#include "mkldnn_rnn.h"
|
#include "mkldnn_rnn.h"
|
||||||
#include "mkldnn_extension_utils.h"
|
#include "mkldnn_extension_utils.h"
|
||||||
|
|
||||||
|
#include "mkldnn_node.h"
|
||||||
#include "utils/general_utils.h"
|
#include "utils/general_utils.h"
|
||||||
#include "nodes/common/cpu_memcpy.h"
|
#include "nodes/common/cpu_memcpy.h"
|
||||||
|
#include "utils/bfloat16.hpp"
|
||||||
|
#include "nodes/common/cpu_convert.h"
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " layer '" << getName() << "' "
|
||||||
|
|
||||||
using namespace mkldnn;
|
using namespace mkldnn;
|
||||||
using namespace InferenceEngine;
|
using namespace InferenceEngine;
|
||||||
|
|
||||||
@ -39,7 +44,7 @@ static algorithm ie2mkl(RNNCellBase::CellType cell_type) {
|
|||||||
case RNNCellBase::GRU: return algorithm::vanilla_gru;
|
case RNNCellBase::GRU: return algorithm::vanilla_gru;
|
||||||
case RNNCellBase::GRU_LBR: return algorithm::lbr_gru;
|
case RNNCellBase::GRU_LBR: return algorithm::lbr_gru;
|
||||||
default:
|
default:
|
||||||
IE_THROW() << "Unsupported cell type";
|
IE_THROW() << "RNN node. Unsupported cell type";
|
||||||
return algorithm::undef;
|
return algorithm::undef;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -51,7 +56,7 @@ size_t gatesCount(algorithm alg) {
|
|||||||
case algorithm::lbr_gru: return 3;
|
case algorithm::lbr_gru: return 3;
|
||||||
case algorithm::vanilla_lstm: return 4;
|
case algorithm::vanilla_lstm: return 4;
|
||||||
default:
|
default:
|
||||||
IE_THROW() << "Unsupported cell type";
|
IE_THROW() << "RNN node. Unsupported cell type";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -63,11 +68,24 @@ size_t statesCount(algorithm alg) {
|
|||||||
case algorithm::lbr_gru: return 1;
|
case algorithm::lbr_gru: return 1;
|
||||||
case algorithm::vanilla_lstm: return 2;
|
case algorithm::vanilla_lstm: return 2;
|
||||||
default:
|
default:
|
||||||
IE_THROW() << "Unsupported cell type";
|
IE_THROW() << "RNN node. Unsupported cell type";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool haveCellState(algorithm alg) {
|
||||||
|
return alg == algorithm::vanilla_lstm;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::map<InferenceEngine::Precision, InferenceEngine::Precision> MKLDNNRNN::weightsByLayerPrec {
|
||||||
|
// layer precision, weights precision
|
||||||
|
{InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32},
|
||||||
|
{InferenceEngine::Precision::BF16, InferenceEngine::Precision::BF16},
|
||||||
|
// FP16 and U8 are not supported yet
|
||||||
|
// {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP16},
|
||||||
|
// {InferenceEngine::Precision::U8, InferenceEngine::Precision::I8},
|
||||||
|
};
|
||||||
|
|
||||||
MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||||
MKLDNNNode(layer, eng, cache) {
|
MKLDNNNode(layer, eng, cache) {
|
||||||
is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
|
is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
|
||||||
@ -78,6 +96,8 @@ bool MKLDNNRNN::created() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MKLDNNRNN::getSupportedDescriptors() {
|
void MKLDNNRNN::getSupportedDescriptors() {
|
||||||
|
runtimePrecision = getCnnLayer()->insData[0].lock()->getPrecision();
|
||||||
|
|
||||||
if (is_cell)
|
if (is_cell)
|
||||||
fillCellDesc();
|
fillCellDesc();
|
||||||
else
|
else
|
||||||
@ -89,14 +109,14 @@ void MKLDNNRNN::fillCellDesc() {
|
|||||||
auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
|
auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
|
||||||
|
|
||||||
if (!cellLayer)
|
if (!cellLayer)
|
||||||
IE_THROW() << "No original layer for RNNCell.";
|
THROW_ERROR << "No original layer for RNNCell.";
|
||||||
|
|
||||||
cell_type = ie2mkl(cellLayer->cellType);
|
cell_type = ie2mkl(cellLayer->cellType);
|
||||||
cell_act = ie2mkl(cellLayer->activations[0]); // Works only for RNN with one gate
|
cell_act = ie2mkl(cellLayer->activations[0]); // Works only for RNN with one gate
|
||||||
|
|
||||||
if (cellLayer->clip != 0.0f) {
|
if (cellLayer->clip != 0.0f) {
|
||||||
// TODO [oneDNN]: No more supported
|
// TODO [oneDNN]: No more supported
|
||||||
IE_THROW() << "Clipping is not supported for RNN primitive";
|
THROW_ERROR << "Clipping is not supported for RNN primitive";
|
||||||
// cell_desc.set_clipping(cellLayer->clip);
|
// cell_desc.set_clipping(cellLayer->clip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,16 +124,16 @@ void MKLDNNRNN::fillCellDesc() {
|
|||||||
auto &outs = cellLayer->outData;
|
auto &outs = cellLayer->outData;
|
||||||
|
|
||||||
if (!one_of(ins.size(), 3, 2))
|
if (!one_of(ins.size(), 3, 2))
|
||||||
IE_THROW() << "Incorrect number of input ports for layer " << getName();
|
THROW_ERROR << "Incorrect number of input ports for layer " << getName();
|
||||||
if (!one_of(outs.size(), 2, 1))
|
if (!one_of(outs.size(), 2, 1))
|
||||||
IE_THROW() << "Incorrect number of output ports for layer " << getName();
|
THROW_ERROR << "Incorrect number of output ports for layer " << getName();
|
||||||
|
|
||||||
auto in_data_dims = getParentEdgeAt(0)->getDims();
|
auto in_data_dims = getParentEdgeAt(0)->getDims();
|
||||||
auto in_h_state_dims = getParentEdgeAt(1)->getDims();
|
auto in_h_state_dims = getParentEdgeAt(1)->getDims();
|
||||||
auto out_h_state_dims = getChildEdgeAt(0)->getDims();
|
auto out_h_state_dims = getChildEdgeAt(0)->getDims();
|
||||||
|
|
||||||
if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2)
|
if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2)
|
||||||
IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
|
THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
|
||||||
|
|
||||||
G = gatesCount(cell_type);
|
G = gatesCount(cell_type);
|
||||||
S = statesCount(cell_type);
|
S = statesCount(cell_type);
|
||||||
@ -130,7 +150,7 @@ void MKLDNNRNN::fillCellDesc() {
|
|||||||
if (in_data_dims != D_shape
|
if (in_data_dims != D_shape
|
||||||
|| in_h_state_dims != S_shape
|
|| in_h_state_dims != S_shape
|
||||||
|| out_h_state_dims != S_shape)
|
|| out_h_state_dims != S_shape)
|
||||||
IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
|
THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
|
||||||
|
|
||||||
if (S == 2) {
|
if (S == 2) {
|
||||||
auto in_c_state_dims = getParentEdgeAt(2)->getDims();
|
auto in_c_state_dims = getParentEdgeAt(2)->getDims();
|
||||||
@ -138,7 +158,7 @@ void MKLDNNRNN::fillCellDesc() {
|
|||||||
|
|
||||||
if (in_c_state_dims != S_shape
|
if (in_c_state_dims != S_shape
|
||||||
|| out_c_state_dims != S_shape)
|
|| out_c_state_dims != S_shape)
|
||||||
IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
|
THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto blobs = cellLayer->blobs;
|
auto blobs = cellLayer->blobs;
|
||||||
@ -147,40 +167,53 @@ void MKLDNNRNN::fillCellDesc() {
|
|||||||
if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
|
if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
|
||||||
|
|
||||||
if (!weights)
|
if (!weights)
|
||||||
IE_THROW() << "RNN Layer. Weights do not present.";
|
THROW_ERROR << "RNN Layer. Weights do not present.";
|
||||||
|
|
||||||
if (weights->size() != G * SC * (SC + DC))
|
if (weights->size() != G * SC * (SC + DC))
|
||||||
IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC);
|
THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC);
|
||||||
|
|
||||||
if (bias && bias->size() != Gb * SC)
|
if (bias && bias->size() != Gb * SC)
|
||||||
IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
|
THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
|
||||||
|
|
||||||
|
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
|
||||||
|
|
||||||
|
// layer input plus states
|
||||||
|
in_data_d.resize(S + 1);
|
||||||
|
out_data_d.resize(S + 1);
|
||||||
|
|
||||||
// Shapes and Attributes are correct. Can start internal stuff initialization.
|
// Shapes and Attributes are correct. Can start internal stuff initialization.
|
||||||
for (size_t i = 0; i < S; i++) {
|
in_data_d[RNNInOutKind::Layer] = {{T, N, DC}, dataType, memory::format_tag::tnc};
|
||||||
in_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
|
out_data_d[RNNInOutKind::Layer] = {{T, N, SC}, dataType, memory::format_tag::tnc};
|
||||||
out_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
|
|
||||||
|
in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
|
||||||
|
out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
|
||||||
|
|
||||||
|
if (haveCellState(cell_type)) {
|
||||||
|
in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
||||||
|
out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
||||||
}
|
}
|
||||||
|
|
||||||
in_data_d = {{T, N, DC}, memory::data_type::f32, memory::format_tag::tnc};;
|
w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
|
||||||
out_data_d = {{T, N, SC}, memory::data_type::f32, memory::format_tag::tnc};;
|
w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
|
||||||
|
|
||||||
w_data_d = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
|
|
||||||
w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
|
|
||||||
|
|
||||||
if (bias)
|
if (bias)
|
||||||
w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
|
w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
|
||||||
|
|
||||||
std::vector<TensorDesc> in_candidate, out_candidate;
|
std::vector<TensorDesc> in_candidate, out_candidate;
|
||||||
std::vector<memory::format_tag> outputFormats;
|
in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc});
|
||||||
in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, memory::data_type::f32, memory::format_tag::nc});
|
in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
|
||||||
in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
|
||||||
out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
|
||||||
outputFormats.emplace_back(memory::format_tag::nc);
|
|
||||||
|
|
||||||
if (S == 2) {
|
if (haveCellState(cell_type)) {
|
||||||
in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
||||||
out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
||||||
outputFormats.emplace_back(memory::format_tag::nc);
|
}
|
||||||
|
|
||||||
|
Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
|
||||||
|
|
||||||
|
if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
|
||||||
|
if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
|
||||||
|
convertWeightsBlobToBF16();
|
||||||
}
|
}
|
||||||
|
|
||||||
createDescriptor(in_candidate, out_candidate);
|
createDescriptor(in_candidate, out_candidate);
|
||||||
@ -191,10 +224,10 @@ void MKLDNNRNN::fillSeqDesc() {
|
|||||||
auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
|
auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
|
||||||
|
|
||||||
if (!rnnLayer)
|
if (!rnnLayer)
|
||||||
IE_THROW() << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
|
THROW_ERROR << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
|
||||||
|
|
||||||
if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN))
|
if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN))
|
||||||
IE_THROW() << "RNN layer supports only LSTM/GRU/RNN cell";
|
THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell";
|
||||||
|
|
||||||
cell_type = ie2mkl(rnnLayer->cellType);
|
cell_type = ie2mkl(rnnLayer->cellType);
|
||||||
cell_act = algorithm::undef;
|
cell_act = algorithm::undef;
|
||||||
@ -203,31 +236,31 @@ void MKLDNNRNN::fillSeqDesc() {
|
|||||||
|
|
||||||
// TODO [oneDNN]: No more supported
|
// TODO [oneDNN]: No more supported
|
||||||
if (rnnLayer->clip != 0.0f) {
|
if (rnnLayer->clip != 0.0f) {
|
||||||
IE_THROW() << "Clipping is not supported for RNN primitive";
|
THROW_ERROR << "Clipping is not supported for RNN primitive";
|
||||||
// cell_desc.set_clipping(rnnLayer->clip);
|
// cell_desc.set_clipping(rnnLayer->clip);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!one_of(rnnLayer->axis, 0, 1))
|
if (!one_of(rnnLayer->axis, 0, 1))
|
||||||
IE_THROW() << "RNN layer supports only sequence axis 0 or 1";
|
THROW_ERROR << "RNN layer supports only sequence axis 0 or 1";
|
||||||
nativeOrder = rnnLayer->axis == 0;
|
nativeOrder = rnnLayer->axis == 0;
|
||||||
|
|
||||||
if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD))
|
if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD))
|
||||||
IE_THROW() << "RNN layer supports only unidirectional RNN layer";
|
THROW_ERROR << "RNN layer supports only unidirectional RNN layer";
|
||||||
direction = ie2mkl(rnnLayer->direction);
|
direction = ie2mkl(rnnLayer->direction);
|
||||||
|
|
||||||
auto &ins = rnnLayer->insData;
|
auto &ins = rnnLayer->insData;
|
||||||
auto &outs = rnnLayer->outData;
|
auto &outs = rnnLayer->outData;
|
||||||
|
|
||||||
if (!one_of(ins.size(), 3, 2, 1))
|
if (!one_of(ins.size(), 3, 2, 1))
|
||||||
IE_THROW() << "Incorrect number of input ports for layer " << getName();
|
THROW_ERROR << "Incorrect number of input ports for layer " << getName();
|
||||||
if (!one_of(outs.size(), 3, 2, 1))
|
if (!one_of(outs.size(), 3, 2, 1))
|
||||||
IE_THROW() << "Incorrect number of output ports for layer " << getName();
|
THROW_ERROR << "Incorrect number of output ports for layer " << getName();
|
||||||
|
|
||||||
auto in_data_dims = getParentEdgeAt(0)->getDims();
|
auto in_data_dims = getParentEdgeAt(0)->getDims();
|
||||||
auto out_data_dims = getChildEdgeAt(0)->getDims();
|
auto out_data_dims = getChildEdgeAt(0)->getDims();
|
||||||
|
|
||||||
if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3)
|
if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3)
|
||||||
IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
|
THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
|
||||||
|
|
||||||
if (!nativeOrder) {
|
if (!nativeOrder) {
|
||||||
std::swap(in_data_dims[0], in_data_dims[1]);
|
std::swap(in_data_dims[0], in_data_dims[1]);
|
||||||
@ -246,125 +279,153 @@ void MKLDNNRNN::fillSeqDesc() {
|
|||||||
MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC};
|
MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC};
|
||||||
|
|
||||||
if (out_data_dims != OD_shape)
|
if (out_data_dims != OD_shape)
|
||||||
IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
|
THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
|
||||||
|
|
||||||
in_states_d.resize(S);
|
auto& blobs = rnnLayer->blobs;
|
||||||
out_states_d.resize(S);
|
|
||||||
|
|
||||||
for (int i = 1; i < ins.size(); i++) {
|
|
||||||
if (getParentEdgeAt(i)->getDims() != S_shape)
|
|
||||||
IE_THROW() << "Incorrect shape of state ports for layer " << getName();
|
|
||||||
in_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 1; i < outs.size(); i++) {
|
|
||||||
if (getChildEdgeAt(i)->getDims() != S_shape)
|
|
||||||
IE_THROW() << "Incorrect shape of state ports for layer " << getName();
|
|
||||||
out_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
|
||||||
}
|
|
||||||
|
|
||||||
auto blobs = rnnLayer->blobs;
|
|
||||||
Blob::Ptr weights, bias;
|
Blob::Ptr weights, bias;
|
||||||
if (blobs.find("weights") != blobs.end()) weights = blobs["weights"];
|
if (blobs.find("weights") != blobs.end()) weights = blobs["weights"];
|
||||||
if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
|
if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
|
||||||
|
|
||||||
if (!weights)
|
if (!weights)
|
||||||
IE_THROW() << "RNN Layer. Weights do not present.";
|
THROW_ERROR << "RNN Layer. Weights do not present.";
|
||||||
|
|
||||||
if (weights->size() != G * SC * (SC + DC))
|
if (weights->size() != G * SC * (SC + DC))
|
||||||
IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC);
|
THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC);
|
||||||
|
|
||||||
w_data_d = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
|
for (int i = 1; i < ins.size(); i++) {
|
||||||
w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
|
if (getParentEdgeAt(i)->getDims() != S_shape)
|
||||||
|
THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i < outs.size(); i++) {
|
||||||
|
if (getChildEdgeAt(i)->getDims() != S_shape)
|
||||||
|
THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
// layer input plus states
|
||||||
|
in_data_d.resize(S + 1);
|
||||||
|
out_data_d.resize(S + 1);
|
||||||
|
|
||||||
|
auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
|
||||||
|
|
||||||
|
// Try to create descriptor and corresponding configuration
|
||||||
|
in_data_d[RNNInOutKind::Layer] = {in_data_dims, dataType, memory::format_tag::tnc};
|
||||||
|
out_data_d[RNNInOutKind::Layer] = {out_data_dims, dataType, memory::format_tag::tnc};
|
||||||
|
|
||||||
|
in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
|
||||||
|
out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
|
||||||
|
|
||||||
|
if (haveCellState(cell_type)) {
|
||||||
|
in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
||||||
|
out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
|
||||||
|
}
|
||||||
|
|
||||||
|
w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
|
||||||
|
w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
|
||||||
|
|
||||||
if (bias && bias->size() != Gb * SC)
|
if (bias && bias->size() != Gb * SC)
|
||||||
IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
|
THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
|
||||||
|
|
||||||
if (bias)
|
if (bias)
|
||||||
w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
|
w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
|
||||||
|
|
||||||
// Try to create descriptor and corresponding configuration
|
std::vector<TensorDesc> in_candidate, out_candidate;
|
||||||
in_data_d = {in_data_dims, memory::data_type::f32, memory::format_tag::tnc};
|
|
||||||
out_data_d = {out_data_dims, memory::data_type::f32, memory::format_tag::tnc};
|
|
||||||
|
|
||||||
std::vector<TensorDesc> in_candidate;
|
|
||||||
if (nativeOrder)
|
|
||||||
in_candidate.push_back(in_data_d);
|
|
||||||
else
|
|
||||||
in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, memory::data_type::f32, memory::format_tag::ntc});
|
|
||||||
|
|
||||||
for (int i = 1; i < ins.size(); i++)
|
|
||||||
in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
|
|
||||||
|
|
||||||
std::vector<TensorDesc> out_candidate;
|
|
||||||
if (nativeOrder) {
|
if (nativeOrder) {
|
||||||
out_candidate.push_back(out_data_d);
|
in_candidate.push_back(in_data_d[RNNInOutKind::Layer]);
|
||||||
|
out_candidate.push_back(out_data_d[RNNInOutKind::Layer]);
|
||||||
} else {
|
} else {
|
||||||
out_candidate.push_back(MKLDNNMemoryDesc{{N, T, SC}, memory::data_type::f32, memory::format_tag::ntc});
|
in_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc});
|
||||||
|
out_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, SC}, dataType, memory::format_tag::ntc});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 1; i < outs.size(); i++) {
|
in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
|
||||||
|
out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
|
||||||
|
|
||||||
|
if (haveCellState(cell_type)) {
|
||||||
|
in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
|
||||||
out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
|
out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
|
||||||
|
|
||||||
|
if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
|
||||||
|
if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
|
||||||
|
convertWeightsBlobToBF16();
|
||||||
|
}
|
||||||
|
|
||||||
createDescriptor(in_candidate, out_candidate);
|
createDescriptor(in_candidate, out_candidate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MKLDNNRNN::convertWeightsBlobToBF16() {
|
||||||
|
Blob::Ptr &weights = getCnnLayer()->blobs["weights"];
|
||||||
|
MemoryBlob::Ptr cur_weights = as<MemoryBlob>(weights);
|
||||||
|
TensorDesc td(Precision::BF16, cur_weights->getTensorDesc().getDims(), cur_weights->getTensorDesc().getLayout());
|
||||||
|
MemoryBlob::Ptr new_weights_blob = make_shared_blob<uint16_t>(td);
|
||||||
|
|
||||||
|
new_weights_blob->allocate();
|
||||||
|
bfloat16_t *dst = new_weights_blob->wmap();
|
||||||
|
|
||||||
|
float* fp32src = cur_weights->rmap().as<float*>();
|
||||||
|
cpu_convert(fp32src, dst, Precision::FP32, Precision::BF16, new_weights_blob->size());
|
||||||
|
weights = new_weights_blob;
|
||||||
|
}
|
||||||
|
|
||||||
void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
|
void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
|
||||||
const std::vector<TensorDesc> &outputDesc) {
|
const std::vector<TensorDesc> &outputDesc) {
|
||||||
switch (cell_type) {
|
switch (cell_type) {
|
||||||
case mkldnn::algorithm::vanilla_rnn: {
|
case mkldnn::algorithm::vanilla_rnn: {
|
||||||
MKLDNNDescriptor desc(std::shared_ptr<vanilla_rnn_forward::desc>(
|
MKLDNNDescriptor desc(std::shared_ptr<vanilla_rnn_forward::desc>(
|
||||||
new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction,
|
new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction,
|
||||||
/* In Data */ in_data_d,
|
/* In Data */ in_data_d[RNNInOutKind::Layer],
|
||||||
/* In State */ in_states_d[0],
|
/* In State */ in_data_d[RNNInOutKind::HiddenState],
|
||||||
/* Weights data */ w_data_d,
|
/* Weights data */ w_data_d,
|
||||||
/* Weights state */ w_state_d,
|
/* Weights state */ w_state_d,
|
||||||
/* Bias */ w_bias_d,
|
/* Bias */ w_bias_d,
|
||||||
/* Out Data */ out_data_d,
|
/* Out Data */ out_data_d[RNNInOutKind::Layer],
|
||||||
/* Out State */ out_states_d[0])));
|
/* Out State */ out_data_d[RNNInOutKind::HiddenState])));
|
||||||
descs.push_back(desc);
|
descs.push_back(desc);
|
||||||
} break;
|
} break;
|
||||||
case mkldnn::algorithm::vanilla_gru: {
|
case mkldnn::algorithm::vanilla_gru: {
|
||||||
MKLDNNDescriptor desc(std::shared_ptr<gru_forward::desc>(
|
MKLDNNDescriptor desc(std::shared_ptr<gru_forward::desc>(
|
||||||
new gru_forward::desc(prop_kind::forward_scoring, direction,
|
new gru_forward::desc(prop_kind::forward_scoring, direction,
|
||||||
/* In Data */ in_data_d,
|
/* In Data */ in_data_d[RNNInOutKind::Layer],
|
||||||
/* In State */ in_states_d[0],
|
/* In State */ in_data_d[RNNInOutKind::HiddenState],
|
||||||
/* Weights data */ w_data_d,
|
/* Weights data */ w_data_d,
|
||||||
/* Weights state */ w_state_d,
|
/* Weights state */ w_state_d,
|
||||||
/* Bias */ w_bias_d,
|
/* Bias */ w_bias_d,
|
||||||
/* Out Data */ out_data_d,
|
/* Out Data */ out_data_d[RNNInOutKind::Layer],
|
||||||
/* Out State */ out_states_d[0])));
|
/* Out State */ out_data_d[RNNInOutKind::HiddenState])));
|
||||||
descs.push_back(desc);
|
descs.push_back(desc);
|
||||||
} break;
|
} break;
|
||||||
case mkldnn::algorithm::lbr_gru: {
|
case mkldnn::algorithm::lbr_gru: {
|
||||||
MKLDNNDescriptor desc(std::shared_ptr<lbr_gru_forward::desc>(
|
MKLDNNDescriptor desc(std::shared_ptr<lbr_gru_forward::desc>(
|
||||||
new lbr_gru_forward::desc(prop_kind::forward_scoring, direction,
|
new lbr_gru_forward::desc(prop_kind::forward_scoring, direction,
|
||||||
/* In Data */ in_data_d,
|
/* In Data */ in_data_d[RNNInOutKind::Layer],
|
||||||
/* In State */ in_states_d[0],
|
/* In State */ in_data_d[RNNInOutKind::HiddenState],
|
||||||
/* Weights data */ w_data_d,
|
/* Weights data */ w_data_d,
|
||||||
/* Weights state */ w_state_d,
|
/* Weights state */ w_state_d,
|
||||||
/* Bias */ w_bias_d,
|
/* Bias */ w_bias_d,
|
||||||
/* Out Data */ out_data_d,
|
/* Out Data */ out_data_d[RNNInOutKind::Layer],
|
||||||
/* Out State */ out_states_d[0])));
|
/* Out State */ out_data_d[RNNInOutKind::HiddenState])));
|
||||||
descs.push_back(desc);
|
descs.push_back(desc);
|
||||||
} break;
|
} break;
|
||||||
case mkldnn::algorithm::vanilla_lstm: {
|
case mkldnn::algorithm::vanilla_lstm: {
|
||||||
MKLDNNDescriptor desc(std::shared_ptr<lstm_forward::desc>(
|
MKLDNNDescriptor desc(std::shared_ptr<lstm_forward::desc>(
|
||||||
new lstm_forward::desc(prop_kind::forward_scoring, direction,
|
new lstm_forward::desc(prop_kind::forward_scoring, direction,
|
||||||
/* In Data */ in_data_d,
|
/* In Data */ in_data_d[RNNInOutKind::Layer],
|
||||||
/* In State H */ in_states_d[0],
|
/* In State */ in_data_d[RNNInOutKind::HiddenState],
|
||||||
/* In State C */ in_states_d[1],
|
/* In State C */ in_data_d[RNNInOutKind::CellState],
|
||||||
/* Weights data */ w_data_d,
|
/* Weights data */ w_data_d,
|
||||||
/* Weights state */ w_state_d,
|
/* Weights state */ w_state_d,
|
||||||
/* Bias */ w_bias_d,
|
/* Bias */ w_bias_d,
|
||||||
/* Out Data */ out_data_d,
|
/* Out Data */ out_data_d[RNNInOutKind::Layer],
|
||||||
/* Out State H */ out_states_d[0],
|
/* Out State */ out_data_d[RNNInOutKind::HiddenState],
|
||||||
/* Out State C */ out_states_d[1])));
|
/* Out State C */ out_data_d[RNNInOutKind::CellState])));
|
||||||
descs.push_back(desc);
|
descs.push_back(desc);
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
IE_THROW() << "Unknown cell type";
|
THROW_ERROR << "Unknown cell type";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill supported config
|
// Fill supported config
|
||||||
@ -389,48 +450,41 @@ void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
|
|||||||
supportedPrimitiveDescriptors.emplace_back(config, ref_any);
|
supportedPrimitiveDescriptors.emplace_back(config, ref_any);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) {
|
||||||
|
if (!weightsByLayerPrec.count(layerPrec))
|
||||||
|
THROW_ERROR << "Unsupported layer precision " << layerPrec;
|
||||||
|
return weightsPrec == weightsByLayerPrec.at(layerPrec);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MKLDNNRNN::verifyWeights() {
|
||||||
|
auto layer = getCnnLayer();
|
||||||
|
auto weightsIt = layer->blobs.find("weights");
|
||||||
|
|
||||||
|
if (weightsIt == layer->blobs.end())
|
||||||
|
THROW_ERROR << "Missed weights blob.";
|
||||||
|
|
||||||
|
const auto& weightsPrec = weightsIt->second->getTensorDesc().getPrecision();
|
||||||
|
|
||||||
|
if (!verifyWeightsPrecision(runtimePrecision, weightsPrec)) {
|
||||||
|
THROW_ERROR << "Weights precision " << weightsPrec <<
|
||||||
|
" does not match runtime precision" << runtimePrecision;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MKLDNNRNN::verifyBiases() {
|
||||||
|
auto layer = getCnnLayer();
|
||||||
|
if (layer->blobs.find("biases") != layer->blobs.end()
|
||||||
|
&& layer->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
|
||||||
|
THROW_ERROR << "Invalid biases precision: " << layer->blobs["biases"]->getTensorDesc().getPrecision();
|
||||||
|
}
|
||||||
|
|
||||||
void MKLDNNRNN::createPrimitive() {
|
void MKLDNNRNN::createPrimitive() {
|
||||||
if (prim) return;
|
if (prim) return;
|
||||||
|
|
||||||
std::string errorPrefix = "RNN layer '" + getCnnLayer()->name + "'";
|
verifyWeights();
|
||||||
auto weightsIt = getCnnLayer()->blobs.find("weights");
|
verifyBiases();
|
||||||
if (weightsIt == getCnnLayer()->blobs.end())
|
|
||||||
IE_THROW() << errorPrefix << " does not have weights blob.";
|
|
||||||
if (weightsIt->second->getTensorDesc().getPrecision() != Precision::FP32)
|
|
||||||
IE_THROW() << errorPrefix << " has invalid weights precision: " << weightsIt->second->getTensorDesc().getPrecision();
|
|
||||||
if (getCnnLayer()->blobs.find("biases") != getCnnLayer()->blobs.end()
|
|
||||||
&& getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
|
|
||||||
IE_THROW() << errorPrefix << " has invalid biases precision: " << getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision();
|
|
||||||
|
|
||||||
auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
|
/*
|
||||||
|
|
||||||
auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
|
|
||||||
auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
|
|
||||||
|
|
||||||
// create weight blobs (data and state part)
|
|
||||||
auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
|
||||||
w_data_mem->Create(w_data_d);
|
|
||||||
internalBlobMemory.push_back(w_data_mem);
|
|
||||||
|
|
||||||
auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
|
||||||
w_state_mem->Create(w_state_d);
|
|
||||||
internalBlobMemory.push_back(w_state_mem);
|
|
||||||
|
|
||||||
auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
|
||||||
w_bias_mem->Create(w_bias_d);
|
|
||||||
internalBlobMemory.push_back(w_bias_mem);
|
|
||||||
|
|
||||||
{
|
|
||||||
/* Copy Weight data
|
|
||||||
* IE format:
|
|
||||||
* W - [gates, out_state_size, in_data_size + in_state_size]
|
|
||||||
* B - [gates, out_state_size]
|
|
||||||
*
|
|
||||||
* MKLDNN format:
|
|
||||||
* W - [1, 1, in_date_size, gates, out_state_size]
|
|
||||||
* R - [1, 1, in_state_size, gates, out_state_size]
|
|
||||||
* B - [gates, out_state_size]
|
|
||||||
*
|
|
||||||
* Gate order
|
* Gate order
|
||||||
* ====== LSTM ======
|
* ====== LSTM ======
|
||||||
* Caffe - IFOC, ONNX - IOFC
|
* Caffe - IFOC, ONNX - IOFC
|
||||||
@ -449,39 +503,99 @@ void MKLDNNRNN::createPrimitive() {
|
|||||||
if (cell_type == algorithm::vanilla_lstm) {
|
if (cell_type == algorithm::vanilla_lstm) {
|
||||||
gate_map = gate_map_lstm;
|
gate_map = gate_map_lstm;
|
||||||
if (G > gate_map_lstm_size) {
|
if (G > gate_map_lstm_size) {
|
||||||
IE_THROW() << "G isn't equal to the size of gate_map";
|
THROW_ERROR << "G isn't equal to the size of gate_map";
|
||||||
}
|
}
|
||||||
} else if (cell_type == algorithm::vanilla_gru) {
|
} else if (cell_type == algorithm::vanilla_gru) {
|
||||||
gate_map = gate_map_gru;
|
gate_map = gate_map_gru;
|
||||||
if (G > gate_map_gru_size) {
|
if (G > gate_map_gru_size) {
|
||||||
IE_THROW() << "G isn't equal to the size of gate_map";
|
THROW_ERROR << "G isn't equal to the size of gate_map";
|
||||||
}
|
}
|
||||||
} else if (cell_type == algorithm::lbr_gru) {
|
} else if (cell_type == algorithm::lbr_gru) {
|
||||||
gate_map = gate_map_gru;
|
gate_map = gate_map_gru;
|
||||||
if (G > gate_map_gru_size) {
|
if (G > gate_map_gru_size) {
|
||||||
IE_THROW() << "G isn't equal to the size of gate_map";
|
THROW_ERROR << "G isn't equal to the size of gate_map";
|
||||||
}
|
}
|
||||||
} else if (cell_type == algorithm::vanilla_rnn) {
|
} else if (cell_type == algorithm::vanilla_rnn) {
|
||||||
gate_map = gate_map_rnn;
|
gate_map = gate_map_rnn;
|
||||||
if (G > gate_map_rnn_size) {
|
if (G > gate_map_rnn_size) {
|
||||||
IE_THROW() << "G isn't equal to the size of gate_map";
|
THROW_ERROR << "G isn't equal to the size of gate_map";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
gate_map = gate_map_gru;
|
gate_map = gate_map_gru;
|
||||||
if (G > gate_map_gru_size) {
|
if (G > gate_map_gru_size) {
|
||||||
IE_THROW() << "G isn't equal to the size of gate_map";
|
THROW_ERROR << "G isn't equal to the size of gate_map";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const float*>();
|
if (runtimePrecision == Precision::BF16)
|
||||||
auto w_ptr = static_cast<float*>(w_data_mem->GetData());
|
fillWeights<bfloat16_t>(gate_map);
|
||||||
auto r_ptr = static_cast<float*>(w_state_mem->GetData());
|
else if (runtimePrecision == Precision::FP32)
|
||||||
|
fillWeights<float>(gate_map);
|
||||||
|
else // TODO FP16 and INT8 support
|
||||||
|
THROW_ERROR << "Unsupported data type";
|
||||||
|
|
||||||
|
if (runtimePrecision == Precision::BF16 ||
|
||||||
|
runtimePrecision == Precision::FP32)
|
||||||
|
fillBiases<float>(gate_map);
|
||||||
|
|
||||||
|
auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
|
||||||
|
prim.reset(new mkldnn::primitive(pd));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IE format:
|
||||||
|
* B - [gates, out_state_size]
|
||||||
|
*
|
||||||
|
* MKLDNN format:
|
||||||
|
* B - [gates, out_state_size]
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename Prec>
|
||||||
|
void MKLDNNRNN::fillBiases(const int *gate_map) {
|
||||||
|
if (!w_bias_d)
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
||||||
|
w_bias_mem->Create(w_bias_d);
|
||||||
|
internalBlobMemory.push_back(w_bias_mem);
|
||||||
|
|
||||||
|
auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const Prec*>();
|
||||||
|
auto b_ptr = static_cast<Prec*>(w_bias_mem->GetData());
|
||||||
|
for (int g = 0; g < Gb; g++) {
|
||||||
|
Prec *l_b_ptr = b_ptr + gate_map[g]*SC;
|
||||||
|
const Prec *l_ie_b_ptr = ie_b_ptr + g * SC;
|
||||||
|
cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(Prec));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IE format:
|
||||||
|
* W - [gates, out_state_size, in_data_size + in_state_size]
|
||||||
|
*
|
||||||
|
* MKLDNN format:
|
||||||
|
* W - [1, 1, in_data_size, gates, out_state_size]
|
||||||
|
* R - [1, 1, in_state_size, gates, out_state_size]
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename Prec>
|
||||||
|
void MKLDNNRNN::fillWeights(const int *gate_map) {
|
||||||
|
// create weight blobs (data and state part)
|
||||||
|
auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
||||||
|
w_data_mem->Create(w_data_d);
|
||||||
|
internalBlobMemory.push_back(w_data_mem);
|
||||||
|
auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
|
||||||
|
w_state_mem->Create(w_state_d);
|
||||||
|
internalBlobMemory.push_back(w_state_mem);
|
||||||
|
|
||||||
|
auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const Prec*>();
|
||||||
|
auto w_ptr = static_cast<Prec*>(w_data_mem->GetData());
|
||||||
|
auto r_ptr = static_cast<Prec*>(w_state_mem->GetData());
|
||||||
const int step = SC * G;
|
const int step = SC * G;
|
||||||
|
|
||||||
for (int g = 0; g < G; g++) {
|
for (int g = 0; g < G; g++) {
|
||||||
for (int out_i = 0; out_i < SC; out_i++) {
|
for (int out_i = 0; out_i < SC; out_i++) {
|
||||||
float *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
|
Prec *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
|
||||||
float *l_r_ptr = r_ptr + gate_map[g]*SC+ out_i;
|
Prec *l_r_ptr = r_ptr + gate_map[g]*SC+ out_i;
|
||||||
for (int in_i = 0; in_i < DC; in_i++) {
|
for (int in_i = 0; in_i < DC; in_i++) {
|
||||||
*l_w_ptr = *ie_w_ptr;
|
*l_w_ptr = *ie_w_ptr;
|
||||||
ie_w_ptr++;
|
ie_w_ptr++;
|
||||||
@ -495,24 +609,11 @@ void MKLDNNRNN::createPrimitive() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (w_bias_d) {
|
|
||||||
auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const float*>();
|
|
||||||
auto b_ptr = static_cast<float*>(w_bias_mem->GetData());
|
|
||||||
for (int g = 0; g < Gb; g++) {
|
|
||||||
float *l_b_ptr = b_ptr + gate_map[g]*SC;
|
|
||||||
const float *l_ie_b_ptr = ie_b_ptr + g * SC;
|
|
||||||
cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(float));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
prim.reset(new mkldnn::primitive(pd));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MKLDNNRNN::execute(mkldnn::stream strm) {
|
void MKLDNNRNN::execute(mkldnn::stream strm) {
|
||||||
if (!prim)
|
if (!prim)
|
||||||
IE_THROW() << "No initialized primitive to execute";
|
THROW_ERROR << "No initialized primitive to execute";
|
||||||
|
|
||||||
const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
|
const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
|
||||||
const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
|
const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
|
||||||
|
@ -28,8 +28,19 @@ public:
|
|||||||
private:
|
private:
|
||||||
void fillCellDesc();
|
void fillCellDesc();
|
||||||
void fillSeqDesc();
|
void fillSeqDesc();
|
||||||
|
bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec,
|
||||||
|
const InferenceEngine::Precision& weightsPrec);
|
||||||
|
void verifyWeights();
|
||||||
|
void verifyBiases();
|
||||||
|
void convertWeightsBlobToBF16();
|
||||||
|
|
||||||
|
template <typename Prec>
|
||||||
|
void fillWeights(const int* gate_map);
|
||||||
|
template <typename Prec>
|
||||||
|
void fillBiases(const int* gate_map);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
InferenceEngine::Precision runtimePrecision;
|
||||||
/** Specify mode Cell or Seq. true - Cell, false - Seq */
|
/** Specify mode Cell or Seq. true - Cell, false - Seq */
|
||||||
bool is_cell = false;
|
bool is_cell = false;
|
||||||
|
|
||||||
@ -56,11 +67,14 @@ private:
|
|||||||
const ptrdiff_t L = 1; /**< What is it??. Constant for mkldnn impl */
|
const ptrdiff_t L = 1; /**< What is it??. Constant for mkldnn impl */
|
||||||
const ptrdiff_t D = 1; /**< Num of direction. 1 or 2 */
|
const ptrdiff_t D = 1; /**< Num of direction. 1 or 2 */
|
||||||
|
|
||||||
MKLDNNMemoryDesc in_data_d;
|
std::vector<MKLDNNMemoryDesc> in_data_d;
|
||||||
MKLDNNMemoryDesc out_data_d;
|
std::vector<MKLDNNMemoryDesc> out_data_d;
|
||||||
|
|
||||||
std::vector<MKLDNNMemoryDesc> in_states_d;
|
enum RNNInOutKind {
|
||||||
std::vector<MKLDNNMemoryDesc> out_states_d;
|
Layer = 0,
|
||||||
|
HiddenState = 1,
|
||||||
|
CellState = 2
|
||||||
|
};
|
||||||
|
|
||||||
MKLDNNMemoryDesc w_data_d;
|
MKLDNNMemoryDesc w_data_d;
|
||||||
MKLDNNMemoryDesc w_state_d;
|
MKLDNNMemoryDesc w_state_d;
|
||||||
@ -69,7 +83,7 @@ private:
|
|||||||
// List of in/out reorders if required
|
// List of in/out reorders if required
|
||||||
std::vector<mkldnn::reorder> exec_before;
|
std::vector<mkldnn::reorder> exec_before;
|
||||||
std::vector<mkldnn::reorder> exec_after;
|
std::vector<mkldnn::reorder> exec_after;
|
||||||
};
|
|
||||||
|
|
||||||
|
static const std::map<InferenceEngine::Precision, InferenceEngine::Precision> weightsByLayerPrec;
|
||||||
|
}; // class MKLDNNRNN
|
||||||
} // namespace MKLDNNPlugin
|
} // namespace MKLDNNPlugin
|
||||||
|
|
||||||
|
@ -0,0 +1,135 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "ngraph/op/gru_cell.hpp"
|
||||||
|
#include <shared_test_classes/single_layer/gru_cell.hpp>
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
using GRUCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUCellParams, CPUSpecificParams, std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
class GRUCellCPUTest : public testing::WithParamInterface<GRUCellCpuSpecificParams>,
|
||||||
|
virtual public LayerTestsUtils::LayerTestsCommon,
|
||||||
|
public CPUTestsBase {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(const testing::TestParamInfo<GRUCellCpuSpecificParams> &obj) {
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
LayerTestsDefinitions::GRUCellParams basicParamsSet;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
|
||||||
|
|
||||||
|
std::ostringstream result;
|
||||||
|
result << LayerTestsDefinitions::GRUCellTest::getTestCaseName(
|
||||||
|
testing::TestParamInfo<LayerTestsDefinitions::GRUCellParams>(basicParamsSet, 0));
|
||||||
|
result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||||
|
|
||||||
|
if (!additionalConfig.empty()) {
|
||||||
|
result << "_PluginConf";
|
||||||
|
for (auto &item : additionalConfig) {
|
||||||
|
if (item.second == PluginConfigParams::YES)
|
||||||
|
result << "_" << item.first << "=" << item.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() {
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
LayerTestsDefinitions::GRUCellParams basicParamsSet;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
bool should_decompose;
|
||||||
|
size_t batch;
|
||||||
|
size_t hidden_size;
|
||||||
|
size_t input_size;
|
||||||
|
std::vector<std::string> activations;
|
||||||
|
std::vector<float> activations_alpha;
|
||||||
|
std::vector<float> activations_beta;
|
||||||
|
float clip;
|
||||||
|
bool linear_before_reset;
|
||||||
|
InferenceEngine::Precision netPrecision;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
|
||||||
|
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||||
|
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, linear_before_reset, netPrecision, targetDevice) = basicParamsSet;
|
||||||
|
|
||||||
|
std::vector<std::vector<size_t>> inputShapes = {
|
||||||
|
{{batch, input_size},
|
||||||
|
{batch, hidden_size},
|
||||||
|
{3 * hidden_size, input_size},
|
||||||
|
{3 * hidden_size, hidden_size},
|
||||||
|
{(linear_before_reset ? 4 : 3) * hidden_size}},
|
||||||
|
};
|
||||||
|
|
||||||
|
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||||
|
|
||||||
|
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
|
||||||
|
inPrc = outPrc = Precision::BF16;
|
||||||
|
} else {
|
||||||
|
inPrc = outPrc = netPrecision;
|
||||||
|
}
|
||||||
|
|
||||||
|
selectedType += "_";
|
||||||
|
selectedType += outPrc.name();
|
||||||
|
|
||||||
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
|
||||||
|
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
|
||||||
|
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
|
||||||
|
auto gru_cell = ngraph::builder::makeGRU(
|
||||||
|
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip, linear_before_reset);
|
||||||
|
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_cell->output(0))};
|
||||||
|
|
||||||
|
function = makeNgraphFunction(ngPrc, params, gru_cell, "gru_cell");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(GRUCellCPUTest, CompareWithRefs) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
|
||||||
|
Run();
|
||||||
|
CheckPluginRelatedResults(executableNetwork, "RNNCell");
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
|
||||||
|
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"};
|
||||||
|
|
||||||
|
std::vector<bool> should_decompose{false};
|
||||||
|
std::vector<size_t> batch{1, 5};
|
||||||
|
std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<size_t> input_size{1, 30};
|
||||||
|
// oneDNN supports only sigmoid-tanh
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh"}};
|
||||||
|
// oneDNN supports only zero clip
|
||||||
|
std::vector<float> clip = {0.f};
|
||||||
|
std::vector<bool> linear_before_reset = {true, false};
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_GRUCellCPU,
|
||||||
|
GRUCellCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(linear_before_reset),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
GRUCellCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,202 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "shared_test_classes/single_layer/gru_sequence.hpp"
|
||||||
|
#include "ngraph/pass/visualize_tree.hpp"
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
|
||||||
|
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
using GRUSequenceCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUSequenceParams, CPUSpecificParams, std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
class GRUSequenceCPUTest : public testing::WithParamInterface<GRUSequenceCpuSpecificParams>,
|
||||||
|
virtual public LayerTestsUtils::LayerTestsCommon,
|
||||||
|
public CPUTestsBase {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(const testing::TestParamInfo<GRUSequenceCpuSpecificParams> &obj) {
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
LayerTestsDefinitions::GRUSequenceParams basicParamsSet;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
|
||||||
|
std::ostringstream result;
|
||||||
|
|
||||||
|
result << LayerTestsDefinitions::GRUSequenceTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::GRUSequenceParams>(basicParamsSet, 0));
|
||||||
|
result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||||
|
|
||||||
|
if (!additionalConfig.empty()) {
|
||||||
|
result << "_PluginConf";
|
||||||
|
for (auto &item : additionalConfig) {
|
||||||
|
if (item.second == PluginConfigParams::YES)
|
||||||
|
result << "_" << item.first << "=" << item.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() {
|
||||||
|
LayerTestsDefinitions::GRUSequenceParams basicParamsSet;
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
size_t seq_lenghts;
|
||||||
|
size_t batch;
|
||||||
|
size_t hidden_size;
|
||||||
|
size_t input_size = 10;
|
||||||
|
std::vector<std::string> activations;
|
||||||
|
std::vector<float> activations_alpha;
|
||||||
|
std::vector<float> activations_beta;
|
||||||
|
float clip;
|
||||||
|
bool linear_before_reset;
|
||||||
|
ngraph::op::RecurrentSequenceDirection direction;
|
||||||
|
InferenceEngine::Precision netPrecision;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
|
||||||
|
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||||
|
std::tie(m_mode, seq_lenghts, batch, hidden_size, activations, clip, linear_before_reset, direction, netPrecision, targetDevice) = basicParamsSet;
|
||||||
|
|
||||||
|
size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
|
||||||
|
std::vector<std::vector<size_t>> inputShapes = {
|
||||||
|
{{batch, seq_lenghts, input_size},
|
||||||
|
{batch, num_directions, hidden_size},
|
||||||
|
{batch},
|
||||||
|
{num_directions, 3 * hidden_size, input_size},
|
||||||
|
{num_directions, 3 * hidden_size, hidden_size},
|
||||||
|
{num_directions, (linear_before_reset ? 4 : 3) * hidden_size}},
|
||||||
|
};
|
||||||
|
|
||||||
|
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||||
|
|
||||||
|
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
|
||||||
|
inPrc = outPrc = Precision::BF16;
|
||||||
|
} else {
|
||||||
|
inPrc = outPrc = netPrecision;
|
||||||
|
}
|
||||||
|
|
||||||
|
selectedType += "_";
|
||||||
|
selectedType += outPrc.name();
|
||||||
|
|
||||||
|
m_max_seq_len = seq_lenghts;
|
||||||
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
|
||||||
|
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
|
||||||
|
if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
|
||||||
|
|| m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
|
||||||
|
auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
|
||||||
|
seq_lengths->set_friendly_name("seq_lengths");
|
||||||
|
params.push_back(seq_lengths);
|
||||||
|
}
|
||||||
|
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
|
||||||
|
auto gru_sequence = ngraph::builder::makeGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
|
||||||
|
WRB,
|
||||||
|
hidden_size,
|
||||||
|
activations,
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
clip,
|
||||||
|
linear_before_reset,
|
||||||
|
true,
|
||||||
|
direction,
|
||||||
|
m_mode);
|
||||||
|
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_sequence->output(0)),
|
||||||
|
std::make_shared<ngraph::opset1::Result>(gru_sequence->output(1))};
|
||||||
|
|
||||||
|
function = makeNgraphFunction(ngPrc, params, gru_sequence, "gru_sequence");
|
||||||
|
|
||||||
|
if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
|
||||||
|
ngraph::pass::Manager manager;
|
||||||
|
if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
|
||||||
|
manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
|
||||||
|
manager.register_pass<ngraph::pass::ConvertGRUSequenceToTensorIterator>();
|
||||||
|
manager.run_passes(function);
|
||||||
|
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, true);
|
||||||
|
} else {
|
||||||
|
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
|
||||||
|
EXPECT_EQ(ti_found, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void GenerateInputs() {
|
||||||
|
for (const auto &input : executableNetwork.GetInputsInfo()) {
|
||||||
|
const auto &info = input.second;
|
||||||
|
auto blob = GenerateInput(*info);
|
||||||
|
if (input.first == "seq_lengths") {
|
||||||
|
blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
|
||||||
|
}
|
||||||
|
inputs.push_back(blob);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ngraph::helpers::SequenceTestsMode m_mode;
|
||||||
|
int64_t m_max_seq_len = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(GRUSequenceCPUTest, CompareWithRefs) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
|
||||||
|
Run();
|
||||||
|
CheckPluginRelatedResults(executableNetwork, "RNNSeq");
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
|
||||||
|
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};;
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
|
||||||
|
// output values increase rapidly without clip, so use only seq_lenghts = 2
|
||||||
|
std::vector<size_t> seq_lengths_zero_clip{2};
|
||||||
|
std::vector<size_t> batch{10};
|
||||||
|
std::vector<size_t> batch_size_one{1};
|
||||||
|
std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh"}};
|
||||||
|
std::vector<bool> linear_before_reset = {true, false};
|
||||||
|
std::vector<float> clip{0.f};
|
||||||
|
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD};
|
||||||
|
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPU,
|
||||||
|
GRUSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(linear_before_reset),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
GRUSequenceCPUTest::getTestCaseName);
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPUBatchSizeOne,
|
||||||
|
GRUSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch_size_one),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(linear_before_reset),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParamsBatchSizeOne),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
GRUSequenceCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,132 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "ngraph/op/lstm_cell.hpp"
|
||||||
|
#include <shared_test_classes/single_layer/lstm_cell.hpp>
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
using LSTMCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::LSTMCellParams, CPUSpecificParams, std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
class LSTMCellLayerCPUTest : public testing::WithParamInterface<LSTMCellCpuSpecificParams>,
|
||||||
|
virtual public LayerTestsUtils::LayerTestsCommon,
|
||||||
|
public CPUTestsBase {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(const testing::TestParamInfo<LSTMCellCpuSpecificParams>& obj) {
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
LayerTestsDefinitions::LSTMCellParams basicParamsSet;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
|
||||||
|
std::ostringstream result;
|
||||||
|
|
||||||
|
result << LayerTestsDefinitions::LSTMCellTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::LSTMCellParams>(
|
||||||
|
basicParamsSet, 0));
|
||||||
|
result << CPUTestsBase::getTestCaseName(cpuParams);
|
||||||
|
|
||||||
|
if (!additionalConfig.empty()) {
|
||||||
|
result << "_PluginConf";
|
||||||
|
for (auto& item : additionalConfig) {
|
||||||
|
if (item.second == PluginConfigParams::YES)
|
||||||
|
result << "_" << item.first << "=" << item.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() {
|
||||||
|
LayerTestsDefinitions::LSTMCellParams basicParamsSet;
|
||||||
|
CPUSpecificParams cpuParams;
|
||||||
|
std::map<std::string, std::string> additionalConfig;
|
||||||
|
|
||||||
|
bool should_decompose;
|
||||||
|
size_t batch;
|
||||||
|
size_t hidden_size;
|
||||||
|
size_t input_size;
|
||||||
|
std::vector<std::string> activations;
|
||||||
|
std::vector<float> activations_alpha;
|
||||||
|
std::vector<float> activations_beta;
|
||||||
|
float clip;
|
||||||
|
InferenceEngine::Precision netPrecision;
|
||||||
|
threshold = 0.05;
|
||||||
|
|
||||||
|
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
|
||||||
|
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
|
||||||
|
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet;
|
||||||
|
|
||||||
|
std::vector<std::vector<size_t>> inputShapes = {
|
||||||
|
{{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size}, {4 * hidden_size, hidden_size}, {4 * hidden_size}},
|
||||||
|
};
|
||||||
|
|
||||||
|
configuration.insert(additionalConfig.begin(), additionalConfig.end());
|
||||||
|
|
||||||
|
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
|
||||||
|
inPrc = outPrc = Precision::BF16;
|
||||||
|
} else {
|
||||||
|
inPrc = outPrc = netPrecision;
|
||||||
|
}
|
||||||
|
|
||||||
|
selectedType += "_";
|
||||||
|
selectedType += outPrc.name();
|
||||||
|
|
||||||
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
|
||||||
|
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
|
||||||
|
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5]};
|
||||||
|
|
||||||
|
auto lstm_cell = ngraph::builder::makeLSTM(
|
||||||
|
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip);
|
||||||
|
|
||||||
|
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(lstm_cell->output(0)),
|
||||||
|
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(1))};
|
||||||
|
|
||||||
|
function = makeNgraphFunction(ngPrc, params, lstm_cell, "lstm_cell");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(LSTMCellLayerCPUTest, CompareWithRefs) {
|
||||||
|
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||||
|
|
||||||
|
Run();
|
||||||
|
CheckPluginRelatedResults(executableNetwork, "RNNCell");
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}},
|
||||||
|
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{nc, nc, nc}, {nc}, {"ref_any"}, "ref_any"};
|
||||||
|
|
||||||
|
std::vector<bool> should_decompose{false};
|
||||||
|
std::vector<size_t> batch{5};
|
||||||
|
std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<size_t> input_size{1, 30};
|
||||||
|
// oneDNN supports only sigmoid-tanh-tanh
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh", "tanh"}};
|
||||||
|
// oneDNN supports only zero clip
|
||||||
|
std::vector<float> clip{0.f};
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCPU,
|
||||||
|
LSTMCellLayerCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
LSTMCellLayerCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,205 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "shared_test_classes/single_layer/lstm_sequence.hpp"
|
||||||
|
#include "ngraph/pass/visualize_tree.hpp"
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
|
||||||
|
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
using LSTMSequenceCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::LSTMSequenceParams, CPUSpecificParams, std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
/**
 * Value-parameterized CPU test for the LSTMSequence layer.
 *
 * The test parameter is a tuple of the shared LSTMSequence layer-test parameters,
 * the CPU-specific parameters and an additional plugin configuration map.
 */
class LSTMSequenceCPUTest : public testing::WithParamInterface<LSTMSequenceCpuSpecificParams>,
                            virtual public LayerTestsUtils::LayerTestsCommon,
                            public CPUTestsBase {
public:
    /**
     * Builds the test-case name: the name produced by the shared LSTMSequence test,
     * followed by the CPU-specific suffix and, if the plugin configuration is not empty,
     * "_PluginConf" plus every configuration entry whose value is YES.
     */
    static std::string getTestCaseName(const testing::TestParamInfo<LSTMSequenceCpuSpecificParams> &obj) {
        LayerTestsDefinitions::LSTMSequenceParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;

        std::ostringstream result;
        result << LayerTestsDefinitions::LSTMSequenceTest::getTestCaseName(
                testing::TestParamInfo<LayerTestsDefinitions::LSTMSequenceParams>(basicParamsSet, 0));
        result << CPUTestsBase::getTestCaseName(cpuParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            // Only enabled (YES) options are reflected in the name.
            for (const auto &item : additionalConfig) {
                if (item.second == PluginConfigParams::YES)
                    result << "_" << item.first << "=" << item.second;
            }
        }
        return result.str();
    }

protected:
    /**
     * Unpacks the test parameters, derives the expected precisions / primitive type
     * and builds the ngraph function containing a single LSTM sequence.
     */
    void SetUp() {
        LayerTestsDefinitions::LSTMSequenceParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        size_t seq_length;
        size_t batch;
        size_t hidden_size;
        size_t input_size;
        std::vector<std::string> activations;
        float clip;
        ngraph::op::RecurrentSequenceDirection direction;
        InferenceEngine::Precision netPrecision;
        std::tie(m_mode, seq_length, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;

        const size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
        m_max_seq_len = seq_length;

        // Indices: 0 - data, 1/2 - the two state inputs, 3 - sequence lengths,
        //          4/5/6 - the W, R, B constants (see the WRB vector below).
        const std::vector<std::vector<size_t>> inputShapes = {
            {batch, seq_length, input_size},
            {batch, num_directions, hidden_size},
            {batch, num_directions, hidden_size},
            {batch},
            {num_directions, 4 * hidden_size, input_size},
            {num_directions, 4 * hidden_size, hidden_size},
            {num_directions, 4 * hidden_size},
        };

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        // Use find() rather than operator[] so that a missing key is not silently inserted.
        const auto enforceBF16 = additionalConfig.find(PluginConfigParams::KEY_ENFORCE_BF16);
        if (enforceBF16 != additionalConfig.end() && enforceBF16->second == PluginConfigParams::YES) {
            inPrc = outPrc = Precision::BF16;
        } else {
            inPrc = outPrc = netPrecision;
        }

        // The expected primitive type carries the execution precision as a suffix.
        selectedType += "_";
        selectedType += outPrc.name();

        // The graph itself is always built in FP32, independently of netPrecision.
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
        if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
            || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
            // In these modes the sequence lengths are a network input; GenerateInputs() finds it by name.
            auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[3]}).at(0);
            seq_lengths->set_friendly_name("seq_lengths");
            params.push_back(seq_lengths);
        }

        std::vector<ngraph::Shape> WRB = {inputShapes[4], inputShapes[5], inputShapes[6], inputShapes[3]};
        auto lstm_sequence = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
                                                       WRB,
                                                       hidden_size,
                                                       activations,
                                                       {},
                                                       {},
                                                       clip,
                                                       true,
                                                       direction,
                                                       m_mode);

        // makeNgraphFunction() creates the Result nodes itself, so none are built here.
        function = makeNgraphFunction(ngPrc, params, lstm_sequence, "lstm_sequence");

        if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
            ngraph::pass::Manager manager;
            if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
                manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
            manager.register_pass<ngraph::pass::ConvertLSTMSequenceToTensorIterator>();
            manager.run_passes(function);
            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
            EXPECT_EQ(ti_found, true);
        } else {
            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
            EXPECT_EQ(ti_found, false);
        }
    }

    /**
     * Creates one blob per network input. The input named "seq_lengths" gets a blob
     * produced by createAndFillBlob() with m_max_seq_len and 0 as fill arguments
     * (presumably range and start value - confirm against FuncTestUtils).
     */
    void GenerateInputs() {
        for (const auto &input : executableNetwork.GetInputsInfo()) {
            const auto &info = input.second;
            auto blob = GenerateInput(*info);
            if (input.first == "seq_lengths") {
                blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
            }

            inputs.push_back(blob);
        }
    }

private:
    ngraph::helpers::SequenceTestsMode m_mode;
    int64_t m_max_seq_len = 0;
};
|
||||||
|
|
||||||
|
// Runs the test flow inherited from the base classes and then verifies the
// CPU-specific expectations for the node of type "RNNSeq".
TEST_P(LSTMSequenceCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
    // Checks input/output memory formats and the selected primitive type.
    CheckPluginRelatedResults(executableNetwork, "RNNSeq");
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
|
||||||
|
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{ntc, nc, nc}, {ntc, nc, nc}, {"ref_any"}, "ref_any"};
|
||||||
|
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc, nc}, {tnc, nc, nc}, {"ref_any"}, "ref_any"};
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
|
||||||
|
std::vector<size_t> seq_lengths_zero_clip{2};
|
||||||
|
std::vector<size_t> batch_size_one{1};
|
||||||
|
std::vector<size_t> batch{10};
|
||||||
|
std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<size_t> input_size{10};
|
||||||
|
// oneDNN supports only sigmoid-tanh-tanh
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh", "tanh"}};
|
||||||
|
// oneDNN supports only zero clip
|
||||||
|
std::vector<float> clip{0.f};
|
||||||
|
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD};
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPU,
|
||||||
|
LSTMSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
LSTMSequenceCPUTest::getTestCaseName);
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPUbatchSizeOne,
|
||||||
|
LSTMSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch_size_one),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParamsBatchSizeOne),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
LSTMSequenceCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,124 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "ngraph/op/rnn_cell.hpp"
|
||||||
|
#include <shared_test_classes/single_layer/rnn_cell.hpp>
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Complete parameter tuple of the CPU RNNCell test:
//   0: parameters of the shared (plugin-agnostic) RNNCell layer test,
//   1: CPU-specific expectations (see CPUSpecificParams),
//   2: additional plugin configuration passed as key/value pairs.
using RNNCellCpuSpecificParams = std::tuple<
        LayerTestsDefinitions::RNNCellParams,
        CPUSpecificParams,
        std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
/**
 * Value-parameterized CPU test for the RNNCell layer.
 *
 * The test parameter is a tuple of the shared RNNCell layer-test parameters,
 * the CPU-specific parameters and an additional plugin configuration map.
 */
class RNNCellCPUTest : public testing::WithParamInterface<RNNCellCpuSpecificParams>,
                       virtual public LayerTestsUtils::LayerTestsCommon,
                       public CPUTestsBase {
public:
    /**
     * Builds the test-case name: the name produced by the shared RNNCell test,
     * followed by the CPU-specific suffix and, if the plugin configuration is not empty,
     * "_PluginConf" plus every configuration entry whose value is YES.
     */
    static std::string getTestCaseName(const testing::TestParamInfo<RNNCellCpuSpecificParams> &obj) {
        LayerTestsDefinitions::RNNCellParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;

        std::ostringstream result;
        result << LayerTestsDefinitions::RNNCellTest::getTestCaseName(
                testing::TestParamInfo<LayerTestsDefinitions::RNNCellParams>(basicParamsSet, 0));
        result << CPUTestsBase::getTestCaseName(cpuParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            // Only enabled (YES) options are reflected in the name.
            for (const auto &item : additionalConfig) {
                if (item.second == PluginConfigParams::YES)
                    result << "_" << item.first << "=" << item.second;
            }
        }

        return result.str();
    }

protected:
    /**
     * Unpacks the test parameters, derives the expected precisions / primitive type
     * and builds the ngraph function containing a single RNN cell.
     */
    void SetUp() {
        LayerTestsDefinitions::RNNCellParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        bool should_decompose;
        size_t batch;
        size_t hidden_size;
        size_t input_size;
        std::vector<std::string> activations;
        float clip;
        InferenceEngine::Precision netPrecision;
        std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet;

        // Indices: 0/1 - the two network inputs, 2/3/4 - the W, R, B constants (see WRB below).
        const std::vector<std::vector<size_t>> inputShapes = {
            {batch, input_size},
            {batch, hidden_size},
            {hidden_size, input_size},
            {hidden_size, hidden_size},
            {hidden_size},
        };

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        // Use find() rather than operator[] so that a missing key is not silently inserted.
        const auto enforceBF16 = additionalConfig.find(PluginConfigParams::KEY_ENFORCE_BF16);
        if (enforceBF16 != additionalConfig.end() && enforceBF16->second == PluginConfigParams::YES) {
            inPrc = outPrc = Precision::BF16;
        } else {
            inPrc = outPrc = netPrecision;
        }

        // The expected primitive type carries the execution precision as a suffix.
        selectedType += "_";
        selectedType += outPrc.name();

        // The graph itself is always built in FP32, independently of netPrecision.
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
        std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
        auto rnn_cell = ngraph::builder::makeRNN(
            ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
            WRB, hidden_size, activations, {}, {}, clip);

        // makeNgraphFunction() creates the Result nodes itself, so none are built here.
        function = makeNgraphFunction(ngPrc, params, rnn_cell, "rnn_cell");
    }
};
|
||||||
|
|
||||||
|
// Runs the test flow inherited from the base classes and then verifies the
// CPU-specific expectations for the node of type "RNNCell".
TEST_P(RNNCellCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
    // Checks input/output memory formats and the selected primitive type.
    CheckPluginRelatedResults(executableNetwork, "RNNCell");
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"};
|
||||||
|
std::vector<bool> should_decompose{false};
|
||||||
|
std::vector<size_t> batch{1, 5};
|
||||||
|
std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<size_t> input_size{1, 30};
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
|
||||||
|
// oneDNN supports only zero clip
|
||||||
|
std::vector<float> clip = {0.f};
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_RNNCellCPU,
|
||||||
|
RNNCellCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
RNNCellCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -0,0 +1,202 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "shared_test_classes/single_layer/rnn_sequence.hpp"
|
||||||
|
#include "ngraph/pass/visualize_tree.hpp"
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
|
||||||
|
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace CPULayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Complete parameter tuple of the CPU RNNSequence test:
//   0: parameters of the shared (plugin-agnostic) RNNSequence layer test,
//   1: CPU-specific expectations (see CPUSpecificParams),
//   2: additional plugin configuration passed as key/value pairs.
using RNNSequenceCpuSpecificParams = std::tuple<
        LayerTestsDefinitions::RNNSequenceParams,
        CPUSpecificParams,
        std::map<std::string, std::string>>;
|
||||||
|
|
||||||
|
/**
 * Value-parameterized CPU test for the RNNSequence layer.
 *
 * The test parameter is a tuple of the shared RNNSequence layer-test parameters,
 * the CPU-specific parameters and an additional plugin configuration map.
 */
class RNNSequenceCPUTest : public testing::WithParamInterface<RNNSequenceCpuSpecificParams>,
                           virtual public LayerTestsUtils::LayerTestsCommon,
                           public CPUTestsBase {
public:
    /**
     * Builds the test-case name: the name produced by the shared RNNSequence test,
     * followed by the CPU-specific suffix and, if the plugin configuration is not empty,
     * "_PluginConf" plus every configuration entry whose value is YES.
     */
    static std::string getTestCaseName(const testing::TestParamInfo<RNNSequenceCpuSpecificParams> &obj) {
        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;

        std::ostringstream result;
        result << LayerTestsDefinitions::RNNSequenceTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::RNNSequenceParams>(basicParamsSet, 0));
        result << CPUTestsBase::getTestCaseName(cpuParams);

        if (!additionalConfig.empty()) {
            result << "_PluginConf";
            // Only enabled (YES) options are reflected in the name.
            for (const auto &item : additionalConfig) {
                if (item.second == PluginConfigParams::YES)
                    result << "_" << item.first << "=" << item.second;
            }
        }
        return result.str();
    }

protected:
    /**
     * Unpacks the test parameters, derives the expected precisions / primitive type
     * and builds the ngraph function containing a single RNN sequence.
     */
    void SetUp() {
        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

        size_t seq_length;
        size_t batch;
        size_t hidden_size;
        size_t input_size;
        std::vector<std::string> activations;
        float clip;
        ngraph::op::RecurrentSequenceDirection direction;
        InferenceEngine::Precision netPrecision;
        std::tie(m_mode, seq_length, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;

        const size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;

        // Indices: 0 - data, 1 - the state input, 2 - sequence lengths,
        //          3/4/5 - the W, R, B constants (see the WRB vector below).
        const std::vector<std::vector<size_t>> inputShapes = {
            {batch, seq_length, input_size},
            {batch, num_directions, hidden_size},
            {batch},
            {num_directions, hidden_size, input_size},
            {num_directions, hidden_size, hidden_size},
            {num_directions, hidden_size},
        };

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        // Use find() rather than operator[] so that a missing key is not silently inserted.
        const auto enforceBF16 = additionalConfig.find(PluginConfigParams::KEY_ENFORCE_BF16);
        if (enforceBF16 != additionalConfig.end() && enforceBF16->second == PluginConfigParams::YES) {
            inPrc = outPrc = Precision::BF16;
        } else {
            inPrc = outPrc = netPrecision;
        }

        // The expected primitive type carries the execution precision as a suffix.
        selectedType += "_";
        selectedType += outPrc.name();

        m_max_seq_len = seq_length;

        // The graph itself is always built in FP32, independently of netPrecision.
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
        if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
            || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
            // In these modes the sequence lengths are a network input; GenerateInputs() finds it by name.
            auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
            seq_lengths->set_friendly_name("seq_lengths");
            params.push_back(seq_lengths);
        }

        std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
        auto rnn_sequence = ngraph::builder::makeRNN(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
                                                     WRB,
                                                     hidden_size,
                                                     activations,
                                                     {},
                                                     {},
                                                     clip,
                                                     true,
                                                     direction,
                                                     m_mode);

        // makeNgraphFunction() creates the Result nodes itself, so none are built here.
        function = makeNgraphFunction(ngPrc, params, rnn_sequence, "rnn_sequence");

        if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
            ngraph::pass::Manager manager;
            if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
                manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
            manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
            manager.run_passes(function);
            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
            EXPECT_EQ(ti_found, true);
        } else {
            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
            EXPECT_EQ(ti_found, false);
        }
    }

    /**
     * Creates one blob per network input. The input named "seq_lengths" gets a blob
     * produced by createAndFillBlob() with m_max_seq_len and 0 as fill arguments
     * (presumably range and start value - confirm against FuncTestUtils).
     */
    void GenerateInputs() {
        for (const auto &input : executableNetwork.GetInputsInfo()) {
            const auto &info = input.second;
            auto blob = GenerateInput(*info);
            if (input.first == "seq_lengths") {
                blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
            }

            inputs.push_back(blob);
        }
    }

private:
    ngraph::helpers::SequenceTestsMode m_mode;
    int64_t m_max_seq_len = 0;
};
|
||||||
|
|
||||||
|
// Runs the test flow inherited from the base classes and then verifies the
// CPU-specific expectations for the node of type "RNNSeq".
TEST_P(RNNSequenceCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
    // Checks input/output memory formats and the selected primitive type.
    CheckPluginRelatedResults(executableNetwork, "RNNSeq");
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
/* CPU PARAMS */
|
||||||
|
std::vector<std::map<std::string, std::string>> additionalConfig
|
||||||
|
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
|
||||||
|
|
||||||
|
CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
|
||||||
|
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};
|
||||||
|
|
||||||
|
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
|
||||||
|
// output values increase rapidly without clip, so use only seq_lenghts = 2
|
||||||
|
std::vector<size_t> seq_lengths_zero_clip{2};
|
||||||
|
std::vector<size_t> batch{10};
|
||||||
|
std::vector<size_t> batch_size_one{1};
|
||||||
|
std::vector<size_t> hidden_size{10};
|
||||||
|
// std::vector<size_t> hidden_size{1, 10};
|
||||||
|
std::vector<size_t> input_size{10};
|
||||||
|
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
|
||||||
|
// oneDNN supports only zero clip
|
||||||
|
std::vector<float> clip{0.f};
|
||||||
|
|
||||||
|
std::vector<ngraph::op::RecurrentSequenceDirection> direction{ngraph::op::RecurrentSequenceDirection::FORWARD};
|
||||||
|
|
||||||
|
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPU,
|
||||||
|
RNNSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParams),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
RNNSequenceCPUTest::getTestCaseName);
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPUBatchSizeOne,
|
||||||
|
RNNSequenceCPUTest,
|
||||||
|
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
|
||||||
|
::testing::ValuesIn(seq_lengths_zero_clip),
|
||||||
|
::testing::ValuesIn(batch_size_one),
|
||||||
|
::testing::ValuesIn(hidden_size),
|
||||||
|
::testing::ValuesIn(input_size),
|
||||||
|
::testing::ValuesIn(activations),
|
||||||
|
::testing::ValuesIn(clip),
|
||||||
|
::testing::ValuesIn(direction),
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||||
|
::testing::Values(cpuParamsBatchSizeOne),
|
||||||
|
::testing::ValuesIn(additionalConfig)),
|
||||||
|
RNNSequenceCPUTest::getTestCaseName);
|
||||||
|
} // namespace
|
||||||
|
} // namespace CPULayerTestsDefinitions
|
@ -8,16 +8,29 @@
|
|||||||
namespace CPUTestUtils {
|
namespace CPUTestUtils {
|
||||||
|
|
||||||
const char *CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
|
const char *CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
|
||||||
if (v == nchw) return "nchw";
|
#define CASE(_fmt) do { \
|
||||||
if (v == nChw8c) return "nChw8c";
|
if (v == _fmt) return #_fmt; \
|
||||||
if (v == nChw16c) return "nChw16c";
|
} while (0)
|
||||||
if (v == nhwc) return "nhwc";
|
CASE(undef);
|
||||||
if (v == ncdhw) return "ncdhw";
|
CASE(nchw);
|
||||||
if (v == nCdhw8c) return "nCdhw8c";
|
CASE(nChw8c);
|
||||||
if (v == nCdhw16c) return "nCdhw16c";
|
CASE(nChw16c);
|
||||||
if (v == ndhwc) return "ndhwc";
|
CASE(nhwc);
|
||||||
if (v == nc) return "nc";
|
CASE(ncdhw);
|
||||||
if (v == x) return "x";
|
CASE(nCdhw8c);
|
||||||
|
CASE(nCdhw16c);
|
||||||
|
CASE(ndhwc);
|
||||||
|
CASE(nc);
|
||||||
|
CASE(x);
|
||||||
|
CASE(tnc);
|
||||||
|
CASE(ntc);
|
||||||
|
CASE(ldnc);
|
||||||
|
CASE(ldigo);
|
||||||
|
CASE(ldgoi);
|
||||||
|
CASE(ldio);
|
||||||
|
CASE(ldoi);
|
||||||
|
CASE(ldgo);
|
||||||
|
#undef CASE
|
||||||
assert(!"unknown fmt");
|
assert(!"unknown fmt");
|
||||||
return "undef";
|
return "undef";
|
||||||
}
|
}
|
||||||
@ -39,6 +52,10 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) {
|
|||||||
CASE(acdeb);
|
CASE(acdeb);
|
||||||
CASE(aBcde8b);
|
CASE(aBcde8b);
|
||||||
CASE(aBcde16b);
|
CASE(aBcde16b);
|
||||||
|
CASE(abc);
|
||||||
|
CASE(bac);
|
||||||
|
CASE(abdc);
|
||||||
|
CASE(abdec);
|
||||||
CASE(nchw);
|
CASE(nchw);
|
||||||
CASE(nChw8c);
|
CASE(nChw8c);
|
||||||
CASE(nChw16c);
|
CASE(nChw16c);
|
||||||
@ -49,6 +66,14 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) {
|
|||||||
CASE(ndhwc);
|
CASE(ndhwc);
|
||||||
CASE(nc);
|
CASE(nc);
|
||||||
CASE(x);
|
CASE(x);
|
||||||
|
CASE(tnc);
|
||||||
|
CASE(ntc);
|
||||||
|
CASE(ldnc);
|
||||||
|
CASE(ldigo);
|
||||||
|
CASE(ldgoi);
|
||||||
|
CASE(ldio);
|
||||||
|
CASE(ldoi);
|
||||||
|
CASE(ldgo);
|
||||||
#undef CASE
|
#undef CASE
|
||||||
assert(!"unknown memory format");
|
assert(!"unknown memory format");
|
||||||
return undef;
|
return undef;
|
||||||
@ -120,18 +145,38 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
|
|||||||
auto shape = parentNode->get_output_tensor(0).get_shape();
|
auto shape = parentNode->get_output_tensor(0).get_shape();
|
||||||
auto actualInputMemoryFormat = getExecValueOutputsLayout(parentNode);
|
auto actualInputMemoryFormat = getExecValueOutputsLayout(parentNode);
|
||||||
|
|
||||||
if (!should_be_skipped(shape, inFmts[i]))
|
if (!should_be_skipped(shape, inFmts[i])) {
|
||||||
ASSERT_EQ(inFmts[i], cpu_str2fmt(actualInputMemoryFormat.c_str()));
|
ASSERT_EQ(inFmts[i], cpu_str2fmt(actualInputMemoryFormat.c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < outFmts.size(); i++) {
|
}
|
||||||
|
|
||||||
|
/* actual output formats are represented as a single string, for example 'fmt1' or 'fmt1, fmt2, fmt3'
|
||||||
|
* convert it to the list of formats */
|
||||||
|
auto getActualOutputMemoryFormats = [] (const std::string& fmtStr) -> std::vector<std::string> {
|
||||||
|
std::vector<std::string> result;
|
||||||
|
std::stringstream ss(fmtStr);
|
||||||
|
std::string str;
|
||||||
|
while (std::getline(ss, str, ',')) {
|
||||||
|
result.push_back(str);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto actualOutputMemoryFormats = getActualOutputMemoryFormats(getExecValueOutputsLayout(node));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < outFmts.size(); i++) {
|
||||||
const auto actualOutputMemoryFormat = getExecValue(ExecGraphInfoSerialization::OUTPUT_LAYOUTS);
|
const auto actualOutputMemoryFormat = getExecValue(ExecGraphInfoSerialization::OUTPUT_LAYOUTS);
|
||||||
const auto shape = node->get_output_shape(i);
|
const auto shape = node->get_output_shape(i);
|
||||||
|
|
||||||
if (!should_be_skipped(shape, outFmts[i]))
|
if (should_be_skipped(shape, outFmts[i]))
|
||||||
ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormat.c_str()));
|
continue;
|
||||||
|
|
||||||
|
ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormats[i].c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto primType = getExecValue(ExecGraphInfoSerialization::IMPL_TYPE);
|
auto primType = getExecValue(ExecGraphInfoSerialization::IMPL_TYPE);
|
||||||
|
|
||||||
ASSERT_EQ(selectedType, primType);
|
ASSERT_EQ(selectedType, primType);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -197,8 +242,11 @@ std::shared_ptr<ngraph::Function>
|
|||||||
CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms,
|
CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms,
|
||||||
const std::shared_ptr<ngraph::Node> &lastNode, std::string name) const {
|
const std::shared_ptr<ngraph::Node> &lastNode, std::string name) const {
|
||||||
auto newLastNode = modifyGraph(ngPrc, params, lastNode);
|
auto newLastNode = modifyGraph(ngPrc, params, lastNode);
|
||||||
|
ngraph::ResultVector results;
|
||||||
|
|
||||||
|
for (int i = 0; i < newLastNode->get_output_size(); i++)
|
||||||
|
results.push_back(std::make_shared<ngraph::opset1::Result>(newLastNode->output(i)));
|
||||||
|
|
||||||
ngraph::ResultVector results = {std::make_shared<ngraph::opset1::Result>(newLastNode)};
|
|
||||||
return std::make_shared<ngraph::Function>(results, params, name);
|
return std::make_shared<ngraph::Function>(results, params, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,11 @@ namespace CPUTestUtils {
|
|||||||
acdeb,
|
acdeb,
|
||||||
aBcde8b,
|
aBcde8b,
|
||||||
aBcde16b,
|
aBcde16b,
|
||||||
|
// RNN layouts
|
||||||
|
abc,
|
||||||
|
bac,
|
||||||
|
abdc,
|
||||||
|
abdec,
|
||||||
|
|
||||||
x = a,
|
x = a,
|
||||||
nc = ab,
|
nc = ab,
|
||||||
@ -34,7 +39,41 @@ namespace CPUTestUtils {
|
|||||||
ncdhw = abcde,
|
ncdhw = abcde,
|
||||||
nCdhw8c = aBcde8b,
|
nCdhw8c = aBcde8b,
|
||||||
nCdhw16c = aBcde16b,
|
nCdhw16c = aBcde16b,
|
||||||
ndhwc = acdeb
|
ndhwc = acdeb,
|
||||||
|
// RNN layouts
|
||||||
|
tnc = abc,
|
||||||
|
/// 3D RNN data tensor in the format (batch, seq_length, input channels).
|
||||||
|
ntc = bac,
|
||||||
|
/// 4D RNN states tensor in the format (num_layers, num_directions,
|
||||||
|
/// batch, state channels).
|
||||||
|
ldnc = abcd,
|
||||||
|
/// 5D RNN weights tensor in the format (num_layers, num_directions,
|
||||||
|
/// input_channels, num_gates, output_channels).
|
||||||
|
///
|
||||||
|
/// - For LSTM cells, the gates order is input, forget, candidate
|
||||||
|
/// and output gate.
|
||||||
|
/// - For GRU cells, the gates order is update, reset and output gate.
|
||||||
|
ldigo = abcde,
|
||||||
|
/// 5D RNN weights tensor in the format (num_layers, num_directions,
|
||||||
|
/// num_gates, output_channels, input_channels).
|
||||||
|
///
|
||||||
|
/// - For LSTM cells, the gates order is input, forget, candidate
|
||||||
|
/// and output gate.
|
||||||
|
/// - For GRU cells, the gates order is update, reset and output gate.
|
||||||
|
ldgoi = abdec,
|
||||||
|
/// 4D LSTM projection tensor in the format (num_layers, num_directions,
|
||||||
|
/// num_channels_in_hidden_state, num_channels_in_recurrent_projection).
|
||||||
|
ldio = abcd,
|
||||||
|
/// 4D LSTM projection tensor in the format (num_layers, num_directions,
|
||||||
|
/// num_channels_in_recurrent_projection, num_channels_in_hidden_state).
|
||||||
|
ldoi = abdc,
|
||||||
|
/// 4D RNN bias tensor in the format (num_layers, num_directions,
|
||||||
|
/// num_gates, output_channels).
|
||||||
|
///
|
||||||
|
/// - For LSTM cells, the gates order is input, forget, candidate
|
||||||
|
/// and output gate.
|
||||||
|
/// - For GRU cells, the gates order is update, reset and output gate.
|
||||||
|
ldgo = abcd,
|
||||||
} cpu_memory_format_t;
|
} cpu_memory_format_t;
|
||||||
|
|
||||||
using CPUSpecificParams = std::tuple<
|
using CPUSpecificParams = std::tuple<
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
2
inference-engine/thirdparty/mkl-dnn
vendored
2
inference-engine/thirdparty/mkl-dnn
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 0813c00df7558bc9b858d3a73c725bab2ce1b1eb
|
Subproject commit 462982a2f9272ad26473ec13d983b10dbd193cd3
|
Loading…
Reference in New Issue
Block a user