Merge remote-tracking branch 'upstream/master' into itikhono/extension/json_config

Ivan Tikhonov 2021-12-16 19:49:06 +03:00
commit 089cfc8539
21 changed files with 1208 additions and 325 deletions

View File

@@ -83,7 +83,7 @@ jobs:
displayName: 'Make dir'
- script: |
certutil -urlcache -split -f https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat install_ib_console.bat
curl -O https://openvinoweb.z5.web.core.windows.net/incredibuild/install_ib_console.bat
call install_ib_console.bat
workingDirectory: $(WORK_DIR)
displayName: 'Install IncrediBuild'
@@ -117,9 +117,9 @@ jobs:
python -m pip install -r $(REPO_DIR)\tools\mo\requirements.txt
python -m pip install -r $(REPO_DIR)\tools\mo\requirements_dev.txt
rem Speed up build
certutil -urlcache -split -f https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip cmake-$(CMAKE_VERSION)-windows-x86_64.zip
powershell -command "Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v$(CMAKE_VERSION)/cmake-$(CMAKE_VERSION)-windows-x86_64.zip -OutFile cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
powershell -command "Expand-Archive -Force cmake-$(CMAKE_VERSION)-windows-x86_64.zip"
certutil -urlcache -split -f https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip ninja-win.zip
powershell -command "Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip -OutFile ninja-win.zip"
powershell -command "Expand-Archive -Force ninja-win.zip"
git clone https://github.com/google/gtest-parallel.git
workingDirectory: $(WORK_DIR)

View File

@@ -1961,6 +1961,30 @@ bool evaluate(const shared_ptr<op::v0::RNNCell>& op, const HostTensorVector& out
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v0::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;
runtime::reference::lstm_cell<T>(inputs[0]->get_data_ptr<ET>(),
inputs[0]->get_shape(),
inputs[1]->get_data_ptr<ET>(),
inputs[1]->get_shape(),
inputs[2]->get_data_ptr<ET>(),
inputs[2]->get_shape(),
inputs[3]->get_data_ptr<ET>(),
inputs[3]->get_shape(),
inputs[4]->get_data_ptr<ET>(),
inputs[4]->get_shape(),
inputs[5]->get_data_ptr<ET>(),
inputs[5]->get_shape(),
outputs[0]->get_data_ptr<ET>(),
outputs[1]->get_data_ptr<ET>(),
op->get_activations()[0],
op->get_activations()[1],
op->get_activations()[2],
op->get_clip());
return true;
}
template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v4::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
using T = typename element_type_traits<ET>::value_type;

View File

@@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0)
NGRAPH_OP(GRN, op::v0)
NGRAPH_OP(HardSigmoid, op::v0)
NGRAPH_OP(LRN, ngraph::op::v0)
NGRAPH_OP(LSTMCell, op::v0)
NGRAPH_OP(MVN, ngraph::op::v0)
NGRAPH_OP(NormalizeL2, op::v0)
NGRAPH_OP(PriorBox, ngraph::op::v0)

View File

@@ -4,7 +4,8 @@
#include <gtest/gtest.h>
#include "openvino/op/lstm_cell.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"
using namespace reference_tests;
@@ -12,13 +13,6 @@ using namespace ov;
namespace {
struct LSTMCellParams {
LSTMCellParams(
int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount,
const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B,
const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") :
batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount),
X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {}
int32_t batchSize;
int32_t inputSize;
int32_t hiddenSize;
@@ -34,6 +28,22 @@ struct LSTMCellParams {
std::string testcaseName;
};
struct Builder : ParamsBuilder<LSTMCellParams> {
REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, X);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, W);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, R);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co);
REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};
class ReferenceLSTMCellTest : public testing::TestWithParam<LSTMCellParams>, public CommonReferenceTest {
public:
void SetUp() override {
@@ -63,26 +73,24 @@ public:
result << "_hoType=" << param.Ho.type;
result << "_hoShape=" << param.Ho.shape;
result << "_coType=" << param.Co.type;
result << "_coShape=" << param.Co.shape;
if (param.testcaseName != "") {
result << "_coShape=" << param.Co.shape;
result << "_=" << param.testcaseName;
} else {
result << "_coShape=" << param.Co.shape;
}
return result.str();
}
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -107,15 +115,15 @@ public:
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -142,15 +150,15 @@ private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<op::v4::LSTMCell>(X,
std::make_shared<opset4::LSTMCell>(X,
H_t,
C_t,
W,
@@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) {
Exec();
}
class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
params.hiddenSize);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip {
private:
static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
const float clip_threshold = 3.5f;
const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);
const auto lstm_cell =
std::make_shared<opset1::LSTMCell>(X,
H_t,
C_t,
W,
R,
B,
params.hiddenSize,
op::LSTMWeightsFormat::IFCO,
std::vector<std::string>{"sigmoid", "tanh", "tanh"},
std::vector<float>{},
std::vector<float>{},
clip_threshold);
auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
return function;
}
};
TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) {
Exec();
}
TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) {
Exec();
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParams() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)),
Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}),
Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}),
"lstm_cell_zero_bias_default_attrs"),
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_zero_bias_default_attrs")
};
return params;
}
@@ -232,29 +334,32 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
@@ -265,20 +370,20 @@ std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_default_attrs"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_default_attrs"),
};
return params;
}
@@ -302,29 +407,32 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClip() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
LSTMCellParams(
2, 3, 3, 4,
Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
@@ -335,20 +443,20 @@ std::vector<LSTMCellParams> generateParamsBiasClip() {
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}),
Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}),
Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}),
"lstm_cell_bias_clip"),
1.3659683465957642}))
.testcaseName("lstm_cell_bias_clip"),
};
return params;
}
@@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName);
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
.testcaseName("lstm_cell_v1_zero_bias_default_attrs")
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsV1<element::Type_t::bf16>(),
generateParamsV1<element::Type_t::f16>(),
generateParamsV1<element::Type_t::f32>(),
generateParamsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82509011030197144,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4578367471694946,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_default_attrs"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClipV1() {
using T = typename element_type_traits<ET>::value_type;
std::vector<LSTMCellParams> params {
Builder {}
.batchSize(2)
.inputSize(3)
.hiddenSize(3)
.gatesCount(4)
.X(Tensor(ET, {2, 3}, std::vector<T>{
0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
.W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
.R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
.H_t(Tensor(ET, {2, 3}, std::vector<T>{
0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
.C_t(Tensor(ET, {2, 3}, std::vector<T>{
0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
.B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
1.15248052f,
1.16671345f,
0.21450312f,
1.2380678f,
1.51688835f,
0.46718366f,
0.91810346f,
1.1274234f,
0.51022074f,
1.11389844f,
0.74174305f}))
.Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
0.76665538549423218,
0.82387429475784302,
0.6479143500328064,
0.66586339473724365,
0.74838578701019287}))
.Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
1.1150213479995728,
1.4510968923568726,
1.0649888515472412,
0.93761754035949707,
1.3659683465957642}))
.testcaseName("lstm_cell_v1_bias_clip"),
};
return params;
}
std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
const std::vector<std::vector<LSTMCellParams>> generatedParams {
generateParamsBiasClipV1<element::Type_t::bf16>(),
generateParamsBiasClipV1<element::Type_t::f16>(),
generateParamsBiasClipV1<element::Type_t::f32>(),
generateParamsBiasClipV1<element::Type_t::f64>(),
};
std::vector<LSTMCellParams> combinedParams;
for (const auto& params : generatedParams) {
combinedParams.insert(combinedParams.end(), params.begin(), params.end());
}
return combinedParams;
}
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
} // namespace

View File

@@ -504,21 +504,22 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
for (const auto &ii : _networkInputs) {
for (const auto &ii : network.getInputsInfo()) {
auto input_precision = ii.second->getPrecision();
if (input_precision != InferenceEngine::Precision::FP64 &&
input_precision != InferenceEngine::Precision::FP32 &&
input_precision != InferenceEngine::Precision::I32 &&
input_precision != InferenceEngine::Precision::U32 &&
input_precision != InferenceEngine::Precision::U16 &&
input_precision != InferenceEngine::Precision::I16 &&
input_precision != InferenceEngine::Precision::I8 &&
input_precision != InferenceEngine::Precision::U8 &&
input_precision != InferenceEngine::Precision::BF16 &&
input_precision != InferenceEngine::Precision::BOOL &&
input_precision != InferenceEngine::Precision::I64 &&
input_precision != InferenceEngine::Precision::U64) {
using hash_t = std::hash<typename std::underlying_type<Precision::ePrecision>::type>;
static const std::unordered_set<Precision::ePrecision, hash_t> supported_precisions = {
Precision::U8, Precision::I8,
Precision::U16, Precision::I16,
Precision::U32, Precision::I32,
Precision::U64, Precision::I64,
Precision::BF16, Precision::FP16,
Precision::FP32, Precision::FP64,
Precision::BOOL
};
if (!supported_precisions.count(input_precision)) {
IE_THROW(NotImplemented)
<< "Input image format " << input_precision << " is not supported yet...";
}

View File

@@ -4,27 +4,208 @@
#include "cpu_convert.h"
#include "cpu_memcpy.h"
#include "utils/bfloat16.hpp"
#include <utils/bfloat16.hpp>
#include <utils/general_utils.h>
#include <mkldnn_selective_build.h>
#include <ie_parallel.hpp>
#include <openvino/core/type/float16.hpp>
#include <cpu/x64/jit_generator.hpp>
#include <algorithm>
#include <type_traits>
#include <tuple>
#include <ie_parallel.hpp>
#include <cmath>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace dnnl::impl::cpu::x64;
using namespace dnnl::impl::utils;
using namespace Xbyak;
namespace {
template<typename srcType, typename dstType>
void convert(const void *srcPtr, void *dstPtr, const size_t size) {
if (std::is_same<srcType, dstType>::value) {
cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType));
} else {
const srcType *srcData = reinterpret_cast<const srcType *>(srcPtr);
dstType *dstData = reinterpret_cast<dstType *>(dstPtr);
template <typename src_t, typename dst_t>
void convert_vec(jit_generator & gen,
const RegExp & src,
const RegExp & dst);
parallel_for(size, [&](size_t i) {
dstData[i] = static_cast<dstType>(srcData[i]);
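// Widens eight packed fp16 values to fp32 via the F16C vcvtph2ps instruction.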
template <>
void convert_vec<ov::float16, float>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.movdqu(f16vec, gen.xword[src]);
gen.vcvtph2ps(f32vec, f16vec);
gen.vmovups(gen.yword[dst], f32vec);
}
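// The reverse direction: packs eight fp32 values to fp16 via F16C vcvtps2ph (round to nearest even).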
template <>
void convert_vec<float, ov::float16>(jit_generator & gen,
const RegExp & src,
const RegExp & dst) {
auto const & f16vec = gen.xmm3;
auto const & f32vec = gen.ymm4;
gen.vmovups(f32vec, gen.yword[src]);
gen.vcvtps2ph(f16vec, f32vec, 0);
gen.movdqu(gen.xword[dst], f16vec);
}
class jit_convert_array : public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)
void generate() override {
const size_t vlen = 8u;
const size_t vlen_log2 = 3;
auto reg_src = rax;
auto reg_dst = rbx;
auto reg_sz = rdx;
Label tail, exit;
preamble();
mov(reg_src, ptr[param1 + offsetof(args_t, src)]);
mov(reg_dst, ptr[param1 + offsetof(args_t, out)]);
mov(reg_sz, ptr[param1 + offsetof(args_t, count)]);
xor_(rsi, rsi);
mov(r8, reg_sz);
shr(r8, vlen_log2);
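// r8 = number of full 8-element vectors; they are converted in the main loop below.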
foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) {
_convert_vec(*this, reg_src, reg_dst);
add(reg_src, _src_size * vlen);
add(reg_dst, _dst_size * vlen);
});
L(tail);
shl(rsi, vlen_log2);
sub(reg_sz, rsi);
test(reg_sz, reg_sz);
jz(exit);
// allocate array for 8 floats on stack
sub(rsp, vlen * sizeof(float));
mov(r8, rsp);
vpxor(ymm4, ymm4, ymm4);
vmovups(yword[r8], ymm4);
// Tail conversion
copy(r8, reg_src, reg_sz, _src_size);
_convert_vec(*this, r8, r8);
copy(reg_dst, r8, reg_sz, _dst_size);
// Free the array on stack
add(rsp, vlen * sizeof(float));
L(exit);
postamble();
}
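// Emits a loop that runs while idx < end, invoking fn(idx) and advancing idx by step.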
void foreach(const Xbyak::Reg64& idx,
size_t step,
const Xbyak::Reg64& end,
std::function<void(const Xbyak::Reg64&)> && fn) {
Label loop, exit;
L(loop);
cmp(idx, end);
jge(exit);
fn(idx);
add(idx, step);
jmp(loop);
L(exit);
}
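// Element-wise copy of 'size' items of 'item_size' bytes each; used to stage the
// partial tail that does not fill a whole 8-element vector.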
void copy(const Xbyak::Reg64& dst,
const Xbyak::Reg64& src,
const Xbyak::Reg64& size,
size_t item_size) {
push(rsi);
push(r15);
xor_(rsi, rsi);
auto address_frame = [this](size_t size) -> const AddressFrame& {
switch (size) {
case 1: return byte;
case 2: return word;
case 4: return dword;
case 8: return qword;
default:
break;
}
return ptr;
};
const auto & addr_frame = address_frame(item_size);
foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
mov(r15, addr_frame[src + idx * item_size]);
mov(addr_frame[dst + idx * item_size], r15);
});
pop(r15);
pop(rsi);
}
public:
typedef struct {
const void* src;
void* out;
const size_t count;
} args_t;
typedef void (*fn_t)(const args_t*);
typedef void (*convert_vec_t)(jit_generator &,
const RegExp &,
const RegExp &);
jit_convert_array(convert_vec_t convert_vec,
size_t src_size,
size_t dst_size)
: _convert_vec(convert_vec)
, _src_size(src_size)
, _dst_size(dst_size) {}
template<typename src_t, typename dst_t>
static fn_t get() {
if (mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) {
static jit_convert_array converter(convert_vec<src_t, dst_t>, sizeof(src_t), sizeof(dst_t));
auto & generator = static_cast<jit_generator&>(converter);
generator.create_kernel();
return (fn_t)generator.jit_ker();
}
return nullptr;
}
private:
convert_vec_t _convert_vec;
size_t _src_size;
size_t _dst_size;
};
template <typename TI, typename TO>
void jit_convert(const TI* arg, TO* out, size_t count) {
using jit_impl = jit_convert_array;
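// The kernel is generated once per (TI, TO) pair; get() returns nullptr when
// AVX2/F16C support is missing, in which case the scalar loop below is used.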
static auto converter = jit_impl::get<TI, TO>();
if (converter) {
typename jit_impl::args_t args = { arg, out, count };
converter(&args);
} else {
for (size_t i = 0; i < count; ++i) {
out[i] = static_cast<TO>(arg[i]);
}
}
}
@@ -35,84 +216,391 @@ struct PrecisionInfo {
template <>
struct PrecisionInfo<Precision::BF16> {
using value_type = MKLDNNPlugin::bfloat16_t;
using value_type = bfloat16_t;
};
template <>
struct PrecisionInfo<Precision::FP16> {
using value_type = ov::float16;
};
template <>
struct PrecisionInfo<Precision::BOOL> {
using value_type = uint8_t;
};
template<typename T,
typename U = typename std::conditional<
std::is_same<ov::float16, T>::value
|| std::is_same<bfloat16_t, T>::value,
float, T>::type>
struct Range {
const std::tuple<U, U> & fit(const Precision & prec);
private:
std::tuple<U, U> _range {
std::numeric_limits<T>::lowest(),
std::numeric_limits<T>::max()
};
};
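// fit() narrows the stored range (initially the full [lowest, max] of T) to the
// values representable in the given precision, so callers can clamp before casting.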
template<typename T, typename U>
const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
if (prec.is_float()) {
double lbound, ubound;
switch (prec) {
case Precision::BF16:
lbound = static_cast<double>(std::numeric_limits<bfloat16_t>::lowest());
ubound = static_cast<double>(std::numeric_limits<bfloat16_t>::max());
break;
case Precision::FP16:
lbound = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
ubound = static_cast<double>(std::numeric_limits<ov::float16>::max());
break;
case Precision::FP32:
lbound = static_cast<double>(std::numeric_limits<float>::lowest());
ubound = static_cast<double>(std::numeric_limits<float>::max());
break;
case Precision::FP64:
lbound = std::numeric_limits<double>::lowest();
ubound = std::numeric_limits<double>::max();
break;
default:
IE_THROW() << "Unsupported precision";
}
std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
} else {
int64_t lbound;
uint64_t ubound;
switch (prec) {
case Precision::BOOL:
case Precision::U8:
lbound = static_cast<int64_t>(std::numeric_limits<uint8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
break;
case Precision::I8:
lbound = static_cast<int64_t>(std::numeric_limits<int8_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int8_t>::max());
break;
case Precision::U16:
lbound = static_cast<int64_t>(std::numeric_limits<uint16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
break;
case Precision::I16:
lbound = static_cast<int64_t>(std::numeric_limits<int16_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int16_t>::max());
break;
case Precision::U32:
lbound = static_cast<int64_t>(std::numeric_limits<uint32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
break;
case Precision::I32:
lbound = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
break;
case Precision::U64:
lbound = static_cast<int64_t>(std::numeric_limits<uint64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
break;
case Precision::I64:
lbound = static_cast<int64_t>(std::numeric_limits<int64_t>::lowest());
ubound = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
break;
default:
IE_THROW() << "Unsupported precision";
}
using ltype = typename std::conditional<
std::is_floating_point<U>::value,
double, int64_t>::type;
using utype = typename std::conditional<
std::is_floating_point<U>::value,
double, uint64_t>::type;
std::get<0>(_range) = static_cast<U>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
std::get<1>(_range) = static_cast<U>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
}
return _range;
}
struct ConvertContext {
const void *srcPtr;
void *dstPtr;
size_t size;
Precision interimPrc;
Precision dstPrc;
bool converted;
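// Clamping bounds for the conversion: the intersection of the value ranges
// representable in interimPrc and dstPrc (see Range<T>::fit above).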
template<typename T>
std::tuple<T, T> range() const {
Range<T> r;
r.fit(interimPrc);
return r.fit(dstPrc);
}
};
template<typename T>
struct ConvertPrecision {
using src_t = typename std::tuple_element<0, T>::type;
using dst_t = typename std::tuple_element<1, T>::type;
struct ConvertPrecision;
template<typename src_t, typename dst_t>
struct ConvertPrecision<std::tuple<src_t, dst_t>> {
void operator()(ConvertContext & ctx) {
convert<src_t, dst_t>(ctx.srcPtr, ctx.dstPtr, ctx.size);
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
});
} else {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<float, bfloat16_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const float *>(ctx.srcPtr);
auto dst = static_cast<bfloat16_t *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<float>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<bfloat16_t, float>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const bfloat16_t *>(ctx.srcPtr);
auto dst = static_cast<float *>(ctx.dstPtr);
if (ctx.interimPrc.is_float()) {
parallel_for(ctx.size, [&](size_t i) {
dst[i] = static_cast<float>(src[i]);
});
} else {
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<bfloat16_t>();
parallel_for(ctx.size, [&](size_t i) {
dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
});
}
ctx.converted = true;
}
};
template<typename src_t>
struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const src_t *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
src_t lbound, ubound;
std::tie(lbound, ubound) = ctx.range<src_t>();
if (std::is_integral<src_t>::value
|| ctx.interimPrc.is_float()) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
template<typename dst_t>
struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<dst_t *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()
|| std::is_integral<dst_t>::value) {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
});
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
});
}
ctx.converted = true;
}
};
template<>
struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
void operator()(ConvertContext & ctx) {
auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
constexpr size_t batch = 64;
const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
typedef float batch_type[batch];
float lbound, ubound;
std::tie(lbound, ubound) = ctx.range<ov::float16>();
if (ctx.interimPrc.is_float()) {
cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
} else {
parallel_for(iterations, [&](size_t i) {
batch_type tmp;
const size_t offset = i * batch;
const size_t current_batch_size = std::min(ctx.size - offset, batch);
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32
tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
});
}
ctx.converted = true;
}
};
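// Examples: FP32 -> I8 truncates (fewer bits, float to integral) and U8 -> I8
// truncates (signedness change), while I8 -> I32 does not.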
bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
return to.bitsSize() < from.bitsSize()
|| (from.is_float() && !to.is_float()) // float -> integral
|| (from.isSigned() != to.isSigned()) // signed <-> unsigned
|| (to == Precision::BOOL && from != to); // T -> bool
}
} // namespace
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
using namespace MKLDNNPlugin;
#define MKLDNN_CVT_LIST \
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
MKLDNN_CVT(BOOL, BOOL)
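// The 5-argument overload performs a plain conversion: it forwards to the
// extended overload below with interimPrc equal to dstPrc.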
void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
}
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size) {
if (srcPtr == nullptr || dstPtr == nullptr)
IE_THROW() << "cpu_convert has null data pointer";
if (srcPrc == dstPrc) {
if (srcPrc == dstPrc && srcPrc == interimPrc) {
cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
return;
}
ConvertContext ctx = { srcPtr, dstPtr, size, false };
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc),
MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16),
MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64),
MKLDNN_CVT(U8, FP32), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, BOOL),
MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16),
MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64),
MKLDNN_CVT(I8, FP32), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, BOOL),
MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16),
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64),
MKLDNN_CVT(U16, FP32), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, BOOL),
MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16),
MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64),
MKLDNN_CVT(I16, FP32), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, BOOL),
MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16),
MKLDNN_CVT(I32, I16), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64),
MKLDNN_CVT(I32, FP32), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, BOOL),
MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16),
MKLDNN_CVT(U64, I16), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64),
MKLDNN_CVT(U64, FP32), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, BOOL),
MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16),
MKLDNN_CVT(I64, I16), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64),
MKLDNN_CVT(I64, FP32), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, BOOL),
MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16),
MKLDNN_CVT(FP32, I16), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64),
MKLDNN_CVT(FP32, I64), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, BOOL),
MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16),
MKLDNN_CVT(BF16, I16), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64),
MKLDNN_CVT(BF16, I64), MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, BOOL),
MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16),
MKLDNN_CVT(BOOL, I16), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64),
MKLDNN_CVT(BOOL, I64), MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, BF16),
MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16),
MKLDNN_CVT(FP64, I16), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64),
MKLDNN_CVT(FP64, I64), MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL),
MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16),
MKLDNN_CVT(U32, I16), MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64),
MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, BOOL));
} else {
ConvertContext ctx = {
srcPtr,
dstPtr,
size,
interimPrc,
dstPrc,
false
};
OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
if (!ctx.converted)
IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
}
}
#undef MKLDNN_CVT
#undef MKLDNN_CVT_LIST

View File

@@ -19,5 +19,32 @@
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
void cpu_convert(const void *srcPtr, void *dstPtr, InferenceEngine::Precision srcPrc, InferenceEngine::Precision dstPrc, const size_t size);
/**
* @brief Copy size elements from the buffer specified by srcPtr to the buffer specified by dstPtr.
* If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
* @param srcPtr
* pointer to the buffer to convert from
* @param dstPtr
* pointer to the buffer to convert to
* @param srcPrc
* precision of the buffer to convert from
* @param interimPrc
* intermediate precision used for type truncation
* @param dstPrc
* precision of the buffer to convert to
* @param size
* number of elements in buffers to be converted
* @return none.
*/
void cpu_convert(const void *srcPtr,
void *dstPtr,
InferenceEngine::Precision srcPrc,
InferenceEngine::Precision interimPrc,
InferenceEngine::Precision dstPrc,
const size_t size);
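A minimal usage sketch of the two overloads declared above (the example function and buffer values are hypothetical; the clamping and truncation behavior follows the cpu_convert implementation shown earlier):

#include <vector>
#include "cpu_convert.h"

void cpu_convert_usage_example() {
    std::vector<float> src = {1.9f, -2.7f, 300.5f};
    std::vector<float> dst(src.size());
    // Plain conversion; the 5-argument overload forwards with interimPrc == dstPrc,
    // so with srcPrc == dstPrc this reduces to a memcpy.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::FP32,
                src.size());
    // Conversion through an interim I8: each value is clamped to [-128, 127] and
    // truncated toward zero before being stored back as FP32,
    // e.g. 1.9f -> 1.0f, -2.7f -> -2.0f, 300.5f -> 127.0f.
    cpu_convert(src.data(), dst.data(),
                InferenceEngine::Precision::FP32,
                InferenceEngine::Precision::I8,
                InferenceEngine::Precision::FP32,
                src.size());
}

In the MKLDNNConvertNode changes below, origPrc (the destination type of the original ngraph Convert operation) is passed as interimPrc, so the numeric truncation of the original destination type is preserved even when the node's actual output precision differs.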

View File

@@ -7,7 +7,8 @@
#include "common/cpu_convert.h"
#include "common/blocked_desc_creator.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/ngraph_utils.hpp"
#include <ie_ngraph_utils.hpp>
#include <utils/ngraph_utils.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
@@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
return true;
}
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(op, eng, cache) {
std::string errorMessage;
if (isSupportedOperation(op, errorMessage)) {
errorPrefix = "Convert node with name '" + getName() + "'";
} else {
IE_THROW(NotImplemented) << errorMessage;
}
auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
origPrc = details::convertPrecision(convert->get_destination_type());
}
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
@ -42,7 +46,8 @@ std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
: MKLDNNNode("Convert", nodeName, eng, cache)
, origPrc(outPrc) {
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
outputShapes.push_back(shape);
@ -147,7 +152,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {
void* srcPtr = parentMem.GetPtr();
void* dstPtr = childMem.GetPtr();
cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount);
cpu_convert(srcPtr,
dstPtr,
parentMem.getDesc().getPrecision(),
origPrc,
childMem.getDesc().getPrecision(),
parentPaddElemCount);
}
bool MKLDNNConvertNode::created() const {
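
The intent of threading origPrc through execute() appears to be: the plugin may realize the Convert node's output in a different precision after the transformation pipeline runs, so the node remembers the destination type originally requested in the graph (convert->get_destination_type()) and passes it as the intermediate truncation precision. A hypothetical illustration of the three precisions involved, under that assumption:

    // graph:   Convert(FP32 -> BF16)           => origPrc = BF16
    // plugin:  output tensor realized as FP32  => childMem precision = FP32
    // execute() then requests: FP32 -> (truncate as BF16) -> FP32
    cpu_convert(srcPtr, dstPtr,
                /*srcPrc=*/parentMem.getDesc().getPrecision(),
                /*interimPrc=*/origPrc,
                /*dstPrc=*/childMem.getDesc().getPrecision(),
                parentPaddElemCount);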

View File

@ -49,6 +49,7 @@ public:
private:
MemoryDescPtr input;
MemoryDescPtr output;
InferenceEngine::Precision origPrc;
std::string errorPrefix;
};

View File

@ -101,16 +101,9 @@ tests_expected_to_fail = [
(
xfail_issue_FLOAT_LIKE,
"OnnxBackendNodeModelTest.test_cast_BFLOAT16_to_FLOAT_cpu",
"OnnxBackendNodeModelTest.test_cast_FLOAT16_to_DOUBLE_cpu",
"OnnxBackendNodeModelTest.test_cast_FLOAT16_to_FLOAT_cpu",
"OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu",
"OnnxBackendNodeModelTest.test_castlike_BFLOAT16_to_FLOAT_expanded_cpu",
"OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_DOUBLE_expanded_cpu",
"OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_FLOAT_expanded_cpu",
"OnnxBackendNodeModelTest.test_castlike_FLOAT_to_BFLOAT16_expanded_cpu",
"OnnxBackendNodeModelTest.test_max_float16_cpu",
"OnnxBackendNodeModelTest.test_min_float16_cpu",
"OnnxBackendNodeModelTest.test_mod_mixed_sign_float16_cpu",
),
(
xfail_issue_49207,

View File

@ -171,12 +171,6 @@ if len(zoo_models) > 0:
test_cases = backend_test.test_cases["OnnxBackendModelExecutionTest"]
if tests.MODEL_ZOO_XFAIL:
execution_xfail_list = [
# New Python API - fp16 blob
(xfail_issue_67415, "test_MSFT_opset7_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"),
(xfail_issue_67415, "test_MSFT_opset7_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"),
(xfail_issue_67415, "test_MSFT_opset8_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"),
(xfail_issue_67415, "test_MSFT_opset8_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"),
# ONNX Model Zoo
(xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_encoder_12_t5_encoder_cpu"),
(xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_decoder_with_lm_head_12_t5_decoder_with_lm_head_cpu"),

View File

@ -309,16 +309,15 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr<IExecutableNetwor
OPENVINO_ASSERT(outputsInfo.size() == function->get_output_size());
for (const auto& param : function->get_parameters()) {
auto new_param = param->copy_with_new_inputs({});
auto new_param = ov::as_type_ptr<ov::op::v0::Parameter>(param->copy_with_new_inputs({}));
new_param->set_friendly_name(param->get_friendly_name());
if (add_operation_names)
new_param->output(0).get_tensor().add_names({new_param->get_friendly_name()});
// WA: use CNNNetwork's precisions since plugins sometimes override their precisions
// after the transformation pipeline is run
new_param->set_output_type(
0,
InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()),
new_param->get_output_partial_shape(0));
new_param->set_element_type(
InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()));
new_param->validate_and_infer_types();
const_params.emplace_back(new_param);
}
for (const auto& result : function->get_results()) {
@ -326,10 +325,9 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr<IExecutableNetwor
result->get_output_partial_shape(0));
const std::string param_name = ngraph::op::util::create_ie_output_name(result->input_value(0));
fake_param->set_friendly_name(param_name);
fake_param->set_output_type(
0,
InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()),
fake_param->get_output_partial_shape(0));
fake_param->set_element_type(
InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()));
fake_param->validate_and_infer_types();
auto new_result = result->copy_with_new_inputs({fake_param});
new_result->set_friendly_name(result->get_friendly_name());
if (add_operation_names) {
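
The replacement of set_output_type() with set_element_type() plus validate_and_infer_types() follows the usual OpenVINO idiom for re-typing a node: change only the element type and let type/shape inference recompute the output descriptor, rather than overwriting it directly. A minimal sketch under that assumption (shape and types illustrative):

    #include <memory>
    #include <openvino/op/parameter.hpp>

    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32,
                                                         ov::PartialShape{1, 3, 224, 224});
    param->set_element_type(ov::element::u8);  // change only the element type
    param->validate_and_infer_types();         // output(0) now reports u8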

View File

@ -8,8 +8,15 @@
using namespace BehaviorTestsDefinitions;
using namespace InferenceEngine;
const std::vector<Precision> precisionSet = {Precision::FP32, Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, Precision::BOOL,
Precision::I64, Precision::U64};
const std::vector<Precision> precisionSet = {
Precision::U8, Precision::I8,
Precision::U16, Precision::I16,
Precision::U32, Precision::I32,
Precision::U64, Precision::I64,
Precision::BF16, Precision::FP16,
Precision::FP32, Precision::FP64,
Precision::BOOL
};
const std::vector<setType> typeSet = {setType::INPUT, setType::OUTPUT, setType::BOTH};

View File

@ -18,24 +18,26 @@ const std::vector<ngraph::helpers::ConversionTypes> conversionOpTypes = {
const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
const std::vector<InferenceEngine::Precision> netPrecisions = {
// Ticket: 59594
// InferenceEngine::Precision::I4,
InferenceEngine::Precision::I8,
InferenceEngine::Precision::I16,
InferenceEngine::Precision::I32,
InferenceEngine::Precision::I64,
// Ticket: 59594
// InferenceEngine::Precision::BIN,
// InferenceEngine::Precision::BOOL,
// InferenceEngine::Precision::U4,
InferenceEngine::Precision::U8,
InferenceEngine::Precision::I8,
InferenceEngine::Precision::U16,
// Ticket: 59594
// InferenceEngine::Precision::U32,
InferenceEngine::Precision::I16,
InferenceEngine::Precision::U32,
InferenceEngine::Precision::I32,
InferenceEngine::Precision::U64,
InferenceEngine::Precision::I64,
InferenceEngine::Precision::BF16,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32};
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP64,
InferenceEngine::Precision::BOOL,
InferenceEngine::Precision::MIXED,
InferenceEngine::Precision::Q78,
InferenceEngine::Precision::U4,
InferenceEngine::Precision::I4,
InferenceEngine::Precision::BIN,
InferenceEngine::Precision::CUSTOM,
};
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest,
ConversionLayerTest,

View File

@ -104,17 +104,6 @@ std::vector<std::string> disabledTestPatterns() {
// CPU plugin does not support some precisions
R"(smoke_CachingSupportCase_CPU/LoadNetworkCacheTestBase.CompareWithRefImpl/ReadConcatSplitAssign_f32_batch1_CPU)",
// CPU plugin does not support some precisions
R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(i8|u32).*)",
R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(f16).*)",
R"(.*EltwiseLayerTest.*NetType=f16.*)",
// TODO: CVS-66526 overrides i/o precisions in execution graph
// as WA we used GetInputsInfo() precisions instead of ngraph ones
// R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i16|u16).*)",
// R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i64|u64).*)",
// R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i16|u16).*)",
// R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i64|u64).*)",
// CPU does not support dynamic rank
// Issue: CVS-66778
@ -168,7 +157,18 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)",
// Issue: 71756
R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)",
R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)"
R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)",
// Issue: 72150
R"(.*smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=.*_Device=CPU_PrecisionInNet=BOOL.*)",
// Issue: 59594
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BOOL.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*MIXED.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*Q78.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*U4.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*I4.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BIN.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*CUSTOM.*)",
R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*UNSPECIFIED.*)",
};
#define FIX_62820 0

View File

@ -108,11 +108,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) {
importedExecNet.input(0).get_tensor().get_partial_shape());
EXPECT_EQ(function->input(0).get_tensor().get_element_type(),
importedExecNet.input(0).get_tensor().get_element_type());
EXPECT_EQ(function->input(0).get_element_type(),
importedExecNet.input(0).get_tensor().get_element_type());
EXPECT_EQ(function->input(1).get_tensor().get_names(), importedExecNet.input(1).get_tensor().get_names());
EXPECT_EQ(function->input(1).get_tensor().get_partial_shape(),
importedExecNet.input(1).get_tensor().get_partial_shape());
EXPECT_EQ(function->input(1).get_tensor().get_element_type(),
importedExecNet.input(1).get_tensor().get_element_type());
EXPECT_EQ(function->input(1).get_element_type(),
importedExecNet.input(1).get_tensor().get_element_type());
EXPECT_EQ(importedExecNet.input(0).get_node(), importedExecNet.input("data1").get_node());
EXPECT_NE(importedExecNet.input(1).get_node(), importedExecNet.input("data1").get_node());
EXPECT_EQ(importedExecNet.input(1).get_node(), importedExecNet.input("data2").get_node());
@ -125,11 +129,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) {
importedExecNet.output(0).get_tensor().get_partial_shape());
EXPECT_EQ(function->output(0).get_tensor().get_element_type(),
importedExecNet.output(0).get_tensor().get_element_type());
EXPECT_EQ(function->output(0).get_element_type(),
importedExecNet.output(0).get_tensor().get_element_type());
EXPECT_EQ(function->output(1).get_tensor().get_names(), importedExecNet.output(1).get_tensor().get_names());
EXPECT_EQ(function->output(1).get_tensor().get_partial_shape(),
importedExecNet.output(1).get_tensor().get_partial_shape());
EXPECT_EQ(function->output(1).get_tensor().get_element_type(),
importedExecNet.output(1).get_tensor().get_element_type());
EXPECT_EQ(function->output(1).get_element_type(),
importedExecNet.output(1).get_tensor().get_element_type());
EXPECT_EQ(importedExecNet.output(0).get_node(), importedExecNet.output("relu").get_node());
EXPECT_NE(importedExecNet.output(1).get_node(), importedExecNet.output("relu").get_node());
EXPECT_EQ(importedExecNet.output(1).get_node(), importedExecNet.output("concat").get_node());

View File

@ -43,15 +43,19 @@ std::string SetBlobTest::getTestCaseName(testing::TestParamInfo<SetBlobParams> o
inline void fillBlob(Blob::Ptr &blob) {
switch (blob->getTensorDesc().getPrecision()) {
#define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob); break;
CASE(InferenceEngine::Precision::FP32)
CASE(InferenceEngine::Precision::U8)
CASE(InferenceEngine::Precision::U16)
CASE(InferenceEngine::Precision::I8)
CASE(InferenceEngine::Precision::I16)
CASE(InferenceEngine::Precision::I64)
CASE(InferenceEngine::Precision::U64)
CASE(InferenceEngine::Precision::I32)
CASE(InferenceEngine::Precision::BOOL)
CASE(Precision::U8)
CASE(Precision::I8)
CASE(Precision::U16)
CASE(Precision::I16)
CASE(Precision::U32)
CASE(Precision::I32)
CASE(Precision::U64)
CASE(Precision::I64)
CASE(Precision::BF16)
CASE(Precision::FP16)
CASE(Precision::FP32)
CASE(Precision::FP64)
CASE(Precision::BOOL)
#undef CASE
default:
IE_THROW() << "Can't fill blob with precision: " << blob->getTensorDesc().getPrecision();

View File

@ -131,53 +131,53 @@ inline void callCompare(const std::pair<ngraph::element::Type, std::vector<std::
const T_IE* actualBuffer, size_t size, float threshold, float abs_threshold) {
auto expectedBuffer = expected.second.data();
switch (expected.first) {
case ngraph::element::Type_t::i64:
LayerTestsCommon::Compare<T_IE, int64_t>(reinterpret_cast<const int64_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::i32:
LayerTestsCommon::Compare<T_IE, int32_t>(reinterpret_cast<const int32_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::i16:
LayerTestsCommon::Compare<T_IE, int16_t>(reinterpret_cast<const int16_t *>(expectedBuffer),
case ngraph::element::Type_t::boolean:
case ngraph::element::Type_t::u8:
LayerTestsCommon::Compare<T_IE, uint8_t>(reinterpret_cast<const uint8_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::i8:
LayerTestsCommon::Compare<T_IE, int8_t>(reinterpret_cast<const int8_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::u64:
LayerTestsCommon::Compare<T_IE, uint64_t>(reinterpret_cast<const uint64_t *>(expectedBuffer),
case ngraph::element::Type_t::u16:
LayerTestsCommon::Compare<T_IE, uint16_t>(reinterpret_cast<const uint16_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::i16:
LayerTestsCommon::Compare<T_IE, int16_t>(reinterpret_cast<const int16_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::u32:
LayerTestsCommon::Compare<T_IE, uint32_t>(reinterpret_cast<const uint32_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::u16:
LayerTestsCommon::Compare<T_IE, uint16_t>(reinterpret_cast<const uint16_t *>(expectedBuffer),
case ngraph::element::Type_t::i32:
LayerTestsCommon::Compare<T_IE, int32_t>(reinterpret_cast<const int32_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::boolean:
case ngraph::element::Type_t::u8:
LayerTestsCommon::Compare<T_IE, uint8_t>(reinterpret_cast<const uint8_t *>(expectedBuffer),
case ngraph::element::Type_t::u64:
LayerTestsCommon::Compare<T_IE, uint64_t>(reinterpret_cast<const uint64_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::f64:
LayerTestsCommon::Compare<T_IE, double>(reinterpret_cast<const double *>(expectedBuffer),
case ngraph::element::Type_t::i64:
LayerTestsCommon::Compare<T_IE, int64_t>(reinterpret_cast<const int64_t *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::f32:
LayerTestsCommon::Compare<T_IE, float>(reinterpret_cast<const float *>(expectedBuffer),
case ngraph::element::Type_t::bf16:
LayerTestsCommon::Compare<T_IE, ngraph::bfloat16>(reinterpret_cast<const ngraph::bfloat16 *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::f16:
LayerTestsCommon::Compare<T_IE, ngraph::float16>(reinterpret_cast<const ngraph::float16 *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::bf16:
LayerTestsCommon::Compare<T_IE, ngraph::bfloat16>(reinterpret_cast<const ngraph::bfloat16 *>(expectedBuffer),
case ngraph::element::Type_t::f32:
LayerTestsCommon::Compare<T_IE, float>(reinterpret_cast<const float *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::f64:
LayerTestsCommon::Compare<T_IE, double>(reinterpret_cast<const double *>(expectedBuffer),
actualBuffer, size, threshold, abs_threshold);
break;
case ngraph::element::Type_t::i4: {
@ -230,14 +230,9 @@ void LayerTestsCommon::Compare(const std::pair<ngraph::element::Type, std::vecto
const auto &size = actual->size();
switch (precision) {
case InferenceEngine::Precision::FP32:
callCompare<float>(expected, reinterpret_cast<const float *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::I32:
callCompare<int32_t>(expected, reinterpret_cast<const int32_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::I64:
callCompare<int64_t>(expected, reinterpret_cast<const int64_t *>(actualBuffer), size, threshold, abs_threshold);
case InferenceEngine::Precision::BOOL:
case InferenceEngine::Precision::U8:
callCompare<uint8_t>(expected, reinterpret_cast<const uint8_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::I8:
callCompare<int8_t>(expected, reinterpret_cast<const int8_t *>(actualBuffer), size, threshold, abs_threshold);
@ -248,19 +243,30 @@ void LayerTestsCommon::Compare(const std::pair<ngraph::element::Type, std::vecto
case InferenceEngine::Precision::I16:
callCompare<int16_t>(expected, reinterpret_cast<const int16_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::BOOL:
case InferenceEngine::Precision::U8:
callCompare<uint8_t>(expected, reinterpret_cast<const uint8_t *>(actualBuffer), size, threshold, abs_threshold);
case InferenceEngine::Precision::U32:
callCompare<uint32_t>(expected, reinterpret_cast<const uint32_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::I32:
callCompare<int32_t>(expected, reinterpret_cast<const int32_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::U64:
callCompare<uint64_t>(expected, reinterpret_cast<const uint64_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::I64:
callCompare<int64_t>(expected, reinterpret_cast<const int64_t *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::BF16:
callCompare<ngraph::bfloat16>(expected, reinterpret_cast<const ngraph::bfloat16 *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::FP16:
callCompare<ngraph::float16>(expected, reinterpret_cast<const ngraph::float16 *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::FP32:
callCompare<float>(expected, reinterpret_cast<const float *>(actualBuffer), size, threshold, abs_threshold);
break;
case InferenceEngine::Precision::FP64:
callCompare<double>(expected, reinterpret_cast<const double *>(actualBuffer), size, threshold, abs_threshold);
break;
default:
FAIL() << "Comparator for " << precision << " precision isn't supported";
}

View File

@ -28,6 +28,9 @@ std::string ConversionLayerTest::getTestCaseName(const testing::TestParamInfo<Co
}
void ConversionLayerTest::SetUp() {
if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) {
GTEST_SKIP() << "Disabled test due to configuration" << std::endl;
}
ngraph::helpers::ConversionTypes conversionOpType;
InferenceEngine::Precision inputPrecision, targetPrecision;
std::vector<std::vector<size_t>> inputShape;

View File

@ -28,7 +28,7 @@ OMZ_DEFINITIONS_PATH = LIBS_ROOT/'open_model_zoo'/'data'/'dataset_definitions.ym
sys.path.append(str(OMZ_DOWNLOADER_PATH / 'src'))
# pylint: disable=E0611,C0413,C0411,E0401
importlib.reload(openvino)
from openvino.model_zoo._configuration import load_models
from openvino.model_zoo._configuration import load_models, ModelLoadingMode
from openvino.model_zoo._common import MODEL_ROOT
is_platform_windows = sys.platform.startswith('win')
@ -83,7 +83,7 @@ def convert(config):
def get_models_list():
return load_models(MODEL_ROOT, Dict(config=None))
return load_models(MODEL_ROOT, Dict(config=None), mode=ModelLoadingMode.ignore_composite)
def download_engine_config(model_name):

@ -1 +1 @@
Subproject commit b7c83530a470734e3e4bde8fe0d80dcc2d9e9b2c
Subproject commit a04512d8553aed4b7bde5032141f28bfd26f6f5c