Merge remote-tracking branch 'upstream/master' into add_mxnet_operations

This commit is contained in: commit c6ab942ada
@@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $<BUILD_INTERFACE:${CMAKE_
file(GLOB_RECURSE all_backends_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp")
add_clang_format_target(interpreter_backend_clang FOR_SOURCES ${all_backends_src})

# developer package

openvino_developer_export_targets(COMPONENT core TARGETS interpreter_backend)

install(TARGETS interpreter_backend
        RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
        ARCHIVE DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
        LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
if(NOT BUILD_SHARED_LIBS)
    install(TARGETS interpreter_backend
            RUNTIME DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
            ARCHIVE DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL
            LIBRARY DESTINATION tests COMPONENT tests OPTIONAL EXCLUDE_FROM_ALL)
endif()

# install

ov_install_static_lib(interpreter_backend template)
@@ -1707,7 +1707,24 @@ bool evaluate(const shared_ptr<op::v0::Log>& op, const HostTensorVector& outputs
}

namespace ctc_loss_v4 {
template <element::Type_t t1, element::Type_t t2>
template <element::Type_t t1,
          element::Type_t t2,
          typename std::enable_if<!std::is_floating_point<typename element_type_traits<t1>::value_type>::value &&
                                      !std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value &&
                                      !std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
                                  bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
                     const HostTensorVector& outputs,
                     const HostTensorVector& inputs) {
    OPENVINO_ASSERT(false, "The data type for logits is expected to be a floating point type. Got:", element::Type(t1));
}

template <element::Type_t t1,
          element::Type_t t2,
          typename std::enable_if<std::is_floating_point<typename element_type_traits<t1>::value_type>::value ||
                                      std::is_same<typename element_type_traits<t1>::value_type, bfloat16>::value ||
                                      std::is_same<typename element_type_traits<t1>::value_type, float16>::value,
                                  bool>::type = true>
inline void evaluate(const shared_ptr<op::v4::CTCLoss>& op,
                     const HostTensorVector& outputs,
                     const HostTensorVector& inputs) {
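Note: the pair of overloads above is a classic enable_if dispatch. The same call site compiles against exactly one overload depending on whether the logits element type is floating point; the non-floating-point one only raises the assertion. A minimal standalone sketch of the pattern, using plain std::is_floating_point in place of OpenVINO's element_type_traits (names here are illustrative, not from the PR):

#include <cstdio>
#include <type_traits>

// Rejecting overload: selected only for non-floating-point argument types.
template <typename T, typename std::enable_if<!std::is_floating_point<T>::value, bool>::type = true>
void evaluate_impl(T) { std::puts("rejected: integral logits"); }

// Accepting overload: selected only for floating-point argument types.
template <typename T, typename std::enable_if<std::is_floating_point<T>::value, bool>::type = true>
void evaluate_impl(T) { std::puts("accepted: floating-point logits"); }

int main() {
    evaluate_impl(1);    // picks the rejecting overload
    evaluate_impl(1.f);  // picks the accepting overload
}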
@@ -1944,6 +1961,30 @@ bool evaluate(const shared_ptr<op::v0::RNNCell>& op, const HostTensorVector& out
    return true;
}

template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v0::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
    using T = typename element_type_traits<ET>::value_type;
    runtime::reference::lstm_cell<T>(inputs[0]->get_data_ptr<ET>(),
                                     inputs[0]->get_shape(),
                                     inputs[1]->get_data_ptr<ET>(),
                                     inputs[1]->get_shape(),
                                     inputs[2]->get_data_ptr<ET>(),
                                     inputs[2]->get_shape(),
                                     inputs[3]->get_data_ptr<ET>(),
                                     inputs[3]->get_shape(),
                                     inputs[4]->get_data_ptr<ET>(),
                                     inputs[4]->get_shape(),
                                     inputs[5]->get_data_ptr<ET>(),
                                     inputs[5]->get_shape(),
                                     outputs[0]->get_data_ptr<ET>(),
                                     outputs[1]->get_data_ptr<ET>(),
                                     op->get_activations()[0],
                                     op->get_activations()[1],
                                     op->get_activations()[2],
                                     op->get_clip());
    return true;
}

template <element::Type_t ET>
bool evaluate(const shared_ptr<op::v4::LSTMCell>& op, const HostTensorVector& outputs, const HostTensorVector& inputs) {
    using T = typename element_type_traits<ET>::value_type;
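For reference, the cell math behind runtime::reference::lstm_cell, as a minimal scalar sketch. The unit weights and zero bias are assumptions for illustration only; the kernel applies the same recurrences to whole gate matrices, with the three activations defaulting to sigmoid/tanh/tanh as in the tests below:

#include <cmath>
#include <cstdio>

int main() {
    float x = 0.5f, h = 0.1f, c = 0.2f;            // input, hidden state, cell state
    float wi = 1.f, wf = 1.f, wc = 1.f, wo = 1.f;  // input weights (assumed = 1)
    float ri = 1.f, rf = 1.f, rc = 1.f, ro = 1.f;  // recurrence weights (assumed = 1)
    auto sigmoid = [](float v) { return 1.f / (1.f + std::exp(-v)); };

    float i = sigmoid(wi * x + ri * h);    // input gate
    float f = sigmoid(wf * x + rf * h);    // forget gate
    float g = std::tanh(wc * x + rc * h);  // candidate cell value
    float o = sigmoid(wo * x + ro * h);    // output gate
    float c_next = f * c + i * g;          // corresponds to outputs[1] above
    float h_next = o * std::tanh(c_next);  // corresponds to outputs[0] above
    std::printf("h'=%f c'=%f\n", h_next, c_next);
}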
@@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0)
NGRAPH_OP(GRN, op::v0)
NGRAPH_OP(HardSigmoid, op::v0)
NGRAPH_OP(LRN, ngraph::op::v0)
NGRAPH_OP(LSTMCell, op::v0)
NGRAPH_OP(MVN, ngraph::op::v0)
NGRAPH_OP(NormalizeL2, op::v0)
NGRAPH_OP(PriorBox, ngraph::op::v0)
@@ -37,4 +37,3 @@ set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_REL
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
#                     POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]
ov_install_static_lib(interpreter_backend tests)
docs/template_plugin/tests/functional/op_reference/einsum.cpp (new file, 182 lines)
@@ -0,0 +1,182 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "openvino/opsets/opset7.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ov;

namespace {
struct EinsumParams {
    std::vector<Tensor> inputs;
    std::string equation;
    Tensor expectedResult;
    std::string testcaseName;
};

struct Builder : ParamsBuilder<EinsumParams> {
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};

class ReferenceEinsumTest : public testing::TestWithParam<EinsumParams>, public CommonReferenceTest {
public:
    void SetUp() override {
        auto params = GetParam();
        function = CreateModel(params);
        for (const auto& input_tensor : params.inputs) {
            inputData.push_back(input_tensor.data);
        }
        refOutData = {params.expectedResult.data};
    }

    static std::string getTestCaseName(const testing::TestParamInfo<EinsumParams>& obj) {
        auto param = obj.param;
        std::ostringstream result;
        result << "iType=" << param.inputs[0].type;
        result << "_iShape=" << param.inputs[0].shape;
        result << "_equation=" << param.equation;
        result << "_eType=" << param.expectedResult.type;
        result << "_eShape=" << param.expectedResult.shape;
        if (param.testcaseName != "") {
            result << "_=" << param.testcaseName;
        }
        return result.str();
    }

private:
    static std::shared_ptr<Model> CreateModel(const EinsumParams& params) {
        OutputVector output_vector;
        ParameterVector param_vector;
        for (const auto& input_tensor : params.inputs) {
            auto param = std::make_shared<opset1::Parameter>(input_tensor.type, input_tensor.shape);
            output_vector.push_back(param);
            param_vector.push_back(param);
        }
        const auto einsum = std::make_shared<opset7::Einsum>(output_vector, params.equation);
        const auto f = std::make_shared<Model>(OutputVector{einsum}, param_vector);
        return f;
    }
};

TEST_P(ReferenceEinsumTest, CompareWithRefs) {
    Exec();
}

template <element::Type_t ET>
std::vector<EinsumParams> generateParams() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<EinsumParams> params {
        Builder {}
            .inputs({{ET, {1, 2}, std::vector<T>{1, 2}},
                     {ET, {3, 4}, std::vector<T>{3, 4, 5, 6,
                                                 7, 8, 9, 10,
                                                 11, 12, 13, 14}}})
            .equation("ab,cd->abcd")
            .expectedResult({ET, {1, 2, 3, 4}, std::vector<T>{3, 4, 5, 6, 7, 8, 9, 10,
                                                              11, 12, 13, 14, 6, 8, 10, 12,
                                                              14, 16, 18, 20, 22, 24, 26, 28}})
            .testcaseName("einsum_no_reduction"),
        Builder {}
            .inputs({{ET, {1, 2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
            .equation("ijk->kij")
            .expectedResult({ET, {3, 1, 2}, std::vector<T>{1, 4, 2, 5, 3, 6}})
            .testcaseName("einsum_transpose"),

        Builder {}
            .inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
            .equation("ab->a")
            .expectedResult({ET, {2}, std::vector<T>{6, 15}})
            .testcaseName("einsum_reduce"),

        Builder {}
            .inputs({{ET, {2, 3}, std::vector<T>{1, 2, 3, 4, 5, 6}},
                     {ET, {3, 2}, std::vector<T>{1, 2, 3, 4, 5, 6}}})
            .equation("ab,bc->ac")
            .expectedResult({ET, {2, 2}, std::vector<T>{22, 28, 49, 64}})
            .testcaseName("einsum_matrix_multiplication"),

        Builder {}
            .inputs({{ET, {2, 4}, std::vector<T>{1, 3, 2, 7, 5, 6, 0, 1}},
                     {ET, {4, 3, 1}, std::vector<T>{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}},
                     {ET, {4, 3}, std::vector<T>{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}})
            .equation("ab,bcd,bc->ca")
            .expectedResult({ET, {3, 2}, std::vector<T>{145, 171, 703, 231, 85, 91}})
            .testcaseName("einsum_multiple_multiplication"),

        Builder {}
            .inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
            .equation("a...->...")
            .expectedResult({ET, {2, 3}, std::vector<T>{4, 8, 4, 8, 5, 13}})
            .testcaseName("einsum_ellipsis_one_input_reduction"),

        Builder {}
            .inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}})
            .equation("a...->...a")
            .expectedResult({ET, {2, 3, 2}, std::vector<T>{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}})
            .testcaseName("einsum_ellipsis_one_input_transpose"),

        Builder {}
            .inputs({{ET, {2, 2, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
                     {ET, {1}, std::vector<T>{2}}})
            .equation("ab...,...->ab...")
            .expectedResult({ET, {2, 2, 3}, std::vector<T>{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}})
            .testcaseName("einsum_ellipsis_mul_by_1dscalar"),

        Builder {}
            .inputs({{ET, {1, 1, 4, 3}, std::vector<T>{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}},
                     {ET, {3, 4, 2, 1}, std::vector<T>{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2,
                                                       4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}})
            .equation("a...j,j...->a...")
            .expectedResult({ET, {1, 4, 2, 4}, std::vector<T>{27, 85, 37, 66, 30, 58, 50, 8,
                                                              37, 123, 55, 83, 16, 48, 24, 30,
                                                              29, 83, 43, 52, 20, 92, 44, 24,
                                                              24, 96, 48, 30, 13, 67, 31, 15}})
            .testcaseName("einsum_ellipsis_complex_mul"),

        Builder {}
            .inputs({{ET, {1, 3, 3}, std::vector<T>{1, 2, 3, 4, 5, 6, 7, 8, 9}}})
            .equation("kii->ki")
            .expectedResult({ET, {1, 3}, std::vector<T>{1, 5, 9}})
            .testcaseName("einsum_diagonal"),

        Builder {}
            .inputs({{ET, {2, 3, 3, 2, 4}, std::vector<T>{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4,
                                                          4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2,
                                                          2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4,
                                                          2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3,
                                                          4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4,
                                                          4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3,
                                                          5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3,
                                                          3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}},
                     {ET, {3, 2, 1}, std::vector<T>{1, 4, 4, 5, 3, 3}}})
            .equation("abbac,bad->ad")
            .expectedResult({ET, {2, 1}, std::vector<T>{123, 129}})
            .testcaseName("einsum_diagonal_with_matmul"),
    };
    return params;
}

std::vector<EinsumParams> generateCombinedParams() {
    const std::vector<std::vector<EinsumParams>> generatedParams {
        generateParams<element::Type_t::i32>(),
        generateParams<element::Type_t::f32>(),
    };
    std::vector<EinsumParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
                         testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
} // namespace
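As a sanity check on the einsum_matrix_multiplication case above, the contraction "ab,bc->ac" can be evaluated by hand: for each free index pair (a, c), sum over the shared index b. This small sketch reproduces the expected {22, 28, 49, 64}:

#include <cstdio>

int main() {
    const int A[2][3] = {{1, 2, 3}, {4, 5, 6}};  // the {2, 3} input
    const int B[3][2] = {{1, 2}, {3, 4}, {5, 6}};  // the {3, 2} input
    for (int a = 0; a < 2; ++a)
        for (int c = 0; c < 2; ++c) {
            int acc = 0;
            for (int b = 0; b < 3; ++b)
                acc += A[a][b] * B[b][c];  // contraction over the repeated index b
            std::printf("%d ", acc);       // prints 22 28 49 64
        }
}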
@@ -0,0 +1,246 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
using namespace ov;

namespace {
struct ExtractImagePatchesParams {
    Tensor data;
    Shape sizes;
    Strides strides;
    Shape rates;
    op::PadType autoPad;
    Tensor expectedResult;
    std::string testcaseName;
};

struct Builder : ParamsBuilder<ExtractImagePatchesParams> {
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, data);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};

class ReferenceExtractImagePatchesTest : public testing::TestWithParam<ExtractImagePatchesParams>, public CommonReferenceTest {
public:
    void SetUp() override {
        auto params = GetParam();
        function = CreateModel(params);
        inputData = {params.data.data};
        refOutData = {params.expectedResult.data};
    }

    static std::string getTestCaseName(const testing::TestParamInfo<ExtractImagePatchesParams>& obj) {
        auto param = obj.param;
        std::ostringstream result;
        result << "dType=" << param.data.type;
        result << "_dShape=" << param.data.shape;
        result << "_sizes=" << param.sizes;
        result << "_strides=" << param.strides;
        result << "_rates=" << param.rates;
        result << "_autoPad=" << param.autoPad;
        result << "_eType=" << param.expectedResult.type;
        result << "_eShape=" << param.expectedResult.shape;
        if (param.testcaseName != "") {
            result << "_=" << param.testcaseName;
        }
        return result.str();
    }

private:
    static std::shared_ptr<Model> CreateModel(const ExtractImagePatchesParams& params) {
        const auto data = std::make_shared<opset1::Parameter>(params.data.type, params.data.shape);
        const auto extract_image_patches = std::make_shared<opset3::ExtractImagePatches>(data,
                                                                                         params.sizes,
                                                                                         params.strides,
                                                                                         params.rates,
                                                                                         params.autoPad);
        const auto f = std::make_shared<Model>(extract_image_patches, ParameterVector{data});
        return f;
    }
};

TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) {
    Exec();
}

template <element::Type_t ET>
std::vector<ExtractImagePatchesParams> generateParams() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<ExtractImagePatchesParams> params {
        Builder {}
            .data({ET, {1, 1, 10, 10}, std::vector<T>{
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
                71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
                91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
            .sizes({3, 3})
            .strides({5, 5})
            .rates({1, 1})
            .autoPad(op::PadType::VALID)
            .expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
                1, 6, 51, 56,
                2, 7, 52, 57,
                3, 8, 53, 58,
                11, 16, 61, 66,
                12, 17, 62, 67,
                13, 18, 63, 68,
                21, 26, 71, 76,
                22, 27, 72, 77,
                23, 28, 73, 78}}),

        Builder {}
            .data({ET, {1, 1, 10, 10}, std::vector<T>{
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
                71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
                91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
            .sizes({4, 4})
            .strides({8, 8})
            .rates({1, 1})
            .autoPad(op::PadType::VALID)
            .expectedResult({ET, {1, 16, 1, 1}, std::vector<T>{
                1, 2, 3, 4,
                11, 12, 13, 14,
                21, 22, 23, 24,
                31, 32, 33, 34}}),

        Builder {}
            .data({ET, {1, 1, 10, 10}, std::vector<T>{
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
                71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
                91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
            .sizes({4, 4})
            .strides({9, 9})
            .rates({1, 1})
            .autoPad(op::PadType::SAME_UPPER)
            .expectedResult({ET, {1, 16, 2, 2}, std::vector<T>{
                0, 0, 0, 89,
                0, 0, 81, 90,
                0, 0, 82, 0,
                0, 0, 83, 0,
                0, 9, 0, 99,
                1, 10, 91, 100,
                2, 0, 92, 0,
                3, 0, 93, 0,
                0, 19, 0, 0,
                11, 20, 0, 0,
                12, 0, 0, 0,
                13, 0, 0, 0,
                0, 29, 0, 0,
                21, 30, 0, 0,
                22, 0, 0, 0,
                23, 0, 0, 0}}),

        Builder {}
            .data({ET, {1, 1, 10, 10}, std::vector<T>{
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
                61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
                71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
                91, 92, 93, 94, 95, 96, 97, 98, 99, 100}})
            .sizes({3, 3})
            .strides({5, 5})
            .rates({2, 2})
            .autoPad(op::PadType::VALID)
            .expectedResult({ET, {1, 9, 2, 2}, std::vector<T>{
                1, 6, 51, 56,
                3, 8, 53, 58,
                5, 10, 55, 60,
                21, 26, 71, 76,
                23, 28, 73, 78,
                25, 30, 75, 80,
                41, 46, 91, 96,
                43, 48, 93, 98,
                45, 50, 95, 100}}),

        Builder {}
            .data({ET, {1, 2, 5, 5}, std::vector<T>{
                1, 2, 3, 4, 5,
                6, 7, 8, 9, 10,
                11, 12, 13, 14, 15,
                16, 17, 18, 19, 20,
                21, 22, 23, 24, 25,
                26, 27, 28, 29, 30,
                31, 32, 33, 34, 35,
                36, 37, 38, 39, 40,
                41, 42, 43, 44, 45,
                46, 47, 48, 49, 50}})
            .sizes({2, 2})
            .strides({3, 3})
            .rates({1, 1})
            .autoPad(op::PadType::VALID)
            .expectedResult({ET, {1, 8, 2, 2}, std::vector<T>{
                1, 4, 16, 19,
                26, 29, 41, 44,
                2, 5, 17, 20,
                27, 30, 42, 45,
                6, 9, 21, 24,
                31, 34, 46, 49,
                7, 10, 22, 25,
                32, 35, 47, 50}}),
    };
    return params;
}

std::vector<ExtractImagePatchesParams> generateCombinedParams() {
    const std::vector<std::vector<ExtractImagePatchesParams>> generatedParams {
        generateParams<element::Type_t::i8>(),
        generateParams<element::Type_t::i16>(),
        generateParams<element::Type_t::i32>(),
        generateParams<element::Type_t::i64>(),
        generateParams<element::Type_t::u8>(),
        generateParams<element::Type_t::u16>(),
        generateParams<element::Type_t::u32>(),
        generateParams<element::Type_t::u64>(),
        generateParams<element::Type_t::bf16>(),
        generateParams<element::Type_t::f16>(),
        generateParams<element::Type_t::f32>(),
        generateParams<element::Type_t::f64>(),
    };
    std::vector<ExtractImagePatchesParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest,
                         testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName);
} // namespace
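The output layout these expectations encode can be reproduced by hand for the first VALID case: each output channel corresponds to one (row, col) offset inside the patch, and the output's spatial grid indexes the patch positions. A minimal sketch that prints the expected {1, 6, 51, 56, 2, 7, 52, 57, ...}:

#include <cstdio>

int main() {
    int img[100];
    for (int v = 0; v < 100; ++v) img[v] = v + 1;  // the 10x10 input, values 1..100
    for (int i = 0; i < 3; ++i)                 // row offset inside the patch
        for (int j = 0; j < 3; ++j)             // col offset inside the patch -> output channel i*3+j
            for (int r = 0; r < 2; ++r)         // patch grid row (stride 5)
                for (int c = 0; c < 2; ++c)     // patch grid col (stride 5)
                    std::printf("%d ", img[(r * 5 + i) * 10 + (c * 5 + j)]);
}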
@@ -4,7 +4,8 @@

#include <gtest/gtest.h>

#include "openvino/op/lstm_cell.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset1.hpp"
#include "base_reference_test.hpp"

using namespace reference_tests;
@@ -12,13 +13,6 @@ using namespace ov;

namespace {
struct LSTMCellParams {
    LSTMCellParams(
        int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount,
        const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B,
        const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") :
        batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount),
        X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {}

    int32_t batchSize;
    int32_t inputSize;
    int32_t hiddenSize;
@@ -34,6 +28,22 @@ struct LSTMCellParams {
    std::string testcaseName;
};

struct Builder : ParamsBuilder<LSTMCellParams> {
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, X);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, W);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, R);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, B);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co);
    REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName);
};

class ReferenceLSTMCellTest : public testing::TestWithParam<LSTMCellParams>, public CommonReferenceTest {
public:
    void SetUp() override {
@@ -63,26 +73,24 @@ public:
        result << "_hoType=" << param.Ho.type;
        result << "_hoShape=" << param.Ho.shape;
        result << "_coType=" << param.Co.type;
        result << "_coShape=" << param.Co.shape;
        if (param.testcaseName != "") {
            result << "_coShape=" << param.Co.shape;
            result << "_=" << param.testcaseName;
        } else {
            result << "_coShape=" << param.Co.shape;
        }
        return result.str();
    }

private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<op::v4::LSTMCell>(X,
            std::make_shared<opset4::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -107,15 +115,15 @@ public:

private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<op::v4::LSTMCell>(X,
            std::make_shared<opset4::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
@@ -142,15 +150,15 @@ private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const float clip_threshold = 3.5f;

        const auto X = std::make_shared<op::v0::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<op::v0::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<op::v0::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<op::v0::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<op::v0::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<op::v0::Parameter>(params.B.type, params.B.shape);
        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<op::v4::LSTMCell>(X,
            std::make_shared<opset4::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               W,
@@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) {
    Exec();
}

class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest {
private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<opset1::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
                                               op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
                                               op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
                                               params.hiddenSize);

        auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
        return function;
    }
};

class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs {
private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<opset1::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC),
                                               op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC),
                                               op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC),
                                               params.hiddenSize);

        auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
        return function;
    }
};

class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip {
private:
    static std::shared_ptr<Model> CreateFunction(const LSTMCellParams& params) {
        const float clip_threshold = 3.5f;

        const auto X = std::make_shared<opset1::Parameter>(params.X.type, params.X.shape);
        const auto W = std::make_shared<opset1::Parameter>(params.W.type, params.W.shape);
        const auto R = std::make_shared<opset1::Parameter>(params.R.type, params.R.shape);
        const auto H_t = std::make_shared<opset1::Parameter>(params.H_t.type, params.H_t.shape);
        const auto C_t = std::make_shared<opset1::Parameter>(params.C_t.type, params.C_t.shape);
        const auto B = std::make_shared<opset1::Parameter>(params.B.type, params.B.shape);

        const auto lstm_cell =
            std::make_shared<opset1::LSTMCell>(X,
                                               H_t,
                                               C_t,
                                               W,
                                               R,
                                               B,
                                               params.hiddenSize,
                                               op::LSTMWeightsFormat::IFCO,
                                               std::vector<std::string>{"sigmoid", "tanh", "tanh"},
                                               std::vector<float>{},
                                               std::vector<float>{},
                                               clip_threshold);

        auto function = std::make_shared<Model>(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B});
        return function;
    }
};

TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) {
    Exec();
}

TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) {
    Exec();
}

TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) {
    Exec();
}

template <element::Type_t ET>
std::vector<LSTMCellParams> generateParams() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        LSTMCellParams(
            2, 3, 3, 4,
            Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
            Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)),
            Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}),
            Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}),
            "lstm_cell_zero_bias_default_attrs"),
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
            .testcaseName("lstm_cell_zero_bias_default_attrs")
    };
    return params;
}
@@ -232,29 +334,32 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        LSTMCellParams(
            2, 3, 3, 4,
            Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
            Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                                                  1.15248052f,
                                                  1.16671345f,
                                                  0.21450312f,
@@ -265,20 +370,20 @@ std::vector<LSTMCellParams> generateParamsBiasDefaultAttrs() {
                                                  1.1274234f,
                                                  0.51022074f,
                                                  1.11389844f,
                                                  0.74174305f}),
            Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.74174305f}))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.76665538549423218,
                                                  0.82509011030197144,
                                                  0.6479143500328064,
                                                  0.66586339473724365,
                                                  0.74838578701019287}),
            Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  0.74838578701019287}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  1.1150213479995728,
                                                  1.4578367471694946,
                                                  1.0649888515472412,
                                                  0.93761754035949707,
                                                  1.3659683465957642}),
            "lstm_cell_bias_default_attrs"),
                                                  1.3659683465957642}))
            .testcaseName("lstm_cell_bias_default_attrs"),
    };
    return params;
}
@@ -302,29 +407,32 @@ template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClip() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        LSTMCellParams(
            2, 3, 3, 4,
            Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}),
            Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}),
            Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}),
            Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                                                  1.15248052f,
                                                  1.16671345f,
                                                  0.21450312f,
@@ -335,20 +443,20 @@ std::vector<LSTMCellParams> generateParamsBiasClip() {
                                                  1.1274234f,
                                                  0.51022074f,
                                                  1.11389844f,
                                                  0.74174305f}),
            Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.74174305f}))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.76665538549423218,
                                                  0.82387429475784302,
                                                  0.6479143500328064,
                                                  0.66586339473724365,
                                                  0.74838578701019287}),
            Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  0.74838578701019287}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  1.1150213479995728,
                                                  1.4510968923568726,
                                                  1.0649888515472412,
                                                  0.93761754035949707,
                                                  1.3659683465957642}),
            "lstm_cell_bias_clip"),
                                                  1.3659683465957642}))
            .testcaseName("lstm_cell_bias_clip"),
    };
    return params;
}
@@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe

INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip,
                         testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName);

template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsV1() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
            .testcaseName("lstm_cell_v1_zero_bias_default_attrs")
    };
    return params;
}

std::vector<LSTMCellParams> generateCombinedParamsV1() {
    const std::vector<std::vector<LSTMCellParams>> generatedParams {
        generateParamsV1<element::Type_t::bf16>(),
        generateParamsV1<element::Type_t::f16>(),
        generateParamsV1<element::Type_t::f32>(),
        generateParamsV1<element::Type_t::f64>(),
    };
    std::vector<LSTMCellParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                                                  1.15248052f,
                                                  1.16671345f,
                                                  0.21450312f,
                                                  1.2380678f,
                                                  1.51688835f,
                                                  0.46718366f,
                                                  0.91810346f,
                                                  1.1274234f,
                                                  0.51022074f,
                                                  1.11389844f,
                                                  0.74174305f}))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.76665538549423218,
                                                  0.82509011030197144,
                                                  0.6479143500328064,
                                                  0.66586339473724365,
                                                  0.74838578701019287}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  1.1150213479995728,
                                                  1.4578367471694946,
                                                  1.0649888515472412,
                                                  0.93761754035949707,
                                                  1.3659683465957642}))
            .testcaseName("lstm_cell_v1_bias_default_attrs"),
    };
    return params;
}

std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
    const std::vector<std::vector<LSTMCellParams>> generatedParams {
        generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
        generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
        generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
        generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
    };
    std::vector<LSTMCellParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

template <element::Type_t ET>
std::vector<LSTMCellParams> generateParamsBiasClipV1() {
    using T = typename element_type_traits<ET>::value_type;
    std::vector<LSTMCellParams> params {
        Builder {}
            .batchSize(2)
            .inputSize(3)
            .hiddenSize(3)
            .gatesCount(4)
            .X(Tensor(ET, {2, 3}, std::vector<T>{
                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
                                                  1.15248052f,
                                                  1.16671345f,
                                                  0.21450312f,
                                                  1.2380678f,
                                                  1.51688835f,
                                                  0.46718366f,
                                                  0.91810346f,
                                                  1.1274234f,
                                                  0.51022074f,
                                                  1.11389844f,
                                                  0.74174305f}))
            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
                                                  0.76665538549423218,
                                                  0.82387429475784302,
                                                  0.6479143500328064,
                                                  0.66586339473724365,
                                                  0.74838578701019287}))
            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
                                                  1.1150213479995728,
                                                  1.4510968923568726,
                                                  1.0649888515472412,
                                                  0.93761754035949707,
                                                  1.3659683465957642}))
            .testcaseName("lstm_cell_v1_bias_clip"),
    };
    return params;
}

std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
    const std::vector<std::vector<LSTMCellParams>> generatedParams {
        generateParamsBiasClipV1<element::Type_t::bf16>(),
        generateParamsBiasClipV1<element::Type_t::f16>(),
        generateParamsBiasClipV1<element::Type_t::f32>(),
        generateParamsBiasClipV1<element::Type_t::f64>(),
    };
    std::vector<LSTMCellParams> combinedParams;

    for (const auto& params : generatedParams) {
        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
    }
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
                         testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
                         testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
                         testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
} // namespace
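Note on convert_lstm_node_format, used throughout these fixtures: a 4H x K LSTM weight matrix stores one H x K block per gate, so converting between layouts such as IOFC and IFCO amounts to permuting row blocks. A sketch of that idea; the concrete permutation below (i, o, f, c -> i, f, c, o) is an assumption for illustration, not necessarily the exact mapping the utility applies:

#include <algorithm>
#include <cstdio>
#include <vector>

// Copy gate block perm[g] of the source matrix into slot g of the result.
std::vector<float> reorder_gates(const std::vector<float>& w, int H, int K,
                                 const int perm[4]) {
    std::vector<float> out(w.size());
    for (int g = 0; g < 4; ++g)
        std::copy(w.begin() + perm[g] * H * K, w.begin() + (perm[g] + 1) * H * K,
                  out.begin() + g * H * K);
    return out;
}

int main() {
    std::vector<float> w = {0, 1, 2, 3};  // H = K = 1: one value per gate, in IOFC order
    const int perm[4] = {0, 2, 3, 1};     // assumed IOFC -> IFCO block mapping
    for (float v : reorder_gates(w, 1, 1, perm)) std::printf("%g ", v);  // prints 0 2 3 1
}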
@ -4,8 +4,8 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "openvino/op/topk.hpp"
|
||||
#include "openvino/op/constant.hpp"
|
||||
#include "openvino/opsets/opset3.hpp"
|
||||
#include "openvino/opsets/opset1.hpp"
|
||||
#include "base_reference_test.hpp"
|
||||
|
||||
using namespace reference_tests;
|
||||
@ -15,7 +15,7 @@ namespace {
|
||||
struct TopKParams {
|
||||
TopKParams(
|
||||
const Tensor& A, const Tensor& k, const int64_t axis,
|
||||
const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort,
|
||||
const opset1::TopK::Mode mode, const opset1::TopK::SortType sort,
|
||||
const Tensor& result0, const Tensor& result1, const size_t outIdx,
|
||||
const std::string& testcaseName = "") :
|
||||
A(A), k(k), axis(axis), mode(mode), sort(sort),
|
||||
@ -25,8 +25,8 @@ struct TopKParams {
|
||||
Tensor A;
|
||||
Tensor k;
|
||||
int64_t axis;
|
||||
op::v1::TopK::Mode mode;
|
||||
op::v1::TopK::SortType sort;
|
||||
opset1::TopK::Mode mode;
|
||||
opset1::TopK::SortType sort;
|
||||
Tensor result0;
|
||||
Tensor result1;
|
||||
size_t outIdx;
|
||||
@ -71,7 +71,6 @@ struct TopKParamsResnet50 {
|
||||
std::string testcaseName;
|
||||
};
|
||||
|
||||
|
||||
class ReferenceTopKTestResnet50 : public testing::TestWithParam<TopKParamsResnet50>, public CommonReferenceTest {
|
||||
public:
|
||||
void SetUp() override {
|
||||
@ -101,18 +100,18 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto B = std::make_shared<op::v1::TopK>(A,
|
||||
op::v0::Constant::create(element::i64, {}, {5}),
|
||||
const auto B = std::make_shared<opset1::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {5}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES);
|
||||
const auto C = std::make_shared<op::v1::TopK>(A,
|
||||
op::v0::Constant::create(element::i64, {}, {1}),
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
const auto C = std::make_shared<opset1::TopK>(A,
|
||||
opset1::Constant::create(element::i64, {}, {1}),
|
||||
1,
|
||||
op::v1::TopK::Mode::MAX,
|
||||
op::v1::TopK::SortType::SORT_VALUES);
|
||||
opset1::TopK::Mode::MAX,
|
||||
opset1::TopK::SortType::SORT_VALUES);
|
||||
|
||||
const auto out5_value = B->output(0);
|
||||
const auto out5_index = B->output(1);
|
||||
@ -220,12 +219,12 @@ public:
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
|
||||
const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
|
||||
const auto A = std::make_shared<opset1::Parameter>(params.A.type,
|
||||
params.A.shape);
|
||||
const auto k = op::v0::Constant::create(params.k.type,
|
||||
const auto k = opset1::Constant::create(params.k.type,
|
||||
params.k.shape,
|
||||
params.k.data.data());
|
||||
const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
|
||||
const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
|
||||
return f;
|
||||
}
|
||||
@ -253,8 +252,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::NONE,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::NONE,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -292,8 +291,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::NONE,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::NONE,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -331,8 +330,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -366,8 +365,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -401,8 +400,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape, std::vector<size_t> shape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -440,8 +439,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            }({128, 1000})),
            Tensor(ET2, {}, std::vector<T2>{5}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {128, 5}, [](std::vector<size_t> rshape) -> std::vector<T>{
                std::vector<T> expected_value;
                for (size_t i = 0; i < rshape[0]; i++) {
@ -467,8 +466,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
            0,
@ -478,8 +477,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
            0,
@ -489,8 +488,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
            0,
@ -500,8 +499,8 @@ std::vector<TopKParams> generateParamsMaxMinSort() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
            0,
@ -536,7 +535,7 @@ std::vector<TopKParams> generateCombinedParamsMaxMinSort() {
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort,
    testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName);

class ReferenceTopKTestV3 : public ReferenceTopKTest {
class ReferenceTopKTestBackend : public ReferenceTopKTest {
public:
    void SetUp() override {
        auto params = GetParam();
@ -547,18 +546,18 @@ public:

private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
        const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = op::v0::Constant::create(params.k.type,
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<op::v3::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestV3, CompareWithRefs) {
TEST_P(ReferenceTopKTestBackend, CompareWithRefs) {
    Exec();
}
@ -572,8 +571,8 @@ std::vector<TopKParams> generateParamsV3() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{5, 4, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{3, 4, 0}),
            0,
@ -583,8 +582,8 @@ std::vector<TopKParams> generateParamsV3() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {3}, std::vector<T>{3, 5, 4}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 3, 4}),
            0,
@ -594,8 +593,8 @@ std::vector<TopKParams> generateParamsV3() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{1, 2, 0}),
            0,
@ -605,8 +604,8 @@ std::vector<TopKParams> generateParamsV3() {
            Tensor(ET, {5}, std::vector<T>{3, 1, 2, 5, 4}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_INDICES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_INDICES,
            Tensor(ET, {3}, std::vector<T>{3, 1, 2}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{0, 1, 2}),
            0,
@ -615,7 +614,7 @@ std::vector<TopKParams> generateParamsV3() {
    return params;
}

std::vector<TopKParams> generateCombinedParamsV3() {
std::vector<TopKParams> generateCombinedParamsBackend() {
    const std::vector<std::vector<TopKParams>> generatedParams {
        generateParamsMaxMinSort<element::Type_t::i8, element::Type_t::i64, element::Type_t::i32>(),
        generateParamsMaxMinSort<element::Type_t::i16, element::Type_t::i64, element::Type_t::i32>(),
@ -638,8 +637,8 @@ std::vector<TopKParams> generateCombinedParamsV3() {
    return combinedParams;
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3,
    testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend,
    testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName);
class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest {
public:
@ -673,12 +672,12 @@ public:

private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
        const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = op::v0::Constant::create(params.k.type,
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
        return f;
    }
@ -698,8 +697,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{6}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
            0,
@ -709,8 +708,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{6}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
            1,
@ -720,8 +719,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
            0,
@ -731,8 +730,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{6, 5, 4}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
            1,
@ -742,8 +741,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1}, std::vector<T>{6}),
            Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
            0,
@ -753,8 +752,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1}, std::vector<T>{6}),
            Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
            1,
@ -764,8 +763,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{6}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
            0,
@ -775,8 +774,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{6}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {6}, std::vector<T>{1, 2, 3, 4, 5, 6}),
            Tensor(ET_OUT, {6}, std::vector<T_OUT>{5, 4, 3, 2, 1, 0}),
            1,
@ -786,8 +785,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
            0,
@ -797,8 +796,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{3}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {3}, std::vector<T>{1, 2, 3}),
            Tensor(ET_OUT, {3}, std::vector<T_OUT>{5, 4, 3}),
            1,
@ -808,8 +807,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1}, std::vector<T>{1}),
            Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
            0,
@ -819,8 +818,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            Tensor(ET, {6}, std::vector<T>{6, 5, 4, 3, 2, 1}),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1}, std::vector<T>{1}),
            Tensor(ET_OUT, {1}, std::vector<T_OUT>{5}),
            1,
@ -832,8 +831,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
            }),
@ -849,8 +848,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
            }),
@ -882,8 +881,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
                169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
                187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -923,8 +922,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 3, 2, 4}, std::vector<T>{
                169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251,
                187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222,
@ -948,8 +947,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 2}, std::vector<T>{
                10, 12, 9, 4, 11, 7, 6, 3
            }),
@ -965,8 +964,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 2}, std::vector<T>{
                10, 12, 9, 4, 11, 7, 6, 3
            }),
@ -982,8 +981,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1, 2}, std::vector<T>{
                10, 12, 11, 7
            }),
@ -999,8 +998,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1, 2}, std::vector<T>{
                10, 12, 11, 7
            }),
@ -1016,8 +1015,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
            }),
@ -1033,8 +1032,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7
            }),
@ -1050,8 +1049,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 2}, std::vector<T>{
                8, 2, 10, 4, 5, 1, 6, 3
            }),
@ -1067,8 +1066,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 2}, std::vector<T>{
                8, 2, 10, 4, 5, 1, 6, 3
            }),
@ -1084,8 +1083,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1, 2}, std::vector<T>{
                8, 2, 5, 1
            }),
@ -1101,8 +1100,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1, 2}, std::vector<T>{
                8, 2, 5, 1
            }),
@ -1118,8 +1117,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{4}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {4, 3}, std::vector<T>{
                12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
            }),
@ -1135,8 +1134,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{4}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {4, 3}, std::vector<T>{
                12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4
            }),
@ -1152,8 +1151,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3}, std::vector<T>{
                12, 11, 10, 9, 8, 7
            }),
@ -1169,8 +1168,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3}, std::vector<T>{
                12, 11, 10, 9, 8, 7
            }),
@ -1186,8 +1185,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1, 3}, std::vector<T>{
                12, 11, 10
            }),
@ -1203,8 +1202,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {1, 3}, std::vector<T>{
                12, 11, 10
            }),
@ -1220,8 +1219,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1}, std::vector<T>{
                4, 3
            }),
@ -1237,8 +1236,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 1}, std::vector<T>{
                4, 3
            }),
@ -1254,8 +1253,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{4}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {4, 3}, std::vector<T>{
                3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
            }),
@ -1271,8 +1270,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{4}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {4, 3}, std::vector<T>{
                3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10
            }),
@ -1288,8 +1287,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3}, std::vector<T>{
                3, 1, 4, 6, 2, 5
            }),
@ -1305,8 +1304,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{2}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3}, std::vector<T>{
                3, 1, 4, 6, 2, 5
            }),
@ -1322,8 +1321,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::NONE,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::NONE,
            Tensor(ET, {1, 3}, std::vector<T>{
                3, 1, 4
            }),
@ -1339,8 +1338,8 @@ std::vector<TopKParams> generateParams1dMaxMin() {
            }),
            Tensor(ET2, {}, std::vector<T2>{1}),
            0,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::NONE,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::NONE,
            Tensor(ET, {1, 3}, std::vector<T>{
                3, 1, 4
            }),
@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM
class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
        const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = op::v0::Constant::create(params.k.type,
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<op::v1::TopK>(A,
        const auto B = std::make_shared<opset1::TopK>(A,
                                                      k,
                                                      params.axis,
                                                      params.mode,
@ -1412,8 +1411,8 @@ std::vector<TopKParams> generateParamsInt64() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
            }),
@ -1428,8 +1427,8 @@ std::vector<TopKParams> generateParamsInt64() {
            }),
            Tensor(ET2, {}, std::vector<T2>{3}),
            1,
            op::v1::TopK::Mode::MAX,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MAX,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 3, 2}, std::vector<T>{
                10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1
            }),
@ -1468,12 +1467,12 @@ public:

private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
        const auto A = std::make_shared<op::v0::Parameter>(params.A.type,
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = op::v0::Constant::create(params.k.type,
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<op::v1::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto B = std::make_shared<opset1::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
        return f;
    }
@ -1493,8 +1492,8 @@ std::vector<TopKParams> generateParamsSingleOutput() {
            Tensor(ET, {2, 3, 2}, std::vector<T>{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}),
            Tensor(ET2, {}, std::vector<T2>{2}),
            1,
            op::v1::TopK::Mode::MIN,
            op::v1::TopK::SortType::SORT_VALUES,
            opset1::TopK::Mode::MIN,
            opset1::TopK::SortType::SORT_VALUES,
            Tensor(ET, {2, 2, 2}, std::vector<T>{}),
            Tensor(ET_OUT, {2, 2, 2}, std::vector<T_OUT>{2, 0, 1, 2, 1, 0, 0, 1}),
            0,
@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle
    testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName);

TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) {
    const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k = op::v0::Constant::create(element::i64, Shape{}, {1});
    EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
    EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
    EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
    EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}

TEST(ReferenceTopKTestInvalid, topk_v1_invalid_k) {
    const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2});
    EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f});
    EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1});
    EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
    EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
    EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
    EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}

class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParamsResnet50& params) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto B = std::make_shared<opset3::TopK>(A,
                                                      opset1::Constant::create(element::i64, {}, {5}),
                                                      1,
                                                      opset1::TopK::Mode::MAX,
                                                      opset1::TopK::SortType::SORT_VALUES);
        const auto C = std::make_shared<opset3::TopK>(A,
                                                      opset1::Constant::create(element::i64, {}, {1}),
                                                      1,
                                                      opset1::TopK::Mode::MAX,
                                                      opset1::TopK::SortType::SORT_VALUES);

        const auto out5_value = B->output(0);
        const auto out5_index = B->output(1);
        const auto out1_value = C->output(0);
        const auto out1_index = C->output(1);
        const auto f = std::make_shared<Model>(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3,
    testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName);

class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3,
    testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName);

class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(B->outputs(), ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3,
    testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName);

class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3,
    testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName);

class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params, size_t out_idx) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<opset3::TopK>(A,
                                                      k,
                                                      params.axis,
                                                      params.mode,
                                                      params.sort,
                                                      element::i64);
        const auto f = std::make_shared<Model>(OutputVector{B->output(out_idx)}, ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3,
    testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName);

class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput {
private:
    static std::shared_ptr<Model> CreateFunction(const TopKParams& params) {
        const auto A = std::make_shared<opset1::Parameter>(params.A.type,
                                                           params.A.shape);
        const auto k = opset1::Constant::create(params.k.type,
                                                params.k.shape,
                                                params.k.data.data());
        const auto B = std::make_shared<opset3::TopK>(A, k, params.axis, params.mode, params.sort);
        const auto f = std::make_shared<Model>(OutputVector{B->output(1)}, ParameterVector{A});
        return f;
    }
};

TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) {
    Exec();
}

INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3,
    testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName);

TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) {
    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
    EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure);
    EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure);
}

TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) {
    const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
    const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2});
    EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f});
    EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure);
    const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1});
    EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure);
}
} // namespace
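A note on the bulk rename above: in the OpenVINO headers each `opsetN` namespace only re-exports the versioned operator types, so switching the tests from `op::v1`/`op::v3` to `opset1`/`opset3` should not change which operators get instantiated. A quick compile-time check of that claim (a sketch; it assumes the `ov::opsetN` namespaces provided by the two opset headers this test already includes):

```cpp
#include <type_traits>

#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset3.hpp"

// opset1::TopK and opset3::TopK are expected to be aliases of the
// versioned op types, making the rename in this diff purely cosmetic.
static_assert(std::is_same<ov::opset1::TopK, ov::op::v1::TopK>::value,
              "opset1 re-exports op::v1::TopK");
static_assert(std::is_same<ov::opset3::TopK, ov::op::v3::TopK>::value,
              "opset3 re-exports op::v3::TopK");
```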
@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() {
    SortTopologically();

    InitDescriptors();
    RemoveDroppedEdges();

    InitOptimalPrimitiveDescriptors();

@ -717,7 +717,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) {
    selectedPD->setConfig(rightConfig);
}

void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) {
    for (size_t i = 0; i < getChildEdges().size(); i++) {
        auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr();
        if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -1049,7 +1049,9 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
    }
}

void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr) {
void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr,
                                  std::unordered_map<int, mkldnn::memory>& primArgs,
                                  const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs) {
    auto post_ops = attr.get_post_ops();
    int idx = 0;
    for (int i = 0; i < post_ops.len(); i++) {

@ -208,7 +208,9 @@ public:
        return 1;
    }

    void appendPostOpArgs(const mkldnn::primitive_attr& attr);
    static void appendPostOpArgs(const mkldnn::primitive_attr& attr,
                                 std::unordered_map<int, mkldnn::memory>& primArgs,
                                 const std::vector<MKLDNNMemoryPtr>& binaryPostOpsArgs);

    bool isFusedWith(Type type) const;

@ -425,7 +427,7 @@ public:
        if (impl_type == selected_pd->getImplementationType() &&
            descsCompatible(srcDescs, selected_pd->getConfig().inConfs) &&
            descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) {
            prepareMemory(selected_pd, itpd);
            prepareMemory(itpd);
            PD prim_desc = createPd<PD, D, FPD>(desc);
            return {itpd.get()};
        }
@ -722,6 +724,8 @@ protected:
        supportedPrimitiveDescriptors.push_back({config, implType});
    }

    void prepareMemory(mkldnn::primitive_desc_iterator& itpd);

    bool isDynamic = false;

    bool inputShapesDefined() const;
@ -746,6 +750,7 @@ protected:
    }

    std::vector<VectorDims> lastInputDims = {};

    std::shared_ptr<ngraph::Node> opToShapeInfer;

private:
@ -788,7 +793,6 @@ private:
        return PD(*selected_desc_ptr, engine);
    }

    void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd);
    enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 };
    ConstantType checkConstant(LOOK look, std::vector<MKLDNNNodePtr>& checkNodes);

@ -504,21 +504,22 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
    OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");

    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
    for (const auto &ii : _networkInputs) {
    for (const auto &ii : network.getInputsInfo()) {
        auto input_precision = ii.second->getPrecision();
        if (input_precision != InferenceEngine::Precision::FP64 &&
            input_precision != InferenceEngine::Precision::FP32 &&
            input_precision != InferenceEngine::Precision::I32 &&
            input_precision != InferenceEngine::Precision::U32 &&
            input_precision != InferenceEngine::Precision::U16 &&
            input_precision != InferenceEngine::Precision::I16 &&
            input_precision != InferenceEngine::Precision::I8 &&
            input_precision != InferenceEngine::Precision::U8 &&
            input_precision != InferenceEngine::Precision::BF16 &&
            input_precision != InferenceEngine::Precision::BOOL &&
            input_precision != InferenceEngine::Precision::I64 &&
            input_precision != InferenceEngine::Precision::U64) {

        using hash_t = std::hash<typename std::underlying_type<Precision::ePrecision>::type>;

        static const std::unordered_set<Precision::ePrecision, hash_t> supported_precisions = {
            Precision::U8, Precision::I8,
            Precision::U16, Precision::I16,
            Precision::U32, Precision::I32,
            Precision::U64, Precision::I64,
            Precision::BF16, Precision::FP16,
            Precision::FP32, Precision::FP64,
            Precision::BOOL
        };

        if (!supported_precisions.count(input_precision)) {
            IE_THROW(NotImplemented)
                << "Input image format " << input_precision << " is not supported yet...";
        }
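The hunk above swaps a dozen chained `!=` comparisons for a single hash-set lookup, so supporting an additional precision becomes a one-line change; note the new set also admits FP16, which the old chain rejected. A minimal standalone sketch of the same idea (the `Precision` enum below is a simplified stand-in for `InferenceEngine::Precision::ePrecision`; the explicit `hash_t` in the patch exists because `std::hash` for enumeration types is only guaranteed since C++14):

```cpp
#include <stdexcept>
#include <unordered_set>

enum class Precision { U8, I8, U16, I16, U32, I32, U64, I64, BF16, FP16, FP32, FP64, BOOL };

void check_input_precision(Precision input_precision) {
    // Built once, reused for every input; membership is a single hash lookup.
    static const std::unordered_set<Precision> supported_precisions = {
        Precision::U8,   Precision::I8,   Precision::U16,  Precision::I16,
        Precision::U32,  Precision::I32,  Precision::U64,  Precision::I64,
        Precision::BF16, Precision::FP16, Precision::FP32, Precision::FP64,
        Precision::BOOL};
    if (!supported_precisions.count(input_precision))
        throw std::runtime_error("Input image format is not supported yet...");
}
```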
@ -18,7 +18,6 @@ public:
    operator bool() const;
    MKLDNNPrimitive& operator=(const std::shared_ptr<mkldnn::primitive>& primitive);
    mkldnn::primitive operator*();

    void reset(mkldnn::primitive* primitive);

private:

@ -4,27 +4,208 @@

#include "cpu_convert.h"
#include "cpu_memcpy.h"
#include "utils/bfloat16.hpp"
#include <utils/bfloat16.hpp>
#include <utils/general_utils.h>
#include <mkldnn_selective_build.h>
#include <ie_parallel.hpp>
#include <openvino/core/type/float16.hpp>
#include <cpu/x64/jit_generator.hpp>
#include <algorithm>
#include <type_traits>
#include <tuple>
#include <ie_parallel.hpp>
#include <cmath>

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace dnnl::impl::cpu::x64;
using namespace dnnl::impl::utils;
using namespace Xbyak;

namespace {

template<typename srcType, typename dstType>
void convert(const void *srcPtr, void *dstPtr, const size_t size) {
    if (std::is_same<srcType, dstType>::value) {
        cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType));
    } else {
        const srcType *srcData = reinterpret_cast<const srcType *>(srcPtr);
        dstType *dstData = reinterpret_cast<dstType *>(dstPtr);
template <typename src_t, typename dst_t>
void convert_vec(jit_generator & gen,
                 const RegExp & src,
                 const RegExp & dst);

        parallel_for(size, [&](size_t i) {
            dstData[i] = static_cast<dstType>(srcData[i]);
template <>
void convert_vec<ov::float16, float>(jit_generator & gen,
                                     const RegExp & src,
                                     const RegExp & dst) {
    auto const & f16vec = gen.xmm3;
    auto const & f32vec = gen.ymm4;

    gen.movdqu(f16vec, gen.xword[src]);
    gen.vcvtph2ps(f32vec, f16vec);
    gen.vmovups(gen.yword[dst], f32vec);
}

template <>
void convert_vec<float, ov::float16>(jit_generator & gen,
                                     const RegExp & src,
                                     const RegExp & dst) {
    auto const & f16vec = gen.xmm3;
    auto const & f32vec = gen.ymm4;

    gen.vmovups(f32vec, gen.yword[src]);
    gen.vcvtps2ph(f16vec, f32vec, 0);
    gen.movdqu(gen.xword[dst], f16vec);
}

class jit_convert_array : public jit_generator {
    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array)

    void generate() override {
        const size_t vlen = 8u;
        const size_t vlen_log2 = 3;

        auto reg_src = rax;
        auto reg_dst = rbx;
        auto reg_sz = rdx;

        Label tail, exit;

        preamble();

        mov(reg_src, ptr[param1 + offsetof(args_t, src)]);
        mov(reg_dst, ptr[param1 + offsetof(args_t, out)]);
        mov(reg_sz, ptr[param1 + offsetof(args_t, count)]);

        xor_(rsi, rsi);
        mov(r8, reg_sz);
        shr(r8, vlen_log2);

        foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) {
            _convert_vec(*this, reg_src, reg_dst);
            add(reg_src, _src_size * vlen);
            add(reg_dst, _dst_size * vlen);
        });

        L(tail);

        shl(rsi, vlen_log2);
        sub(reg_sz, rsi);
        test(reg_sz, reg_sz);
        jz(exit);

        // allocate array for 8 floats on stack
        sub(rsp, vlen * sizeof(float));
        mov(r8, rsp);

        vpxor(ymm4, ymm4, ymm4);
        vmovups(yword[r8], ymm4);

        // Tail conversion
        copy(r8, reg_src, reg_sz, _src_size);
        _convert_vec(*this, r8, r8);
        copy(reg_dst, r8, reg_sz, _dst_size);

        // Free the array on stack
        add(rsp, vlen * sizeof(float));

        L(exit);

        postamble();
    }

    void foreach(const Xbyak::Reg64& idx,
                 size_t step,
                 const Xbyak::Reg64& end,
                 std::function<void(const Xbyak::Reg64&)> && fn) {
        Label loop, exit;

        L(loop);
        cmp(idx, end);
        jge(exit);

        fn(idx);

        add(idx, step);
        jmp(loop);
        L(exit);
    }

    void copy(const Xbyak::Reg64& dst,
              const Xbyak::Reg64& src,
              const Xbyak::Reg64& size,
              size_t item_size) {
        push(rsi);
        push(r15);

        xor_(rsi, rsi);

        auto address_frame = [this](size_t size) -> const AddressFrame& {
            switch (size) {
                case 1: return byte;
                case 2: return word;
                case 4: return dword;
                case 8: return qword;
                default:
                    break;
            }
            return ptr;
        };

        const auto & addr_frame = address_frame(item_size);

        foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
            mov(r15, addr_frame[src + idx * item_size]);
            mov(addr_frame[dst + idx * item_size], r15);
        });

        pop(r15);
        pop(rsi);
    }

public:
    typedef struct {
        const void* src;
        void* out;
        const size_t count;
    } args_t;

    typedef void (*fn_t)(const args_t*);

    typedef void (*convert_vec_t)(jit_generator &,
                                  const RegExp &,
                                  const RegExp &);

    jit_convert_array(convert_vec_t convert_vec,
                      size_t src_size,
                      size_t dst_size)
        : _convert_vec(convert_vec)
        , _src_size(src_size)
        , _dst_size(dst_size) {}

    template<typename src_t, typename dst_t>
    static fn_t get() {
        if (mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) {
            static jit_convert_array converter(convert_vec<src_t, dst_t>, sizeof(src_t), sizeof(dst_t));
            auto & generator = static_cast<jit_generator&>(converter);
            generator.create_kernel();
            return (fn_t)generator.jit_ker();
        }
        return nullptr;
    }

private:
    convert_vec_t _convert_vec;
    size_t _src_size;
    size_t _dst_size;
};

template <typename TI, typename TO>
void jit_convert(const TI* arg, TO* out, size_t count) {
    using jit_impl = jit_convert_array;
    static auto converter = jit_impl::get<TI, TO>();

    if (converter) {
        typename jit_impl::args_t args = { arg, out, count };
        converter(&args);
    } else {
        for (size_t i = 0; i < count; ++i) {
            out[i] = static_cast<TO>(arg[i]);
        }
    }
}
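`jit_convert` caches the generated kernel in a function-local static per `<TI, TO>` pair and quietly falls back to the scalar loop when AVX2/F16C is unavailable, so callers never need their own CPU-feature check. A hypothetical call site (the buffer names are illustrative, not from the patch):

```cpp
#include <vector>

void example_fp16_to_fp32(const std::vector<ov::float16>& half_src,
                          std::vector<float>& float_dst) {
    float_dst.resize(half_src.size());
    // The first call for this type pair generates (or skips) the JIT kernel;
    // subsequent calls reuse the cached function pointer.
    jit_convert(half_src.data(), float_dst.data(), half_src.size());
}
```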
@ -35,84 +216,391 @@ struct PrecisionInfo {

template <>
struct PrecisionInfo<Precision::BF16> {
    using value_type = MKLDNNPlugin::bfloat16_t;
    using value_type = bfloat16_t;
};

template <>
struct PrecisionInfo<Precision::FP16> {
    using value_type = ov::float16;
};

template <>
struct PrecisionInfo<Precision::BOOL> {
    using value_type = uint8_t;
};

template<typename T,
         typename U = typename std::conditional<
                          std::is_same<ov::float16, T>::value
                          || std::is_same<bfloat16_t, T>::value,
                          float, T>::type>
struct Range {
    const std::tuple<U, U> & fit(const Precision & prec);

private:
    std::tuple<U, U> _range {
        std::numeric_limits<T>::lowest(),
        std::numeric_limits<T>::max()
    };
};

template<typename T, typename U>
const std::tuple<U, U> & Range<T, U>::fit(const Precision & prec) {
    if (prec.is_float()) {
        double lbound, ubound;
        switch (prec) {
            case Precision::BF16:
                lbound = static_cast<double>(std::numeric_limits<bfloat16_t>::lowest());
                ubound = static_cast<double>(std::numeric_limits<bfloat16_t>::max());
                break;
            case Precision::FP16:
                lbound = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
                ubound = static_cast<double>(std::numeric_limits<ov::float16>::max());
                break;
            case Precision::FP32:
                lbound = static_cast<double>(std::numeric_limits<float>::lowest());
                ubound = static_cast<double>(std::numeric_limits<float>::max());
                break;
            case Precision::FP64:
                lbound = std::numeric_limits<double>::lowest();
                ubound = std::numeric_limits<double>::max();
                break;
            default:
                IE_THROW() << "Unsupported precision";
        }
        std::get<0>(_range) = static_cast<U>(std::max(static_cast<double>(std::get<0>(_range)), lbound));
        std::get<1>(_range) = static_cast<U>(std::min(static_cast<double>(std::get<1>(_range)), ubound));
    } else {
        int64_t lbound;
        uint64_t ubound;
        switch (prec) {
            case Precision::BOOL:
            case Precision::U8:
                lbound = static_cast<int64_t>(std::numeric_limits<uint8_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
                break;
            case Precision::I8:
                lbound = static_cast<int64_t>(std::numeric_limits<int8_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<int8_t>::max());
                break;
            case Precision::U16:
                lbound = static_cast<int64_t>(std::numeric_limits<uint16_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
                break;
            case Precision::I16:
                lbound = static_cast<int64_t>(std::numeric_limits<int16_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<int16_t>::max());
                break;
            case Precision::U32:
                lbound = static_cast<int64_t>(std::numeric_limits<uint32_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
                break;
            case Precision::I32:
                lbound = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
                break;
            case Precision::U64:
                lbound = static_cast<int64_t>(std::numeric_limits<uint64_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
                break;
            case Precision::I64:
                lbound = static_cast<int64_t>(std::numeric_limits<int64_t>::lowest());
                ubound = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
                break;
            default:
                IE_THROW() << "Unsupported precision";
        }
        using ltype = typename std::conditional<
                          std::is_floating_point<U>::value,
                          double, int64_t>::type;
        using utype = typename std::conditional<
                          std::is_floating_point<U>::value,
                          double, uint64_t>::type;
        std::get<0>(_range) = static_cast<U>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
        std::get<1>(_range) = static_cast<U>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
    }
    return _range;
}
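`fit` intersects `_range` with the representable interval of each precision it is given, so after the two calls in `ConvertContext::range()` below the bounds are simultaneously valid for the source type, the interim precision, and the destination; the conversion kernels then clamp every element before casting. The saturation step in isolation looks like this (a self-contained sketch, not the plugin's code; boundary rounding at the extremes of 64-bit integer destinations is glossed over):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

// Clamp to the destination's representable range, then cast:
// 300.0f -> int8_t yields 127 instead of an out-of-range conversion.
template <typename Dst, typename Src>
Dst saturate_cast(Src v) {
    const Src lo = static_cast<Src>(std::numeric_limits<Dst>::lowest());
    const Src hi = static_cast<Src>(std::numeric_limits<Dst>::max());
    return static_cast<Dst>(std::max(lo, std::min(v, hi)));
}

int main() {
    std::cout << int(saturate_cast<int8_t>(300.0f)) << "\n";   // prints 127
    std::cout << int(saturate_cast<int8_t>(-300.0f)) << "\n";  // prints -128
}
```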

struct ConvertContext {
    const void *srcPtr;
    void *dstPtr;
    size_t size;
    Precision interimPrc;
    Precision dstPrc;
    bool converted;

    template<typename T>
    std::tuple<T, T> range() const {
        Range<T> r;
        r.fit(interimPrc);
        return r.fit(dstPrc);
    }
};

template<typename T>
struct ConvertPrecision {
    using src_t = typename std::tuple_element<0, T>::type;
    using dst_t = typename std::tuple_element<1, T>::type;
struct ConvertPrecision;

template<typename src_t, typename dst_t>
struct ConvertPrecision<std::tuple<src_t, dst_t>> {
    void operator()(ConvertContext & ctx) {
        convert<src_t, dst_t>(ctx.srcPtr, ctx.dstPtr, ctx.size);
        auto src = static_cast<const src_t *>(ctx.srcPtr);
        auto dst = static_cast<dst_t *>(ctx.dstPtr);
        src_t lbound, ubound;
        std::tie(lbound, ubound) = ctx.range<src_t>();

        if (std::is_integral<src_t>::value
            || ctx.interimPrc.is_float()
            || std::is_integral<dst_t>::value) {
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
            });
        } else {
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
            });
        }

        ctx.converted = true;
    }
};

template<>
struct ConvertPrecision<std::tuple<float, bfloat16_t>> {
    void operator()(ConvertContext & ctx) {
        auto src = static_cast<const float *>(ctx.srcPtr);
        auto dst = static_cast<bfloat16_t *>(ctx.dstPtr);

        if (ctx.interimPrc.is_float()) {
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = static_cast<bfloat16_t>(src[i]);
            });
        } else {
            float lbound, ubound;
            std::tie(lbound, ubound) = ctx.range<float>();
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = static_cast<bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
            });
        }

        ctx.converted = true;
    }
};

template<>
struct ConvertPrecision<std::tuple<bfloat16_t, float>> {
    void operator()(ConvertContext & ctx) {
        auto src = static_cast<const bfloat16_t *>(ctx.srcPtr);
        auto dst = static_cast<float *>(ctx.dstPtr);

        if (ctx.interimPrc.is_float()) {
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = static_cast<float>(src[i]);
            });
        } else {
            float lbound, ubound;
            std::tie(lbound, ubound) = ctx.range<bfloat16_t>();
            parallel_for(ctx.size, [&](size_t i) {
                dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
            });
        }

        ctx.converted = true;
    }
};

template<typename src_t>
struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
    void operator()(ConvertContext & ctx) {
        auto src = static_cast<const src_t *>(ctx.srcPtr);
        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);

        constexpr size_t batch = 64;
        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
        typedef float batch_type[batch];

        src_t lbound, ubound;
        std::tie(lbound, ubound) = ctx.range<src_t>();

        if (std::is_integral<src_t>::value
            || ctx.interimPrc.is_float()) {
            parallel_for(iterations, [&](size_t i) {
                batch_type tmp;
                const size_t offset = i * batch;
                const size_t current_batch_size = std::min(ctx.size - offset, batch);
                for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
                    tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
                jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
            });
        } else {
            parallel_for(iterations, [&](size_t i) {
                batch_type tmp;
                const size_t offset = i * batch;
                const size_t current_batch_size = std::min(ctx.size - offset, batch);
                for (size_t j = 0; j < current_batch_size; ++j) // src_t -> fp32
                    tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
                jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
            });
        }

        ctx.converted = true;
    }
};

template<typename dst_t>
struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
    void operator()(ConvertContext & ctx) {
        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
        auto dst = static_cast<dst_t *>(ctx.dstPtr);

        constexpr size_t batch = 64;
        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
        typedef float batch_type[batch];

        float lbound, ubound;
        std::tie(lbound, ubound) = ctx.range<ov::float16>();

        if (ctx.interimPrc.is_float()
            || std::is_integral<dst_t>::value) {
            parallel_for(iterations, [&](size_t i) {
                batch_type tmp;
                const size_t offset = i * batch;
                const size_t current_batch_size = std::min(ctx.size - offset, batch);
                jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
                for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
                    dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
            });
        } else {
            parallel_for(iterations, [&](size_t i) {
                batch_type tmp;
                const size_t offset = i * batch;
                const size_t current_batch_size = std::min(ctx.size - offset, batch);
                jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
                for (size_t j = 0; j < current_batch_size; ++j) // fp32 -> dst_t
                    dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
            });
        }

        ctx.converted = true;
    }
};

template<>
struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
    void operator()(ConvertContext & ctx) {
        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);

        constexpr size_t batch = 64;
        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
|
||||
typedef float batch_type[batch];
|
||||
|
||||
float lbound, ubound;
|
||||
std::tie(lbound, ubound) = ctx.range<ov::float16>();
|
||||
|
||||
if (ctx.interimPrc.is_float()) {
|
||||
cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
|
||||
} else {
|
||||
parallel_for(iterations, [&](size_t i) {
|
||||
batch_type tmp;
|
||||
const size_t offset = i * batch;
|
||||
const size_t current_batch_size = std::min(ctx.size - offset, batch);
|
||||
jit_convert(src + offset, tmp, current_batch_size); // fp16 -> fp32
|
||||
for (size_t j = 0; j < current_batch_size; ++j) // truncate fp32
|
||||
tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
|
||||
jit_convert(tmp, dst + offset, current_batch_size); // fp32 -> fp16
|
||||
});
|
||||
}
|
||||
|
||||
ctx.converted = true;
|
||||
}
|
||||
};
|
||||
|
||||
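Editor's note: the three float16 specializations above share one batching idiom — route data through a 64-element fp32 scratch buffer so jit_convert always sees small, stack-resident chunks. A minimal standalone sketch of just that chunking loop (generic kernel stand-in, not the plugin's jit_convert):

#include <algorithm>
#include <cstddef>

// Process `total` elements in fixed-size chunks; `kernel(offset, count)` sees
// at most `batch` elements at a time, mirroring the batch_type scratch above.
template <typename F>
void for_each_batch(std::size_t total, F kernel) {
    constexpr std::size_t batch = 64;
    for (std::size_t offset = 0; offset < total; offset += batch) {
        kernel(offset, std::min(total - offset, batch));
    }
}
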
bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
    return to.bitsSize() < from.bitsSize()
           || (from.is_float() && !to.is_float())       // float -> integral
           || (from.isSigned() != to.isSigned())        // signed <-> unsigned
           || (to == Precision::BOOL && from != to);    // T -> bool
}

}   // namespace

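Editor's note: a self-contained sketch of the saturate-then-truncate rule the ConvertPrecision functors above implement (plain STL, hypothetical values; not the plugin code itself):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Clamp to the destination range first, then drop the fractional part when the
// intermediate precision is integral -- the same order the functors above use.
int8_t saturate_convert(float v, float lbound, float ubound, bool interim_is_float) {
    const float clamped = std::max(std::min(v, ubound), lbound);
    return static_cast<int8_t>(interim_is_float ? clamped : std::trunc(clamped));
}

int main() {
    std::cout << int(saturate_convert(300.7f, -128.f, 127.f, false)) << '\n';  // 127 (saturated)
    std::cout << int(saturate_convert(-3.9f, -128.f, 127.f, false)) << '\n';   // -3 (truncated, not rounded)
}
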
#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)

void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
    using namespace MKLDNNPlugin;
#define MKLDNN_CVT_LIST \
    MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
    MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
    MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
    MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
    MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
    MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
    MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
    MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \
    MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \
    MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \
    MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \
    MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \
    MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \
    MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \
    MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \
    MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \
    MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \
    MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \
    MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \
    MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \
    MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \
    MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \
    MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \
    MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \
    MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \
    MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \
    MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \
    MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \
    MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \
    MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \
    MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \
    MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \
    MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \
    MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \
    MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \
    MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \
    MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \
    MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \
    MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \
    MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \
    MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \
    MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \
    MKLDNN_CVT(BOOL, BOOL)

void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
    cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size);
}

void cpu_convert(const void *srcPtr,
                 void *dstPtr,
                 InferenceEngine::Precision srcPrc,
                 InferenceEngine::Precision interimPrc,
                 InferenceEngine::Precision dstPrc,
                 const size_t size) {
    if (srcPtr == nullptr || dstPtr == nullptr)
        IE_THROW() << "cpu_convert has null data pointer";

    if (srcPrc == dstPrc) {
    if (srcPrc == dstPrc && srcPrc == interimPrc) {
        cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
        return;
    }

    ConvertContext ctx = { srcPtr, dstPtr, size, false };

    OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc),
              MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16),
              MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64),
              MKLDNN_CVT(U8, FP32), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, BOOL),
              MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16),
              MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64),
              MKLDNN_CVT(I8, FP32), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, BOOL),
              MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16),
              MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64),
              MKLDNN_CVT(U16, FP32), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, BOOL),
              MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16),
              MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64),
              MKLDNN_CVT(I16, FP32), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, BOOL),
              MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16),
              MKLDNN_CVT(I32, I16), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64),
              MKLDNN_CVT(I32, FP32), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, BOOL),
              MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16),
              MKLDNN_CVT(U64, I16), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64),
              MKLDNN_CVT(U64, FP32), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, BOOL),
              MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16),
              MKLDNN_CVT(I64, I16), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64),
              MKLDNN_CVT(I64, FP32), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, BOOL),
              MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16),
              MKLDNN_CVT(FP32, I16), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64),
              MKLDNN_CVT(FP32, I64), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, BOOL),
              MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16),
              MKLDNN_CVT(BF16, I16), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64),
              MKLDNN_CVT(BF16, I64), MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, BOOL),
              MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16),
              MKLDNN_CVT(BOOL, I16), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64),
              MKLDNN_CVT(BOOL, I64), MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, BF16),
              MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16),
              MKLDNN_CVT(FP64, I16), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64),
              MKLDNN_CVT(FP64, I64), MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL),
              MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16),
              MKLDNN_CVT(U32, I16), MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64),
              MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, BOOL));

    } else {
        ConvertContext ctx = {
            srcPtr,
            dstPtr,
            size,
            interimPrc,
            dstPrc,
            false
        };
        OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
        if (!ctx.converted)
            IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
    }
}

#undef MKLDNN_CVT
#undef MKLDNN_CVT_LIST

@ -19,5 +19,32 @@
 *        number of elements in buffers to be converted
 * @return none.
 */
void cpu_convert(const void *srcPtr,
                 void *dstPtr,
                 InferenceEngine::Precision srcPrc,
                 InferenceEngine::Precision dstPrc,
                 const size_t size);

void cpu_convert(const void *srcPtr, void *dstPtr, InferenceEngine::Precision srcPrc, InferenceEngine::Precision dstPrc, const size_t size);
/**
 * @brief Copies size elements from the buffer pointed to by srcPtr to the buffer pointed to by dstPtr.
 *        If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed.
 * @param srcPtr
 *        pointer to the buffer to convert from
 * @param dstPtr
 *        pointer to the buffer to convert to
 * @param srcPrc
 *        precision of the buffer to convert from
 * @param interimPrc
 *        intermediate precision used for type truncation
 * @param dstPrc
 *        precision of the buffer to convert to
 * @param size
 *        number of elements in buffers to be converted
 * @return none.
 */
void cpu_convert(const void *srcPtr,
                 void *dstPtr,
                 InferenceEngine::Precision srcPrc,
                 InferenceEngine::Precision interimPrc,
                 InferenceEngine::Precision dstPrc,
                 const size_t size);

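Editor's note: a hypothetical call of the three-precision overload declared above (buffer contents made up; assumes the usual plugin headers and namespaces are in scope). Based on the implementation earlier in this diff, values are clamped to the intermediate range before the final cast:

// Convert four floats to int32 while clamping through an int8 intermediate
// range: values outside [-128, 127] saturate before the final cast.
float src[4] = {1.5f, -200.0f, 42.0f, 300.0f};
int32_t dst[4] = {};
cpu_convert(src, dst, Precision::FP32, Precision::I8, Precision::I32, 4);
// dst is expected to hold {1, -128, 42, 127}
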
@ -979,7 +979,7 @@ void MKLDNNConvolutionNode::prepareParams() {
        primArgs[DNNL_ARG_BIAS] = getBias();
    }

    appendPostOpArgs(*pAttrLocal);
    appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs);
}

void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) {

@ -7,7 +7,8 @@
#include "common/cpu_convert.h"
#include "common/blocked_desc_creator.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/ngraph_utils.hpp"
#include <ie_ngraph_utils.hpp>
#include <utils/ngraph_utils.hpp>

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph:
    return true;
}

MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(op, eng, cache) {
MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode(op, eng, cache) {
    std::string errorMessage;
    if (isSupportedOperation(op, errorMessage)) {
        errorPrefix = "Convert node with name '" + getName() + "'";
    } else {
        IE_THROW(NotImplemented) << errorMessage;
    }

    auto convert = ov::as_type_ptr<const ngraph::opset1::Convert>(op);
    origPrc = details::convertPrecision(convert->get_destination_type());
}

std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
@ -42,7 +46,8 @@ std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {

MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
                                     const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode("Convert", nodeName, eng, cache) {
        : MKLDNNNode("Convert", nodeName, eng, cache)
        , origPrc(outPrc) {
    inputShapes.push_back(shape);
    addOriginalInputPrecision(inPrc);
    outputShapes.push_back(shape);
@ -147,7 +152,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) {

    void* srcPtr = parentMem.GetPtr();
    void* dstPtr = childMem.GetPtr();
    cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount);

    cpu_convert(srcPtr,
                dstPtr,
                parentMem.getDesc().getPrecision(),
                origPrc,
                childMem.getDesc().getPrecision(),
                parentPaddElemCount);
}

bool MKLDNNConvertNode::created() const {

@ -49,6 +49,7 @@ public:
private:
    MemoryDescPtr input;
    MemoryDescPtr output;
    InferenceEngine::Precision origPrc;

    std::string errorPrefix;
};

@ -12,6 +12,7 @@
#include "ie_precision.hpp"
#include <ie_ngraph_utils.hpp>
#include "mkldnn_cum_sum_node.h"
#include "utils/bfloat16.hpp"

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() {
        return;

    dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA);
    if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
        dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
    if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64))
        IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name();

    if (inputShapes.size() == numOfInputs) {
@ -95,42 +95,16 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) {
    if (inputShapes.size() == numOfInputs)
        axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory());

    switch (dataPrecision) {
        case Precision::I8 : {
            exec<int8_t>();
            break;
    OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision,
              OV_CASE(Precision::I8, int8_t),
              OV_CASE(Precision::U8, uint8_t),
              OV_CASE(Precision::I16, int16_t),
              OV_CASE(Precision::BF16, bfloat16_t),
              OV_CASE(Precision::I32, int32_t),
              OV_CASE(Precision::FP32, float),
              OV_CASE(Precision::I64, int64_t),
              OV_CASE(Precision::U64, uint64_t))
}
        case Precision::U8 : {
            exec<uint8_t>();
            break;
        }
        case Precision::I16 : {
            exec<int16_t>();
            break;
        }
        case Precision::I32 : {
            exec<int32_t>();
            break;
        }
        case Precision::FP32 : {
            exec<float>();
            break;
        }
        case Precision::I64 : {
            exec<int64_t>();
            break;
        }
        case Precision::U64 : {
            exec<uint64_t>();
            break;
        }
        default : {
            std::string errorMsg = errorPrefix + " has unsupported 'data' input precision: " + dataPrecision.name();
            IE_THROW() << errorMsg;
        }
    }
}

template <typename dataType>
void MKLDNNCumSumNode::exec() {

@ -47,6 +47,13 @@ private:

    InferenceEngine::Precision dataPrecision;
    std::string errorPrefix;

    template<typename T>
    struct CumSumExecute {
        void operator()(MKLDNNCumSumNode* node) {
            node->exec<T>();
        }
    };
};

} // namespace MKLDNNPlugin

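Editor's note: the CumSumExecute functor plus OV_SWITCH/OV_CASE replaces the hand-written switch with tag dispatch into a template instantiation. A plain-C++ sketch of the same idea (no OpenVINO macros; names are hypothetical):

#include <cstdint>

enum class Prec { I8, I32, FP32 };

template <typename T>
struct Exec {
    void operator()() { /* node->exec<T>() would run here */ }
};

// Map a runtime precision tag to a compile-time template instantiation.
inline void dispatch(Prec p) {
    switch (p) {
        case Prec::I8:   Exec<std::int8_t>{}();  break;
        case Prec::I32:  Exec<std::int32_t>{}(); break;
        case Prec::FP32: Exec<float>{}();        break;
    }
}
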
@ -13,34 +13,38 @@
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
#include "utils/general_utils.h"
#include <ngraph/opsets/opset1.hpp>
#include <cpu/x64/cpu_isa_traits.hpp>
#include <nodes/common/cpu_memcpy.h>
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <utils/shape_inference/static_shape.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <ie_ngraph_utils.hpp>
#include "convolution_shape_inference.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
    try {
        if (isDynamicNgraphNode(op)) {
            errorMessage = "Doesn't support op with dynamic shapes";
            return false;
        }

        if (std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op) == nullptr &&
                std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op) == nullptr) {
            errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported";
            return false;
        }
        size_t ndims = op->get_input_shape(0).size();
        size_t ndims = op->get_input_partial_shape(0).rank().get_length();
        if ((ndims < 3) || (ndims > 5)) {
            errorMessage = "Only 3D, 4D and 5D blobs are supported as input";
            return false;
        }
        if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) {
            errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs";
            return false;
        }
    } catch (...) {
        return false;
    }
@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N

        auto convBackprop = std::dynamic_pointer_cast<const ngraph::opset1::ConvolutionBackpropData>(op);
        auto groupConvBackprop = std::dynamic_pointer_cast<const ngraph::opset1::GroupConvolutionBackpropData>(op);
        const auto dataShape = op->get_input_shape(0);
        weightDims = op->get_input_shape(1);
        const auto outShape = op->get_shape();
        OC = outShape[1];
        IC = dataShape[1];
        const auto& weightDims = getWeightDims();

        if (convBackprop) {
            algorithm = DeconvolutionCommon;

            IC = weightDims[0];
            OC = weightDims[1];

            groupNum = 1;
            withGroups = false;

@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
            }
            paddingL = convBackprop->get_pads_begin();
            paddingR = convBackprop->get_pads_end();

            outputPadding = convBackprop->get_output_padding();

            autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
        } else if (groupConvBackprop) {
            algorithm = DeconvolutionGrouped;

            groupNum = weightDims[0];
            IC = groupNum * weightDims[1];
            OC = groupNum * weightDims[2];

            withGroups = groupNum > 1;
            isDW = withGroups && groupNum == OC && groupNum == IC;

@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::N
            }
            paddingL = groupConvBackprop->get_pads_begin();
            paddingR = groupConvBackprop->get_pads_end();

            outputPadding = groupConvBackprop->get_output_padding();

            autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER);
        }
        for (int i = 0; i < dilation.size(); i++) {
            kernel.push_back(weightDims[withGroups + 2 + i]);
        }

        externOutShape = inputShapes.size() == 3;
        if (externOutShape && isDynamicNode()) {
            bool isConstOutShape = ngraph::is_type<ov::op::v0::Constant>(op->get_input_node_shared_ptr(2));
            if (isConstOutShape) {
                lastOutputSpatialDims = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(2))->cast_vector<int32_t>();
            }
            const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
            if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) {
                IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum;
            }
        }
    } else {
        IE_THROW(NotImplemented) << errorMessage;
    }
@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE
    auto const blbSize = blb->GetSize();

    // WA: In int8 case, we are processing weights using internal blob.
    // So we disconnect constant node containing weights from the graph and then don't use it.
    if (getParentEdges().size() == 3) {
        removeEdge(getParentEdgeAt(2));
        inputShapes.erase(inputShapes.begin() + 2);
    }
    removeEdge(getParentEdgeAt(1));
    inputShapes.erase(inputShapes.begin() + 1);

    InferenceEngine::SizeVector dimsForBlockedDesc{dims};
    std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);

@ -160,13 +178,16 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
    if (!withGroups && stride.back() > 3)
        return false;
    if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
        auto inDims = getOutputShapeAtPort(0).getStaticDims();
        const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims();
        if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) {
            return false;
        }
        // heuristicConst = 2^26
        // heuristicParam = IC^2 * SP
        auto heuristicConst = 67108864;
        auto heuristicParam = IC * IC;
        for (int i = 2; i < inDims.size(); i++)
            heuristicParam *= inDims[i];
        for (int i = 2; i < inMaxDims.size(); i++)
            heuristicParam *= inMaxDims[i];
        if (heuristicParam > heuristicConst)
            return false;
    }
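Editor's note: a worked instance of the int8 heuristic above, with hypothetical sizes, reading SP as the product of the spatial dims per the comment. IC = 256 over a 32x32 spatial area lands exactly on the 2^26 threshold, so the int8 path would still be allowed:

constexpr long long heuristicConst = 1LL << 26;        // 67108864, as in the code above
constexpr long long IC = 256, H = 32, W = 32;          // assumed channel/spatial sizes
constexpr long long heuristicParam = IC * IC * H * W;  // 256^2 * 1024 = 2^26
static_assert(heuristicParam <= heuristicConst, "int8 path would not be rejected");
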
@ -203,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const {
    return (fusedWith.empty() && node->canBePerformedAsScaleShift(this));
}

void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    if (!descs_fwd.empty() && !descs_bwd.empty())
        return;
void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inDims, const std::vector<int32_t>& outSpDims) {
    std::vector<ov::StaticShape> input_shapes{inDims.getStaticDims(), getWeightDims()};
    ov::StaticShape output_shape_input;
    if (externOutShape) {
        IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]);
        input_shapes.push_back({outSpDims.size()});
        for (size_t i = 0; i < outSpDims.size(); i++) {
            output_shape_input.push_back(outSpDims[i]);
        }
    }

    if (getAlgorithm() == DeconvolutionCommon) {
        auto deconv = ngraph::as_type_ptr<ngraph::op::v1::ConvolutionBackpropData>(op);
        IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2));
    } else if (getAlgorithm() == DeconvolutionGrouped) {
        auto deconv = ngraph::as_type_ptr<ngraph::op::v1::GroupConvolutionBackpropData>(op);
        IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3));
    }
}

std::pair<VectorDims, VectorDims> MKLDNNDeconvolutionNode::makeDummyInOutShape() {
    auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0));
    auto outShape = getOutputShapeAtPort(0);

    if (isDynamicNode()) {
        if (externOutShape) {
            if (lastOutputSpatialDims.empty()) {
                const auto& shape = getOutputShapeAtPort(0);
                lastOutputSpatialDims.resize(shape.getRank() - 2);

                const auto& minDims = shape.getMinDims();
                const auto& maxDims = shape.getMaxDims();
                const auto& dims = shape.getDims();
                for (size_t i = 0; i < dims.size() - 2; ++i) {
                    lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2],
                                                                                              std::max(minDims[i + 2], static_cast<Dim>(64))) : dims[i + 2];
                }
            }
            ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL;
            ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR;

            auto inputDims = inShape.getStaticDims();
            const auto& weightDims = getWeightDims();
            const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
            for (size_t i = 0; i < inputDims.size() - 2; i++) {
                inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) *
                                    (weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) /
                                    stride[i] + 1;
            }

            inShape = Shape(inputDims);
        }
        initPadding(opToShapeInfer, inShape, lastOutputSpatialDims);
        outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims));
    }
    return {inShape.getStaticDims(), outShape.getStaticDims()};
}

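Editor's note: the loop above inverts the deconvolution output-shape formula to pick a dummy input size. A compile-time check of that arithmetic under assumed values (kernel 4, stride 2, pads 1/1, no dilation or output padding, desired output 64):

constexpr int out = 64, k = 4, stride = 2, dil = 0, pb = 1, pe = 1, outPad = 0;
constexpr int in = (out - (dil + 1) * (k - 1) - 1 + pb + pe - outPad) / stride + 1;
static_assert(in == 32, "forward check: (32 - 1) * 2 - 1 - 1 + (4 - 1) + 1 + 0 == 64");
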
void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    isInt8 = canBeExecutedInInt8();

    InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0);
@ -236,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    if (getChildEdges().empty())
        IE_THROW() << errorPrefix << " has incorrect number of output edges";

    for (int i = 0; i < paddingR.size(); i++) {
        int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
        int krn = weightDims[with_group + 2 + i];
        int src = getOutputShapeAtPort(0).getStaticDims()[2 + i];
        int dst = getInputShapeAtPort(0).getStaticDims()[2 + i];

        krn = (krn - 1)*(dilation[i] + 1) + 1;
        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
        paddingR[i] = (dst - calc_dst) * stride[i];
    }
    VectorDims inDims, outDims;
    std::tie(inDims, outDims) = makeDummyInOutShape();
    inShape = Shape(inDims);
    Shape outShape(outDims);
    initPaddingR(inShape, outShape);

    if (isInt8) {
        int8WeightDims = getWeightDims();
        // WA: if int8 deconvolution is supported, we create internal weights blob in IO format
        std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]);
        internalBlobs.push_back(createWeiBlobAsIO(weightDims));
        std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]);
        internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims));
        auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc;
        MemoryDescPtr in_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getInputShapeAtPort(0), inputDataType, format);
        MemoryDescPtr out_candidate = std::make_shared<DnnlBlockedMemoryDesc>(getOutputShapeAtPort(0), outputDataType, format);
@ -262,18 +334,31 @@
            createDescriptor({in_candidate}, {out_candidate});
        }
    }
    setPostOps(attr);
    setPostOps(attr, outShape.getStaticDims());
}

void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) {
    for (int i = 0; i < paddingR.size(); i++) {
        int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0;
        const auto& weightDims = getWeightDims();
        int krn = weightDims[with_group + 2 + i];
        int src = outShape.getStaticDims()[2 + i];
        int dst = inShape.getStaticDims()[2 + i];

        krn = (krn - 1)*(dilation[i] + 1) + 1;
        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
        paddingR[i] = (dst - calc_dst) * stride[i];
    }
}

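Editor's note: a scalar mirror of the paddingR computation in initPaddingR above, with hypothetical sizes, to make the arithmetic concrete:

#include <cstdio>

// Same formula as initPaddingR above, for one spatial dimension.
int padding_r(int src, int dst, int krn, int stride, int dilation, int paddingL) {
    const int eff_krn = (krn - 1) * (dilation + 1) + 1;      // dilated kernel extent
    const int calc_dst = (src - eff_krn + paddingL) / stride + 1;
    return (dst - calc_dst) * stride;
}

int main() {
    // e.g. output dim 10, input dim 5, 3-wide kernel, stride 2, no dilation, paddingL 1
    std::printf("%d\n", padding_r(10, 5, 3, 2, 0, 1));       // prints 0: no extra right padding
}
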
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) {
    mkldnn::post_ops ops;

    auto getBinPostOpShape = [&](){
        const auto outShape = getOutputShapeAtPort(0).getStaticDims();
        const auto outShapeRank = getOutputShapeAtPort(0).getRank();
        const auto chIdx = getFusingAxis();
        std::vector<size_t> binaryShape(outShapeRank, 1);
        binaryShape[chIdx] = outShape[chIdx];
        binaryShape[chIdx] = dims[chIdx];
        return binaryShape;
    };

@ -282,7 +367,7 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
            // TODO [DS]: change to shape from memory
            constexpr int align = 16;
            // use legacy depthwise since backprop convolution does not support binary post ops
            eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
            eltwiseNode->appendPostOps(ops, dims, align);
            continue;
        }
        if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
@ -339,80 +424,277 @@ bool MKLDNNDeconvolutionNode::created() const {
    return getType() == Deconvolution;
}

void MKLDNNDeconvolutionNode::createPrimitive() {
    if (prim)
        return;
bool MKLDNNDeconvolutionNode::needShapeInfer() const {
    if (inputShapesModified()) {
        return true;
    }
    if (externOutShape) {
        if (lastOutputSpatialDims != readOutputSpatialDims()) {
            return true;
        }
    }

    return false;
}

std::vector<VectorDims> MKLDNNDeconvolutionNode::shapeInfer() const {
    const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
    std::vector<int32_t> outSpDims;
    if (externOutShape) {
        outSpDims = readOutputSpatialDims();
    }
    return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)};
}

VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const {
    std::vector<ov::StaticShape> inputShapes = {
        inDims,
        getWeightDims()
    };

    std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>> inputValues;

    if (externOutShape) {
        if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) {
            IE_THROW() << "Can't compute output shape for node with name: " << getName()
                       << ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect";
        }
        inputShapes.push_back({outSpDims.size()});
        inputValues.insert({2, std::make_shared<ngraph::runtime::HostTensor>(ngraph::element::Type_t::i32,
                                                                             inputShapes.back().to_shape(),
                                                                             outSpDims.data())});
    }

    std::vector<ov::StaticShape> outputShapes(1);
    shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues);

    return outputShapes.back().to_shape();
}

void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
    if (!execPtr) {
        IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled";
    }
    execPtr->exec(strm);

    if (externOutShape) {
        lastOutputSpatialDims = readOutputSpatialDims();
    }
}

std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                                         const mkldnn::memory::desc& wghDesc,
                                                                                         const mkldnn::memory::desc& dstDesc,
                                                                                         bool isWinograd) const {
    mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct;
    std::shared_ptr<convolution_backward_data::desc> deconv_desc;
    std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
    std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, alg);
    if (fwd_conv_pd->get(true) == nullptr) {
        IE_THROW() << "Forward convolution primitive descriptor is null for node with name: " << getName();
    }
    return std::make_shared<MKLDNNDescriptor>(deconv_desc, fwd_conv_pd);
}

std::shared_ptr<MKLDNNDescriptor> MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                                      const mkldnn::memory::desc& wghDesc,
                                                                                      const mkldnn::memory::desc& dstDesc) const {
    return std::make_shared<MKLDNNDescriptor>(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc));
}

void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
                                               MKLDNNMemoryPtr srcMemPtr,
                                               MKLDNNMemoryPtr wghMemPtr,
                                               MKLDNNMemoryPtr dstMemPtr,
                                               AttrPtr attr,
                                               impl_desc_type selectedImpl) {
    auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr);

    while (static_cast<bool>(itpd)) {
        impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());

        if (impl_type == selectedImpl) {
            if (isInt8) {
                auto prim_desc = createPrimitiveDescriptor<deconvolution_forward::primitive_desc,
                                                           deconvolution_forward::desc>(attr);

                prim.reset(new deconvolution_forward(prim_desc));

                auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
                auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
                primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}};
                if (internalBlobMemory.empty()) {
                    prepareMemory(itpd);
                }
                auto prim_desc = deconvolution_forward::primitive_desc(itpd.get());
                execPtr = std::make_shared<DeconvExecutorInt8>(prim_desc, srcMemPtr, internalBlobMemory.front(), dstMemPtr, *attr,
                                                               binaryPostOpsArgs, getEngine());
            } else {
                auto prim_desc = createPrimitiveDescriptor<convolution_backward_data::primitive_desc,
                                                           convolution_backward_data::desc, convolution_forward::primitive_desc>(attr);

                prim.reset(new convolution_backward_data(prim_desc));

                auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
                auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive();
                auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
                primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}};
                auto prim_desc = convolution_backward_data::primitive_desc(itpd.get());
                execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr,
                                                                  binaryPostOpsArgs, getEngine());
            }

            appendPostOpArgs(attr);
        }

void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
                                               const std::vector<MemoryDescPtr> &outputDesc) {
    const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]);
    const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]);

    // grouping and autoblicking is not compatible
    if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
        return;
        }

        if (!itpd.next_impl()) {
            auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()),
                                               memory::data_type::f32,
                                               memory::format_tag::any);
            auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()),
                                                memory::data_type::f32,
                                                memory::format_tag::any);
            auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()),
                                                memory::data_type::f32,
                                                memory::format_tag::any);

            std::shared_ptr<MKLDNNDescriptor> anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false);
            auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr);
            if (static_cast<bool>(anyDeconvItpd)) {
                auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get());
                execPtr = std::make_shared<DeconvExecutorDefault>(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr,
                                                                  binaryPostOpsArgs, getEngine());
                return;
            }
        }
    }
    IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}

void MKLDNNDeconvolutionNode::prepareParams() {
    auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
    auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        IE_THROW() << "Destination memory didn't allocate.";
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        IE_THROW() << "Input memory didn't allocate.";
    const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
    if (selected_pd == nullptr)
        IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";

    auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();
    auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType<DnnlMemoryDesc>();

    auto initPrimitiveAttr = [&]() {
        mkldnn::primitive_attr attr;
        setPostOps(attr, dstMemPtr->getStaticDims());
        return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
    };

    AttrPtr pAttrLocal;

    if (isDynamicNode()) {
        if (!pAttr) {
            pAttr = initPrimitiveAttr();
        }
        pAttrLocal = pAttr;
        if (autoPad || externOutShape) {
            initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector<int32_t>{});
        }
        initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape());
    } else {
        pAttrLocal = initPrimitiveAttr();
    }

    const auto in_candidate = inMemoryDesc->getDnnlDesc();
    const auto out_candidate = outMemoryDesc->getDnnlDesc();

    mkldnn::memory::desc wgh_candidate;
    if (isInt8) {
        if (internalBlobMemory.empty()) {
            wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
        } else {
            wgh_candidate = internalBlobMemory.front()->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
        }
    } else {
        wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
    }

    std::shared_ptr<MKLDNNDescriptor> desc;
    if (isInt8) {
        desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate);
    } else {
        desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate,
                                             selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd);
    }

    createDeconvPrim(desc, srcMemPtr, getParentEdgesAtPort(1)[0]->getMemoryPtr(), dstMemPtr, pAttrLocal, selected_pd->getImplementationType());
}

void MKLDNNDeconvolutionNode::createPrimitive() {
    if (inputShapesDefined()) {
        if (needPrepareParams())
            prepareParams();
        updateLastInputDims();
    }
}

MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
                                                                                                     const mkldnn::memory::desc& wgh_candidate,
                                                                                                     const mkldnn::memory::desc& out_candidate,
                                                                                                     mkldnn::algorithm alg) const {
    auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
        return memory::dims(orig_dims.begin(), orig_dims.end());
    };

    if (isInt8) {
        mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any);
        std::shared_ptr<mkldnn::deconvolution_forward::desc> deconv_desc;
        deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
                                                          in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(),
                                                          convertDims(stride), convertDims(dilation),
                                                          convertDims(paddingL), convertDims(paddingR)));
        descs.emplace_back(deconv_desc);
    } else {
        mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any);
        for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
            std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
            conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg,
                                                          out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(),
    conv_desc = std::make_shared<convolution_forward::desc>(prop_kind::forward_inference, alg,
                                                            out_candidate, wgh_candidate, in_candidate,
                                                            convertDims(stride),
                                                            convertDims(dilation),
                                                            convertDims(paddingL),
                                                            convertDims(paddingR)));
                                                            convertDims(paddingR));

            std::shared_ptr<mkldnn::convolution_backward_data::desc> deconv_desc;
            deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate,
                                                                  in_candidate.getDnnlDesc(),
    deconv_desc = std::make_shared<convolution_backward_data::desc>(alg, out_candidate, wgh_candidate,
                                                                    in_candidate,
                                                                    convertDims(stride),
                                                                    convertDims(dilation),
                                                                    convertDims(paddingL),
                                                                    convertDims(paddingR)));
            descs_fwd.push_back(conv_desc);
            descs_bwd.push_back(deconv_desc);
                                                                    convertDims(paddingR));

    auto fwd_conv_pd = std::make_shared<convolution_forward::primitive_desc>(*conv_desc, getEngine(), true);

    return {deconv_desc, fwd_conv_pd};
}

MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
                                                                                              const mkldnn::memory::desc& wgh_candidate,
                                                                                              const mkldnn::memory::desc& out_candidate) const {
    auto convertDims = [] (const std::vector<ptrdiff_t>& orig_dims) {
        return memory::dims(orig_dims.begin(), orig_dims.end());
    };

    MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc;
    deconv_desc = std::make_shared<mkldnn::deconvolution_forward::desc>(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct,
                                                                        in_candidate, wgh_candidate, out_candidate,
                                                                        convertDims(stride), convertDims(dilation),
                                                                        convertDims(paddingL), convertDims(paddingR));
    return deconv_desc;
}

void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
                                               const std::vector<MemoryDescPtr> &outputDesc) {
    auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims());
    auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc);
    auto in_candidate = dnnlInDesc.getDnnlDesc();

    auto outDesc = outputDesc[0];
    if (!outDesc->isDefined()) {
        const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims);
        outDesc = outDesc->cloneWithNewDims(outShape);
    }
    auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc);
    auto out_candidate = dnnlOutDesc.getDnnlDesc();

    // grouping and autoblocking is not compatible
    if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended()))
        return;

    if (isInt8) {
        mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any);
        descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate));
    } else {
        mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()),
                                           dnnlInDesc.getDataType(), memory::format_tag::any);
        for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) {
            std::shared_ptr<convolution_backward_data::desc> deconv_desc;
            std::shared_ptr<convolution_forward::primitive_desc> fwd_conv_pd;
            std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg);
            if (fwd_conv_pd->get(true) == nullptr)
                continue;

            descs.emplace_back(deconv_desc, fwd_conv_pd);
        }
    }
@ -420,15 +702,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr>

std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    if (idx == 2) {
        return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims()));
        return std::make_shared<CpuBlockedMemoryDesc>(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims()));
    } else if (idx > 0 && isInt8) {
        // we need to store the 'weight' input as an edge, because at this point we can't simply replace
        // the internal blob with the input: we need to keep the weight data as is, just in a different order
        return std::make_shared<CpuBlockedMemoryDesc>(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims()));
    }

    auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx);
    if (getInputShapeAtPort(idx).isDynamic()) {
        return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx));
    }
    return MKLDNNExtensionUtils::makeDescriptor(desc);
}

std::shared_ptr<MemoryDesc> MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);
    if (getOutputShapeAtPort(idx).isDynamic()) {
        return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
    }
    return MKLDNNExtensionUtils::makeDescriptor(desc);
}

@ -446,4 +738,117 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const
    return getMaxPrecision(inputPrecisions);
}

MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(MKLDNNMemoryPtr memFrom,
                                                                      const mkldnn::memory::desc& descTo,
                                                                      const mkldnn::engine& engine) : m_memFrom(memFrom) {
    m_memTo = std::make_shared<MKLDNNMemory>(engine);
    m_memTo->Create(MKLDNNExtensionUtils::makeDescriptor(descTo));
    m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive());
}

MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descFrom,
                                                                      MKLDNNMemoryPtr memTo,
                                                                      const mkldnn::engine& engine) : m_memTo(memTo) {
    m_memFrom = std::make_shared<MKLDNNMemory>(engine);
    m_memFrom->Create(MKLDNNExtensionUtils::makeDescriptor(descFrom));
    m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive());
}

void MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::exec(mkldnn::stream strm) {
    auto src = m_memFrom->GetPrimitive();
    auto dst = m_memTo->GetPrimitive();
    m_reorder.execute(strm, src, dst);
}

void MKLDNNDeconvolutionNode::DeconvExecutor::exec(mkldnn::stream strm) {
    for (auto &inReorder : inputReorders) {
        inReorder.exec(strm);
    }
    (*execPrim).execute(strm, primArgs);
    for (auto &outReorder : outputReorders) {
        outReorder.exec(strm);
    }
}

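Editor's note: the executor above follows a simple pipeline — reorder inputs into the layouts the primitive expects, run the primitive, reorder outputs back. A generic sketch of that shape (hypothetical Step interface, not the plugin types):

#include <memory>
#include <vector>

struct Step {
    virtual void run() = 0;
    virtual ~Step() = default;
};

struct Executor {
    std::vector<std::unique_ptr<Step>> inputReorders, outputReorders;
    std::unique_ptr<Step> primitive;

    void exec() {
        for (auto& r : inputReorders) r->run();   // bring args into expected layouts
        primitive->run();                         // the actual compute
        for (auto& r : outputReorders) r->run();  // copy results back out
    }
};
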
MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
|
||||
MKLDNNMemoryPtr inMem,
|
||||
MKLDNNMemoryPtr weightMem,
|
||||
MKLDNNMemoryPtr outMem,
|
||||
const mkldnn::primitive_attr &attr,
|
||||
const std::vector<MKLDNNMemoryPtr>& binPostOpsArgs,
|
||||
const mkldnn::engine& engine) {
|
||||
execPrim.reset(new mkldnn::convolution_backward_data(pd));
|
||||
|
||||
if (inMem->GetPrimitive().get_desc() != pd.diff_dst_desc()) {
|
||||
inputReorders.push_back(IntermReorder(inMem, pd.diff_dst_desc(), engine));
|
||||
primArgs[DNNL_ARG_DIFF_DST] = inputReorders.back().getToMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_DIFF_DST] = inMem->GetPrimitive();
|
||||
}
|
||||
|
||||
if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) {
|
||||
inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine));
|
||||
primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive();
|
||||
}
|
||||
|
||||
if (outMem->GetPrimitive().get_desc() != pd.diff_src_desc()) {
|
||||
outputReorders.push_back(IntermReorder(pd.diff_src_desc(), outMem, engine));
|
||||
primArgs[DNNL_ARG_DIFF_SRC] = outputReorders.back().getFromMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_DIFF_SRC] = outMem->GetPrimitive();
|
||||
}
|
||||
MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs);
|
||||
}
|
||||
|
||||
MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
|
||||
MKLDNNMemoryPtr inMem,
|
||||
MKLDNNMemoryPtr weightMem,
|
||||
MKLDNNMemoryPtr outMem,
|
||||
const mkldnn::primitive_attr &attr,
|
||||
const std::vector<MKLDNNMemoryPtr>& binPostOpsArgs,
|
||||
const mkldnn::engine& engine) {
|
||||
execPrim.reset(new mkldnn::deconvolution_forward(pd));
|
||||
|
||||
if (inMem->GetPrimitive().get_desc() != pd.src_desc()) {
|
||||
inputReorders.push_back(IntermReorder(inMem, pd.src_desc(), engine));
|
||||
primArgs[DNNL_ARG_SRC] = inputReorders.back().getToMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_SRC] = inMem->GetPrimitive();
|
||||
}
|
||||
|
||||
if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) {
|
||||
inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine));
|
||||
primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive();
|
||||
}
|
||||
|
||||
if (outMem->GetPrimitive().get_desc() != pd.dst_desc()) {
|
||||
outputReorders.push_back(IntermReorder(pd.dst_desc(), outMem, engine));
|
||||
primArgs[DNNL_ARG_DST] = outputReorders.back().getFromMem()->GetPrimitive();
|
||||
} else {
|
||||
primArgs[DNNL_ARG_DST] = outMem->GetPrimitive();
|
||||
}
|
||||
MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs);
|
||||
}
|
||||
|
||||
std::vector<int32_t> MKLDNNDeconvolutionNode::readOutputSpatialDims() const {
|
||||
if (getParentEdges().size() < 3) {
|
||||
IE_THROW() << "Can't get output spatial dims. Inputs number = " << getParentEdges().size();
|
||||
}
|
||||
const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr();
|
||||
if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) {
|
||||
IE_THROW() << "'output_shape' input memory is not allocated.";
|
||||
}
|
||||
const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2;
|
||||
if (shapeMemPtr->getStaticDims()[0] != spDimsNum) {
|
||||
IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements";
|
||||
}
|
||||
const int32_t *outShapePtr = reinterpret_cast<const int32_t *>(shapeMemPtr->GetPtr());
|
||||
std::vector<int32_t> outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]);
|
||||
return outSpDims;
|
||||
}
|
||||
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
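Both executors above follow the same pattern: for each argument, compare the runtime memory descriptor with the one the primitive descriptor expects and stage an intermediate reorder only on mismatch. A minimal sketch of that pattern against plain oneDNN follows; `prepare_arg` is a hypothetical helper, not the plugin's code, and error handling and scratchpads are omitted.

```cpp
// Sketch of the reorder-on-mismatch pattern used by the executors above.
// prepare_arg is illustrative only; it is not part of the MKLDNN plugin.
#include <dnnl.hpp>
#include <utility>
#include <vector>

dnnl::memory prepare_arg(const dnnl::memory& src,
                         const dnnl::memory::desc& expected,
                         const dnnl::engine& eng,
                         std::vector<dnnl::reorder>& pre_reorders,
                         std::vector<std::pair<dnnl::memory, dnnl::memory>>& io) {
    if (src.get_desc() == expected)
        return src;                          // layout already matches, use as-is
    dnnl::memory staged(expected, eng);      // intermediate buffer in the expected layout
    pre_reorders.emplace_back(src, staged);  // reorder runs before the main primitive
    io.emplace_back(src, staged);
    return staged;
}
```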

@ -13,6 +13,10 @@
namespace MKLDNNPlugin {

class MKLDNNDeconvolutionNode : public MKLDNNNode {
    using DefaultDeconvDescs = std::pair<std::shared_ptr<mkldnn::convolution_backward_data::desc>,
                                         std::shared_ptr<mkldnn::convolution_forward::primitive_desc>>;
    using Int8DeconvDesc = std::shared_ptr<mkldnn::deconvolution_forward::desc>;

public:
    MKLDNNDeconvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);

@ -39,27 +43,120 @@ public:
    static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
    bool canFuse(const MKLDNNNodePtr& node) const override;

    const InferenceEngine::SizeVector& getWeightDims() { return weightDims; }
    const std::vector<ptrdiff_t>& getStride() { return stride; }
    const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
    const std::vector<ptrdiff_t>& getStride() const { return stride; }

    void prepareParams() override;
    void execute(mkldnn::stream strm) override;
    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
    bool needShapeInfer() const override;
    std::vector<VectorDims> shapeInfer() const override;

private:
    class DeconvExecutor {
    protected:
        class IntermReorder {
        public:
            IntermReorder(MKLDNNMemoryPtr memFrom, const mkldnn::memory::desc& descTo, const mkldnn::engine& engine);
            IntermReorder(const mkldnn::memory::desc& descFrom, MKLDNNMemoryPtr memTo, const mkldnn::engine& engine);
            MKLDNNMemoryPtr getFromMem() const { return m_memFrom; }
            MKLDNNMemoryPtr getToMem() const { return m_memTo; }
            void exec(mkldnn::stream strm);

        private:
            MKLDNNMemoryPtr m_memFrom;
            MKLDNNMemoryPtr m_memTo;
            mkldnn::reorder m_reorder;
        };

    public:
        void exec(mkldnn::stream strm);
        virtual ~DeconvExecutor() = default;

    protected:
        DeconvExecutor() = default;
        std::vector<IntermReorder> inputReorders;
        MKLDNNPrimitive execPrim;
        std::vector<IntermReorder> outputReorders;
        std::unordered_map<int, mkldnn::memory> primArgs;
    };

    using executorPtr = std::shared_ptr<DeconvExecutor>;
    executorPtr execPtr = nullptr;

    class DeconvExecutorDefault : public DeconvExecutor {
    public:
        DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd,
                              MKLDNNMemoryPtr inMem,
                              MKLDNNMemoryPtr weightMem,
                              MKLDNNMemoryPtr outMem,
                              const mkldnn::primitive_attr &attr,
                              const std::vector<MKLDNNMemoryPtr>& binPostOpsArgs,
                              const mkldnn::engine& engine);
    };

    class DeconvExecutorInt8 : public DeconvExecutor {
    public:
        DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd,
                           MKLDNNMemoryPtr inMem,
                           MKLDNNMemoryPtr weightMem,
                           MKLDNNMemoryPtr outMem,
                           const mkldnn::primitive_attr &attr,
                           const std::vector<MKLDNNMemoryPtr>& binPostOpsArgs,
                           const mkldnn::engine& engine);
    };

    bool withGroups = false;
    bool isDW = false;
    bool isInt8 = false;
    bool autoPad = false;
    bool externOutShape = false;
    size_t groupNum = 1;
    size_t IC;
    size_t OC;
    std::vector<ptrdiff_t> kernel;
    std::vector<ptrdiff_t> stride;
    std::vector<ptrdiff_t> dilation;
    std::vector<ptrdiff_t> paddingL;
    std::vector<ptrdiff_t> paddingR;
    InferenceEngine::SizeVector weightDims;
    std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
    std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;
    ov::CoordinateDiff paddingL;
    ov::CoordinateDiff paddingR;
    ov::CoordinateDiff outputPadding;
    std::vector<int32_t> lastOutputSpatialDims;
    VectorDims int8WeightDims;

    Shape inShape;

    AttrPtr pAttr;

    mkldnn::primitive_attr attr;
    void setPostOps(mkldnn::primitive_attr &attr);
    void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims);

    VectorDims shapeInferInternal(const VectorDims &inDims, std::vector<int32_t> outSpDims) const;
    void initPadding(std::shared_ptr<ngraph::Node> op, const Shape &inShape, const std::vector<int32_t>& outSpDims);
    void initPaddingR(const Shape &inShape, const Shape &outShape);
    std::vector<int32_t> readOutputSpatialDims() const;
    std::pair<VectorDims, VectorDims> makeDummyInOutShape();

    DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate,
                                                       const mkldnn::memory::desc& wgh_candidate,
                                                       const mkldnn::memory::desc& out_candidate,
                                                       mkldnn::algorithm alg) const;
    Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate,
                                                const mkldnn::memory::desc& wgh_candidate,
                                                const mkldnn::memory::desc& out_candidate) const;
    std::shared_ptr<MKLDNNDescriptor> createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                    const mkldnn::memory::desc& wghDesc,
                                                                    const mkldnn::memory::desc& dstDesc,
                                                                    bool isWinograd) const;
    std::shared_ptr<MKLDNNDescriptor> createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc,
                                                                 const mkldnn::memory::desc& wghDesc,
                                                                 const mkldnn::memory::desc& dstDesc) const;

    void createDeconvPrim(std::shared_ptr<MKLDNNDescriptor> desc,
                          MKLDNNMemoryPtr srcMemPtr,
                          MKLDNNMemoryPtr wghMemPtr,
                          MKLDNNMemoryPtr dstMemPtr,
                          AttrPtr attr,
                          impl_desc_type selectedImpl);

    std::string errorPrefix;
@ -147,7 +147,7 @@ void MKLDNNFullyConnectedNode::createPrimitive() {
    else
        primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}};

    appendPostOpArgs(*attr);
    appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs);
}

void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) {

@ -421,7 +421,7 @@ void MKLDNNMatMulNode::prepareParams() {
    if (withBiases)
        primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive();

    appendPostOpArgs(*attr);
    appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs);
}

void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) {

@ -85,6 +85,7 @@ struct format {
    bs_fs_zyx_bsv16_fsv16,  ///< format used for 3D blocked convolution (batch and features blocked by 16)
    bs_fs_yx_bsv16_fsv16,   ///< format used for 2D blocked convolution (batch and features blocked by 16)
    bs_fs_yx_bsv4_fsv4,     ///< format used for 2D blocked convolution (batch and features blocked by 4)
    bs_fs_yx_bsv8_fsv4,     ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4)
    bs_fs_yx_bsv4_fsv2,     ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2)
    bs_fs_zyx_bsv4_fsv4,    ///< format used for 3D blocked convolution (batch and features blocked by 4)
    bs_fs_zyx_bsv4_fsv2,    ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2)

@ -255,6 +256,7 @@ struct format {
    { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}}},
    { bs_fs_yx_bsv16_fsv16,  { 1, 1, 2, 0, "bfyx",  "bfxy?", {{0, 16 }, {1, 16}}}},
    { bs_fs_yx_bsv4_fsv4,    { 1, 1, 2, 0, "bfyx",  "bfxy?", {{0, 4 }, {1, 4}}}},
    { bs_fs_yx_bsv8_fsv4,    { 1, 1, 2, 0, "bfyx",  "bfxy?", {{0, 8 }, {1, 4}}}},
    { bs_fs_yx_bsv4_fsv2,    { 1, 1, 2, 0, "bfyx",  "bfxy?", {{0, 4 }, {1, 2}}}},
    { bs_fs_zyx_bsv4_fsv4,   { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}}},
    { bs_fs_zyx_bsv4_fsv2,   { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}}},

@ -29,6 +29,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{
    { DataLayout::bs_fs_zyx_bsv16_fsv16, { 0, 1, 2, -1, 3, 4 } },
    { DataLayout::bs_fs_yx_bsv16_fsv16,  { 0, 1, -1, -1, 2, 3 } },
    { DataLayout::bs_fs_yx_bsv4_fsv4,    { 0, 1, -1, -1, 2, 3 } },
    { DataLayout::bs_fs_yx_bsv8_fsv4,    { 0, 1, -1, -1, 2, 3 } },
    { DataLayout::bs_fs_yx_bsv4_fsv2,    { 0, 1, -1, -1, 2, 3 } },
    { DataLayout::bs_fs_yx_bsv32_fsv32,  { 0, 1, -1, -1, 2, 3 } },
    { DataLayout::bs_fs_yx_bsv32_fsv16,  { 0, 1, -1, -1, 2, 3 } },

@ -206,6 +207,11 @@ NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l) {
        newDims[2] = RoundUp(newDims[2], 4);
        newDims[3] = RoundUp(newDims[3], 4);
        break;
    case bs_fs_yx_bsv8_fsv4:
        assert(newDims.size() == 4);
        newDims[2] = RoundUp(newDims[2], 4);
        newDims[3] = RoundUp(newDims[3], 8);
        break;
    case bs_fs_yx_bsv4_fsv2:
        assert(newDims.size() == 4);
        newDims[2] = RoundUp(newDims[2], 2);
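The GetSimpleDims hunk above pads each blocked dimension up to its block size (for bs_fs_yx_bsv8_fsv4: features to a multiple of 4, batch to a multiple of 8). A self-contained sketch of the rounding helper; the exact kernel_selector RoundUp signature is an assumption:

```cpp
// Assumed shape of the RoundUp helper used in GetSimpleDims above:
// pad a dimension up to the next multiple of the block size.
#include <cstddef>

constexpr std::size_t RoundUp(std::size_t value, std::size_t multiple) {
    return (value + multiple - 1) / multiple * multiple;
}

static_assert(RoundUp(13, 8) == 16, "13 rounds up to the next multiple of 8");
static_assert(RoundUp(16, 8) == 16, "exact multiples are unchanged");
```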

@ -39,6 +39,7 @@ enum DataLayout {
    bs_fs_yx_bsv16_fsv16,   // batch, feature, 2D spatial. Blocks of 16 batch and channels
    bs_fs_zyx_bsv16_fsv16,  // batch, feature, 3D spatial. Blocks of 16 batch and channels
    bs_fs_yx_bsv4_fsv4,     // batch, feature, 2D spatial. Blocks of 4 batch and 4 channels
    bs_fs_yx_bsv8_fsv4,     // batch, feature, 2D spatial. Blocks of 8 batch and 4 channels
    bs_fs_yx_bsv4_fsv2,     // batch, feature, 2D spatial. Blocks of 4 batch and 2 channels
    bs_fs_yx_bsv32_fsv32,   // batch, feature, 2D spatial. Blocks of 32 batch and 32 channels
    bs_fs_yx_bsv32_fsv16,   // batch, feature, 2D spatial. Blocks of 32 batch and 16 channels

@ -43,6 +43,9 @@ JitConstants ConvertColorKernelBase::GetJitConstants(const convert_color_params&
    case color_format::NV12:
        jit.AddConstant(MakeJitConstant("CONVERT_FROM_NV12", ""));
        break;
    case color_format::I420:
        jit.AddConstant(MakeJitConstant("CONVERT_FROM_I420", ""));
        break;
    default:
        IE_THROW() << "Not supported input color format";
    }

@ -196,15 +196,16 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
        return {};
    }

    auto preferredWeightsLayout = GetPreferredWeightsLayout(newParams);
    bool succeed = UpdateWeightsParams(newParams,
                                       options,
                                       GetPreferredWeightsLayout(newParams),
                                       preferredWeightsLayout,
                                       kd.weightsReorderParams,
                                       GetSupportedKey(),
                                       newParams.groups,
                                       newParams.transposed);

    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == GetPreferredWeightsLayout(newParams);
    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == preferredWeightsLayout;
    const bool bWeightsOK = bSupportedWeightsLayout || options.allowStaticInputReordering;

    if (!succeed || !bWeightsOK) {

@ -13,6 +13,8 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const {
    k.EnableInputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableInputLayout(DataLayout::bfzyx);
@ -21,6 +23,7 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const {
    k.EnableOutputLayout(DataLayout::bfwzyx);

    k.EnableBatching();
    k.EnableDifferentTypes();

    return k;
}
@ -117,25 +120,29 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons
    if (tuning_data.tile_k_size > tuning_data.simd_size) {
        jit.AddConstants({
            MakeJitConstant("A_VEC_SIZE", tuning_data.tile_k_size / tuning_data.simd_size),
            MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size)),
            MakeJitConstant("A_FLOATN", std::string("CAT(INPUT0_TYPE, ") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size) + ")"),
        });
    } else {
        jit.AddConstants({
            MakeJitConstant("A_VEC_SIZE", 1),
            MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE")),
            MakeJitConstant("A_FLOATN", std::string("INPUT0_TYPE")),
        });
    }

    if (tuning_data.tile_n_size > tuning_data.simd_size) {
        jit.AddConstants({
            MakeJitConstant("B_VEC_SIZE", b_vec_size),
            MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE") + toCodeString(b_vec_size)),
            MakeJitConstant("B_FLOATN", std::string("CAT(INPUT1_TYPE, ") + toCodeString(b_vec_size) + ")"),
            MakeJitConstant("OUTPUT_TYPE_VEC", std::string("CAT(OUTPUT_TYPE, ") + toCodeString(b_vec_size) + ")"),
            MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("CAT(ACCUMULATOR_TYPE, ") + toCodeString(b_vec_size) + ")"),
        });
    } else {
        b_vec_size = 1;
        jit.AddConstants({
            MakeJitConstant("B_VEC_SIZE", 1),
            MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE")),
            MakeJitConstant("B_VEC_SIZE", b_vec_size),
            MakeJitConstant("B_FLOATN", std::string("INPUT1_TYPE")),
            MakeJitConstant("OUTPUT_TYPE_VEC", std::string("OUTPUT_TYPE")),
            MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("ACCUMULATOR_TYPE")),
        });
    }
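The UNIT_TYPE-to-CAT(INPUT0_TYPE, N) switch above is what lets the tiled GEMM kernel handle different input, output, and accumulator types. A small host-side sketch of the string the jit constant carries (make_vec_type is illustrative only, not the kernel_selector API):

```cpp
// Illustrative only: mirrors how the host code above emits vectorized type
// names; in-kernel, CAT(INPUT0_TYPE, 4) expands to e.g. float4 or half4.
#include <iostream>
#include <string>

std::string make_vec_type(const std::string& base_macro, int vec_size) {
    if (vec_size == 1)
        return base_macro;                                 // scalar fallback branch
    return "CAT(" + base_macro + ", " + std::to_string(vec_size) + ")";
}

int main() {
    std::cout << make_vec_type("INPUT0_TYPE", 4) << '\n';  // CAT(INPUT0_TYPE, 4)
    std::cout << make_vec_type("INPUT1_TYPE", 1) << '\n';  // INPUT1_TYPE
}
```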

@ -183,6 +190,10 @@ bool GemmKernelTiledOpt::Validate(const Params& params, const optional_params& o
    if ((gmm_params.transpose_input0 || gmm_params.transpose_input1) && gemm_leftovers)
        return false;

    for (size_t i = 1; i < gmm_params.inputs.size(); i++)
        if (gmm_params.inputs[0].GetDType() != gmm_params.inputs[i].GetDType())
            return false;

    return true;
}
}  // namespace kernel_selector

@ -5,11 +5,14 @@
#include "include/batch_headers/fetch_data.cl"
#include "include/batch_headers/data_types.cl"

#ifdef CONVERT_FROM_NV12
#if defined(CONVERT_FROM_NV12) || defined(CONVERT_FROM_I420)
#ifdef BUFFER_MEM
KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y,
#if INPUTS_COUNT == 2
                          const __global INPUT1_TYPE* input_uv,
KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1,
#if INPUTS_COUNT > 1
                          const __global INPUT1_TYPE* input2,
#if INPUTS_COUNT == 3
                          const __global INPUT2_TYPE* input3,
#endif
#endif
                          __global OUTPUT_TYPE* output) {

@ -17,16 +20,19 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y,
    const uint y = get_global_id(1);
    const uint x = get_global_id(2);

    float Y = input_y[GET_DATA_INDEX(INPUT0, b, 0, y, x)];
    float Y = input1[GET_DATA_INDEX(INPUT0, b, 0, y, x)];

#if INPUTS_COUNT == 2
    float U = input_uv[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)];
    float V = input_uv[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)];
#if INPUTS_COUNT == 3
    float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)];
    float V = input3[GET_DATA_INDEX(INPUT2, b, 0, y / 2, x / 2)];
#elif INPUTS_COUNT == 2
    float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)];
    float V = input2[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)];
#else // Single plane
    uint input_uv_offset = INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2;

    float U = input_y[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset];
    float V = input_y[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset];
    float U = input1[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset];
    float V = input1[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset];
#endif

    float Ycomponent = mad(Y, 1.164f, -18.624f);
@ -57,9 +63,12 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y,


#ifdef SURFACE_MEM
KERNEL(convert_color_ref)(read_only image2d_t input_y,
#if INPUTS_COUNT == 2
                          read_only image2d_t input_uv,
KERNEL(convert_color_ref)(read_only image2d_t input1,
#if INPUTS_COUNT > 1
                          read_only image2d_t input2,
#if INPUTS_COUNT == 3
                          read_only image2d_t input3,
#endif
#endif
                          __global OUTPUT_TYPE* output) {

@ -67,17 +76,22 @@ KERNEL(convert_color_ref)(read_only image2d_t input_y,
    const uint y = get_global_id(1);
    const uint x = get_global_id(2);

    float4 Y = read_imagef(input_y, (int2)(x, y));
    float4 Y = read_imagef(input1, (int2)(x, y));
    float Ycomponent = mad(Y.x, 296.82f, -18.624f);

#if INPUTS_COUNT == 2
    float4 UV = read_imagef(input_uv, (int2)(x / 2, y / 2));
#if INPUTS_COUNT == 3
    float4 U = read_imagef(input2, (int2)(x / 2, y / 2));
    float4 V = read_imagef(input3, (int2)(x / 2, y / 2));
    float Ucomponent = mad(U.x, 255.0f, -128.f);
    float Vcomponent = mad(V.x, 255.0f, -128.f);
#elif INPUTS_COUNT == 2
    float4 UV = read_imagef(input2, (int2)(x / 2, y / 2));
    float Ucomponent = mad(UV.x, 255.0f, -128.f);
    float Vcomponent = mad(UV.y, 255.0f, -128.f);
#else // Single plane
    uint input_y_offset = INPUT0_SIZE_Y / 3 * 2;
    float4 U = read_imagef(input_y, (int2)((x / 2) * 2, y / 2 + input_y_offset));
    float4 V = read_imagef(input_y, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset));
    float4 U = read_imagef(input1, (int2)((x / 2) * 2, y / 2 + input_y_offset));
    float4 V = read_imagef(input1, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset));
    float Ucomponent = mad(U.x, 255.0f, -128.f);
    float Vcomponent = mad(V.x, 255.0f, -128.f);
#endif

@ -3,7 +3,7 @@
//

#include "include/batch_headers/fetch_data.cl"
#include "include/unit_type.cl"
#include "include/batch_headers/data_types.cl"

#define unroll_for __attribute__((opencl_unroll_hint)) for

@ -14,17 +14,17 @@
#endif // INPUT0_TYPE_SIZE == 4

#if TILE_K > SIMD_WIDTH
#define BLOCK_READ_A(ptr, offset) CAT(UNIT_BLOCK_READ, A_VEC_SIZE)(ptr, offset)
#define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, A_VEC_SIZE, ptr, offset)
#else // TILE_K > SIMD_WIDTH
#define BLOCK_READ_A(ptr, offset) UNIT_BLOCK_READ(ptr, offset)
#define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, 1, ptr, offset)
#endif // TILE_K > SIMD_WIDTH

#if TILE_N > SIMD_WIDTH
#define BLOCK_READ_B(ptr, offset) CAT(UNIT_BLOCK_READ, B_VEC_SIZE)(ptr, offset)
#define BLOCK_WRITE_C(ptr, offset, data) CAT(UNIT_BLOCK_WRITE, B_VEC_SIZE)(ptr, offset, data)
#define BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, B_VEC_SIZE, ptr, offset)
#define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, B_VEC_SIZE, ptr, offset, data)
#else // TILE_N > SIMD_WIDTH
#define BLOCK_READ_B(ptr, offset) UNIT_BLOCK_READ(ptr, offset)
#define BLOCK_WRITE_C(ptr, offset, data) UNIT_BLOCK_WRITE(ptr, offset, data)
#define BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset)
#define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, data)
#endif // TILE_N > SIMD_WIDTH

inline uint FUNC(get_input0_batch_offset)(uint b, uint f, uint w, uint z) {
@ -294,9 +294,9 @@ KERNEL(gemm_tiled_opt)(
#if TILE_N_NOT_DIVISIBLE
        if (b_raw_global_id < N) {
#ifdef INPUT2_TYPE
            OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid];
            ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid];
#else // INPUT2_TYPE
            OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id];
            ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id];
#endif // INPUT2_TYPE

#if HAS_FUSED_OPS
@ -316,9 +316,9 @@ KERNEL(gemm_tiled_opt)(

#ifdef INPUT2_TYPE
            B_FLOATN c_val = BLOCK_READ_B(c_ptr, 0);
            B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val;
            ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val;
#else // INPUT2_TYPE
            B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id];
            ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id];
#endif // INPUT2_TYPE

#if HAS_FUSED_OPS
@ -327,7 +327,7 @@ KERNEL(gemm_tiled_opt)(
#else // FUSED_OPS_CAN_USE_PRELOAD
            FUSED_OPS_VEC;
#endif // FUSED_OPS_CAN_USE_PRELOAD
            B_FLOATN res = FUSED_OPS_RESULT_VEC;
            OUTPUT_TYPE_VEC res = FUSED_OPS_RESULT_VEC;
            BLOCK_WRITE_C(d_ptr, 0, res);
#else // HAS_FUSED_OPS
            BLOCK_WRITE_C(d_ptr, 0, dequantized);

@ -506,6 +506,22 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x,
                                        CAT(prefix, _PAD_BEFORE_SIZE_X), \
                                        CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4)

#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX(prefix, b, f, y, x) \
    get_bs_fs_zyx_bsv_fsv_index( \
        b, f, 0, y, x, \
        CAT(prefix, _SIZE_X), \
        CAT(prefix, _SIZE_Y), \
        CAT(prefix, _SIZE_Z), \
        CAT(prefix, _FEATURE_NUM), \
        CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \
        CAT(prefix, _PAD_AFTER_FEATURE_NUM), \
        CAT(prefix, _PAD_BEFORE_SIZE_Z), \
        CAT(prefix, _PAD_AFTER_SIZE_Z), \
        CAT(prefix, _PAD_BEFORE_SIZE_Y), \
        CAT(prefix, _PAD_AFTER_SIZE_Y), \
        CAT(prefix, _PAD_BEFORE_SIZE_X), \
        CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4)

#define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX(prefix, b, f, y, x) \
    get_bs_fs_zyx_bsv_fsv_index( \
        b, f, 0, y, x, \
@ -605,6 +621,23 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x,
                                        CAT(prefix, _PAD_BEFORE_SIZE_X), \
                                        CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4)

#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX_SAFE(prefix, b, f, y, x) \
    get_bs_fs_zyx_bsv_fsv_index_safe( \
        b, f, 0, y, x, \
        CAT(prefix, _SIZE_X), \
        CAT(prefix, _SIZE_Y), \
        CAT(prefix, _SIZE_Z), \
        CAT(prefix, _FEATURE_NUM), \
        CAT(prefix, _BATCH_NUM), \
        CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \
        CAT(prefix, _PAD_AFTER_FEATURE_NUM), \
        CAT(prefix, _PAD_BEFORE_SIZE_Z), \
        CAT(prefix, _PAD_AFTER_SIZE_Z), \
        CAT(prefix, _PAD_BEFORE_SIZE_Y), \
        CAT(prefix, _PAD_AFTER_SIZE_Y), \
        CAT(prefix, _PAD_BEFORE_SIZE_X), \
        CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4)
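For reference, the index these macros compute reduces, once padding is ignored, to a block decomposition of the batch and feature coordinates. A hedged, padding-free sketch of the bsv8_fsv4 case (the real helpers above also account for the pad-before/pad-after terms):

```cpp
// Padding-free sketch of the index math behind the bsv8_fsv4 macros above:
// outer order is (b-block, f-block, y, x), inner order is (b-in-block, f-in-block).
#include <cstddef>

std::size_t bsv8_fsv4_index(std::size_t b, std::size_t f, std::size_t y, std::size_t x,
                            std::size_t f_num, std::size_t y_size, std::size_t x_size) {
    const std::size_t bsv = 8, fsv = 4;
    const std::size_t f_blocks = (f_num + fsv - 1) / fsv;  // feature dim padded to 4
    std::size_t idx = b / bsv;                             // batch block
    idx = idx * f_blocks + f / fsv;                        // feature block
    idx = idx * y_size + y;
    idx = idx * x_size + x;
    idx = idx * bsv + b % bsv;                             // batch within block
    idx = idx * fsv + f % fsv;                             // feature within block
    return idx;
}
```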

#define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX_SAFE(prefix, b, f, y, x) \
    get_bs_fs_zyx_bsv_fsv_index_safe( \
        b, f, 0, y, x, \

@ -334,6 +334,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
        layout == DataLayout::fs_b_yx_fsv32 ||
        layout == DataLayout::bs_fs_yx_bsv16_fsv16 ||
        layout == DataLayout::bs_fs_yx_bsv4_fsv4 ||
        layout == DataLayout::bs_fs_yx_bsv8_fsv4 ||
        layout == DataLayout::bs_fs_yx_bsv4_fsv2 ||
        layout == DataLayout::bs_fs_yx_bsv32_fsv16 ||
        layout == DataLayout::bs_fs_yx_bsv32_fsv32) {
@ -346,6 +347,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
            layout == DataLayout::bs_fs_yx_bsv32_fsv32 ||
            layout == DataLayout::bs_fs_yx_bsv32_fsv16 ||
            layout == DataLayout::bs_fs_yx_bsv4_fsv4 ||
            layout == DataLayout::bs_fs_yx_bsv8_fsv4 ||
            layout == DataLayout::bs_fs_yx_bsv4_fsv2 ||
            layout == DataLayout::bs_fs_yx_bsv16_fsv16)
            safe_index_func_val = "GET_DATA_" + layout_str + "_INDEX_SAFE(" + _name + ", b, f, y, x)";

@ -105,6 +105,7 @@ std::string toString(DataLayout l) {
        case kernel_selector::DataLayout::bs_fs_yx_bsv16_fsv16:  return "BS_FS_YX_BSV16_FSV16";
        case kernel_selector::DataLayout::bs_fs_zyx_bsv16_fsv16: return "BS_FS_ZYX_BSV16_FSV16";
        case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv4:    return "BS_FS_YX_BSV4_FSV4";
        case kernel_selector::DataLayout::bs_fs_yx_bsv8_fsv4:    return "BS_FS_YX_BSV8_FSV4";
        case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv2:    return "BS_FS_YX_BSV4_FSV2";
        case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv32:  return "BS_FS_YX_BSV32_FSV32";
        case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv16:  return "BS_FS_YX_BSV32_FSV16";

@ -125,7 +125,7 @@ binary_convolution_inst::typed_primitive_inst(network& network, binary_convoluti
                          "Only one-dimensional batch size are supported");
    CLDNN_ERROR_LESS_THAN(node.id(),
                          "Weights feature maps number",
                          (input_inst.size.feature[0] + pad.feature[0]) / split,
                          input_inst.size.feature[0],
                          "input feature maps number",
                          filter_inst.size.feature[0],
                          "Weights/ifm mismatch");

@ -97,7 +97,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
        input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
        CLDNN_ERROR_MESSAGE(
            node.id(),
            "Input for convolution should not be in windograd weights format - it is reserved for weights only");
            "Input for convolution should not be in winograd weights format - it is reserved for weights only");

    if (input_layout.format == format::winograd_2x3_s1_data) {
        CLDNN_ERROR_NOT_EQUAL(node.id(),
@ -369,10 +369,19 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
                              "Only one-dimensional batch size are supported");
        CLDNN_ERROR_LESS_THAN(node.id(),
                              "Weights feature maps number",
                              (input_inst.size.feature[0] + pad.feature[0]) / split,
                              input_inst.size.feature[0],
                              "input feature maps number",
                              weights_ifm,
                              "Weights/ifm mismatch");

        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                  "Weights feature maps number",
                                  input_inst.size.feature[0],
                                  "input feature maps number",
                                  weights_ifm,
                                  "Weights/ifm mismatch");
        }
    }
}
}  // namespace cldnn

@ -208,6 +208,7 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                              1,
                              "Spatial[0] of bias should be 1. Bias isn't 1D vector.");
    }

    CLDNN_ERROR_NOT_EQUAL(node.id(),
                          "deconvolution padding filling value",
                          node.get_output_layout().data_padding.filling_value(),
@ -240,10 +241,19 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                              "Only one-dimensional features are supported");
        CLDNN_ERROR_LESS_THAN(node.id(),
                              "Weights feature maps number",
                              (input_inst.size.feature[0] + pad.feature[0]) / split,
                              input_inst.size.feature[0],
                              "input feature maps number",
                              weights_ifm,
                              "Weights/ifm mimsmatch");
                              "Weights/ifm mismatch");

        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                  "Weights feature maps number",
                                  input_inst.size.feature[0],
                                  "input feature maps number",
                                  weights_ifm,
                                  "Weights/ifm mismatch");
        }
    }
}
}  // namespace cldnn

@ -536,7 +536,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
        }
    };

    const auto reorder_input_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
    const auto reorder_input_and_weights_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
        auto& input = deconv_node.input();
        auto input_layout = input.get_output_layout();
        auto new_format = lo.get_preferred_format(deconv_node);
@ -547,14 +547,41 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
            p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
        }
    }

        auto& weights = deconv_node.weights();
        auto weights_layout = weights.get_output_layout();
        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
            auto dims = weights_layout.format.dimension();
            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
            auto reorder = rf.get_reorder(weights.id(), weights_layout,
                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
            if (reorder.first) {
                p.add_intermediate(reorder.first, deconv_node, 1, !reorder.second);
            }
        }
    };

    const auto reorder_weights_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
        auto& weights = conv_node.weights();
        auto weights_layout = weights.get_output_layout();
        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
            auto dims = weights_layout.format.dimension();
            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
            auto reorder = rf.get_reorder(weights.id(), weights_layout,
                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
            if (reorder.first) {
                p.add_intermediate(reorder.first, conv_node, 1, !reorder.second);
            }
        }
    };
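Both weight-reorder lambdas above choose a plain layout purely by tensor rank. Pulled out on its own (a sketch with a stand-in enum; the real code uses cldnn::format):

```cpp
// Rank-based plain-format selection used by the two lambdas above,
// modeled with a stand-in enum instead of cldnn::format.
enum class plain_fmt { bfyx, bfzyx, bfwzyx };

plain_fmt preferred_plain_format(int dims) {
    return dims <= 4 ? plain_fmt::bfyx
         : dims == 5 ? plain_fmt::bfzyx
                     : plain_fmt::bfwzyx;   // 6D tensors fall through to bfwzyx
}
```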

    for (auto& prim : p.get_processing_order()) {
        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution>(
        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution, convolution>(
            *prim,
            reorder_input_detection_output,
            reorder_input_binary_convolution,
            reorder_input_deconvolution);
            reorder_input_and_weights_deconvolution,
            reorder_weights_convolution);
    }

    for (auto n : p.get_processing_order()) {

@ -225,6 +225,11 @@ attach_convolution_impl::attach_convolution_impl() {
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),

@ -214,6 +214,13 @@ attach_eltwise_impl::attach_eltwise_impl() {
        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2),

@ -119,6 +119,11 @@ attach_concatenation_onednn::attach_concatenation_onednn() {
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),
    });
}

@ -256,6 +256,11 @@ attach_convolution_onednn::attach_convolution_onednn() {
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),

@ -199,6 +199,11 @@ attach_deconvolution_onednn::attach_deconvolution_onednn() {
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),

@ -91,6 +91,7 @@ dnnl::memory::format_tag convert_data_format(cldnn::format fmt) {
        case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c;
        case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c;
        case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b;
        case cldnn::format::bs_fs_yx_bsv8_fsv4: return dnnl::memory::format_tag::ABcd8a4b;
        case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b;
        case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c;
        case cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c;

@ -97,6 +97,8 @@ inline std::string fmt_to_str(format fmt) {
        return "bs_fs_yx_bsv4_fsv2";
    case format::bs_fs_yx_bsv4_fsv4:
        return "bs_fs_yx_bsv4_fsv4";
    case format::bs_fs_yx_bsv8_fsv4:
        return "bs_fs_yx_bsv8_fsv4";
    case format::bs_fs_yx_bsv32_fsv32:
        return "bs_fs_yx_bsv32_fsv32";
    case format::b_fs_zyx_fsv16:

@ -136,6 +136,8 @@ kernel_selector::data_layout to_data_layout(format f) {
        return kernel_selector::data_layout::bs_fs_yx_bsv32_fsv16;
    case format::bs_fs_yx_bsv4_fsv4:
        return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4;
    case format::bs_fs_yx_bsv8_fsv4:
        return kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4;
    case format::bs_fs_yx_bsv4_fsv2:
        return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv2;
    case format::bs_fs_yx_bsv32_fsv32:
@ -193,6 +195,8 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) {
        return cldnn::format::bs_fs_yx_bsv4_fsv2;
    case kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4:
        return cldnn::format::bs_fs_yx_bsv4_fsv4;
    case kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4:
        return cldnn::format::bs_fs_yx_bsv8_fsv4;
    case kernel_selector::data_layout::bs_fs_yx_bsv32_fsv32:
        return cldnn::format::bs_fs_yx_bsv32_fsv32;
    case kernel_selector::data_layout::nv12:

@ -284,10 +284,11 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
        return true;

    if (next.is_type<convolution>() &&
        (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4) &&
        (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4 || fmt_prev == format::bs_fs_yx_bsv8_fsv4) &&
        ((fmt_next == format::b_fs_yx_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) ||
         (fmt_next == format::bs_fs_yx_bsv32_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) ||
         (fmt_next == format::bs_fs_yx_bsv4_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) ||
         (fmt_next == format::bs_fs_yx_bsv8_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) ||
         (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 &&
          (prev_output_layout.size.feature[0] == 3 || (prev_output_layout.size.feature[0] == 4 && (prev_dt == data_types::u8 || prev_dt == data_types::i8))))))
        return true;
@ -1269,6 +1270,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
        format::bs_fs_yx_bsv32_fsv16,
        format::bs_fs_yx_bsv32_fsv32,
        format::bs_fs_yx_bsv4_fsv4,
        format::bs_fs_yx_bsv8_fsv4,
        format::bs_fs_yx_bsv4_fsv2,
        format::bs_fs_zyx_bsv4_fsv4,
        format::bs_fs_zyx_bsv4_fsv2,
@ -1320,18 +1322,28 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
            impl_candidate = impl_types::ocl;
        }

        size_t eltw_dep = 0;
        for (auto& fo : node.get_fused_primitives()) {
            if (fo.node->is_type<eltwise>()) {
                auto in_layout = node.get_dependency(fo.dep_start_idx).get_output_layout();
                auto out_layout = node.get_output_layout();
                auto in_dt = in_layout.data_type;
                auto out_dt = out_layout.data_type;
                if (fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
                    if ((out_layout.count() == in_layout.count()) &&
                        (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt &&
                        fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
                        (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt) {
                        impl_candidate = impl_types::ocl;
                        break;
                    }
                    if (in_layout.size == out_layout.size && in_layout.format == out_layout.format && in_layout.data_padding == out_layout.data_padding &&
                        data_type_traits::size_of(in_dt) == data_type_traits::size_of(out_dt)) {
                        if (eltw_dep > 0) {
                            impl_candidate = impl_types::ocl;
                            break;
                        }
                        eltw_dep = fo.dep_start_idx;
                    }
                }
            } else if (fo.node->is_type<activation>()) {
                // Some activations aren't implemented in oneDNN
                auto activation_prim = fo.node->as<activation>().get_primitive();
@ -1453,7 +1465,7 @@ format layout_optimizer::get_preferred_format(program_node& node) {
            if (data_type_traits::is_floating_point(conv.get_output_layout().data_type) || ws.spatial[0] != 7 || conv.get_primitive()->groups > 1)
                expected = format::bfyx;
            else
                expected = format::bs_fs_yx_bsv4_fsv4;
                expected = format::bs_fs_yx_bsv8_fsv4;

            auto conv_output_layout = conv.get_output_layout();
            auto weights_layout = conv.weights(0).get_output_layout();

@ -514,6 +514,7 @@ void network::allocate_primitives() {
                can_reuse_eltwise_mem = true;
            }

            if (!can_reuse_eltwise_mem) {
                if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
                    auto& eltw_inst = _primitives.at(eltw_in.id());
                    auto& prim_inst = _primitives.at(node->id());
@ -524,6 +525,7 @@ void network::allocate_primitives() {
                    if (eltw_mem_type != prim_mem_type && eltw_mem_type != allocation_type::cl_mem && eltw_mem_type != allocation_type::usm_host)
                        can_reuse_eltwise_mem = false;
                }
            }

            if (fused_op.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(eltw_in_layout) && !can_reuse_eltwise_mem) {
                throw std::runtime_error("Buffer reuse is required for onednn sum post operation.");

@ -139,30 +139,25 @@ std::pair<bool, bool> program_helpers::are_layouts_identical(layout const& l1, l
        return {false, false};
    if (l1.get_linear_size() != l2.get_linear_size())
        return {false, false};
    if ((l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) ||
        (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4) ||
        (l1.format == format::fs_b_yx_fsv32 && l2.format != format::fs_b_yx_fsv32) ||
        (l2.format == format::fs_b_yx_fsv32 && l1.format != format::fs_b_yx_fsv32) ||
        (l1.format == format::b_fs_yx_fsv16 && l2.format != format::b_fs_yx_fsv16) ||
        (l2.format == format::b_fs_yx_fsv16 && l1.format != format::b_fs_yx_fsv16) ||
        (l1.format == format::b_fs_yx_fsv32 && l2.format != format::b_fs_yx_fsv32) ||
        (l2.format == format::b_fs_yx_fsv32 && l1.format != format::b_fs_yx_fsv32) ||
        (l1.format == format::b_fs_zyx_fsv32 && l2.format != format::b_fs_zyx_fsv32) ||
        (l2.format == format::b_fs_zyx_fsv32 && l1.format != format::b_fs_zyx_fsv32) ||
        (l1.format == format::b_fs_zyx_fsv16 && l2.format != format::b_fs_zyx_fsv16) ||
        (l2.format == format::b_fs_zyx_fsv16 && l1.format != format::b_fs_zyx_fsv16) ||
        (l1.format == format::bs_fs_yx_bsv4_fsv4 && l2.format != format::bs_fs_yx_bsv4_fsv4) ||
        (l2.format == format::bs_fs_yx_bsv4_fsv4 && l1.format != format::bs_fs_yx_bsv4_fsv4) ||
        (l1.format == format::bs_fs_yx_bsv4_fsv2 && l2.format != format::bs_fs_yx_bsv4_fsv2) ||
        (l2.format == format::bs_fs_yx_bsv4_fsv2 && l1.format != format::bs_fs_yx_bsv4_fsv2) ||
        (l1.format == format::bs_fs_yx_bsv32_fsv16 && l2.format != format::bs_fs_yx_bsv32_fsv16) ||
        (l2.format == format::bs_fs_yx_bsv32_fsv16 && l1.format != format::bs_fs_yx_bsv32_fsv16) ||
        (l1.format == format::bs_fs_yx_bsv32_fsv32 && l2.format != format::bs_fs_yx_bsv32_fsv32) ||
        (l2.format == format::bs_fs_yx_bsv32_fsv32 && l1.format != format::bs_fs_yx_bsv32_fsv32) ||
        (l1.format == format::bs_fs_yx_bsv16_fsv16 && l2.format != format::bs_fs_yx_bsv16_fsv16) ||
        (l2.format == format::bs_fs_yx_bsv16_fsv16 && l1.format != format::bs_fs_yx_bsv16_fsv16) ||
        (l1.format == format::bs_fs_zyx_bsv16_fsv16 && l2.format != format::bs_fs_zyx_bsv16_fsv16) ||
        (l2.format == format::bs_fs_zyx_bsv16_fsv16 && l1.format != format::bs_fs_zyx_bsv16_fsv16))

    auto check_format = [&l1, &l2](cldnn::format format) {
        return (l1.format == format && l2.format != format) ||
               (l2.format == format && l1.format != format);
    };

    if (check_format(format::b_fs_yx_fsv4) ||
        check_format(format::fs_b_yx_fsv32) ||
        check_format(format::b_fs_yx_fsv16) ||
        check_format(format::b_fs_yx_fsv32) ||
        check_format(format::b_fs_zyx_fsv32) ||
        check_format(format::b_fs_zyx_fsv16) ||
        check_format(format::bs_fs_yx_bsv4_fsv4) ||
        check_format(format::bs_fs_yx_bsv8_fsv4) ||
        check_format(format::bs_fs_yx_bsv4_fsv2) ||
        check_format(format::bs_fs_yx_bsv32_fsv16) ||
        check_format(format::bs_fs_yx_bsv32_fsv32) ||
        check_format(format::bs_fs_yx_bsv16_fsv16) ||
        check_format(format::bs_fs_zyx_bsv16_fsv16))
        return {false, false};
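The check_format lambda collapses each pair of hand-written comparisons into "exactly one side uses this format". The same check generalized, with ints standing in for cldnn::format values:

```cpp
// Stand-in sketch of the check_format refactor above: a layout pair is
// incompatible when exactly one side uses any of the listed blocked formats.
#include <vector>

bool any_one_sided(int f1, int f2, const std::vector<int>& blocked_formats) {
    for (int f : blocked_formats)
        if ((f1 == f) != (f2 == f))  // XOR: only one of the two layouts matches
            return true;
    return false;
}
```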

    auto l1_pitch = l1.get_pitches();

@ -428,6 +428,7 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const
    // Ignore optimized operations for "previous" operation in our operation pair
    while (type_is_any_optimized(prev_type) && cur_post_op_idx < post_ops_size - 1) {
        prev_post_op_idx++;
        if (prev_post_op_idx == cur_post_op_idx)
            cur_post_op_idx++;
        prev_type = cur_post_ops[prev_post_op_idx].op_type;
        cur_type = cur_post_ops[cur_post_op_idx].op_type;

@ -16,15 +16,15 @@ using namespace cldnn;
using namespace ::tests;

template <typename T, typename U>
void createReferenceData(const T* arg_y, const T* arg_uv, U* out_ptr,
void createReferenceDataNV12(const T* arg_y, const T* arg_uv, U* out_ptr,
                             size_t batch_size, size_t image_h, size_t image_w,
                             size_t stride_y, size_t stride_uv, bool to_rgb) {
    for (int batch = 0; batch < batch_size; batch++) {
    for (size_t batch = 0; batch < batch_size; ++batch) {
        U* out = out_ptr + batch * image_w * image_h * 3;
        auto y_ptr = arg_y + batch * stride_y;
        auto uv_ptr = arg_uv + batch * stride_uv;
        for (int h = 0; h < image_h; h++) {
            for (int w = 0; w < image_w; w++) {
        for (size_t h = 0; h < image_h; ++h) {
            for (size_t w = 0; w < image_w; ++w) {
                auto y_index = h * image_w + w;
                auto y_val = static_cast<float>(y_ptr[y_index]);
                auto uv_index = (h / 2) * image_w + (w / 2) * 2;
@ -87,7 +87,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<float, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
    createReferenceDataNV12<float, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
                                          1, height, width, height * width, height * width / 2, true);
    auto output = outputs.at("convert_color").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -126,7 +126,7 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
    createReferenceDataNV12<float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
                                   1, height, width, height * width, height * width / 2, false);

    auto output = outputs.at("convert_color").get_memory();
@ -166,7 +166,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<uint8_t, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
    createReferenceDataNV12<uint8_t, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
                                            1, height, width, height * width, height * width / 2, true);

    auto output = outputs.at("convert_color").get_memory();
@ -206,7 +206,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<FLOAT16, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
    createReferenceDataNV12<FLOAT16, float>(input_y_data.data(), input_uv_data.data(), ref_res.data(),
                                            1, height, width, height * width, height * width / 2, true);

    auto output = outputs.at("convert_color").get_memory();
@ -243,7 +243,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<float, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
    createReferenceDataNV12<float, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
                                          1, height, width, input_height * width, input_height * width, true);
    auto output = outputs.at("convert_color").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -279,7 +279,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) {
    auto outputs = network.execute();

    std::vector<float> ref_res(width * height * 3);
    createReferenceData<uint8_t, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
    createReferenceDataNV12<uint8_t, float>(input_data.data(), input_data.data() + height * width, ref_res.data(),
                                            1, height, width, input_height * width, input_height * width, true);
    auto output = outputs.at("convert_color").get_memory();
    cldnn::mem_lock<uint8_t> output_ptr(output, get_test_stream());
@ -356,7 +356,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) {
    auto outputs = network.execute();

    std::vector<float> reference_results(width * height * 3);
    createReferenceData<uint8_t, float>(data.data(), data.data() + height * width, reference_results.data(),
    createReferenceDataNV12<uint8_t, float>(data.data(), data.data() + height * width, reference_results.data(),
                                            1, height, width, height * width, height * width / 2, true);

    auto output_prim = outputs.begin()->second.get_memory();
@ -419,7 +419,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) {
    auto outputs = network.execute();

    std::vector<float> reference_results(width * height * 3);
    createReferenceData<uint8_t, float>(input_data.data(), input_data.data() + height * width, reference_results.data(),
    createReferenceDataNV12<uint8_t, float>(input_data.data(), input_data.data() + height * width, reference_results.data(),
                                            1, height, width, input_height * width, input_height * width, true);

    auto output_prim = outputs.begin()->second.get_memory();
@ -429,3 +429,185 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) {
    }
    checkStatus(clReleaseMemObject(nv12_image), "clReleaseMemObject");
}

template <typename T>
std::tuple<T, T, T> yuv_pixel_to_rgb(float y_val, float u_val, float v_val) {
    auto c = y_val - 16.f;
    auto d = u_val - 128.f;
    auto e = v_val - 128.f;
    auto clip = [](float a) -> T {
        if (std::is_integral<T>()) {
            return static_cast<T>(std::min(std::max(std::round(a), 0.f), 255.f));
        } else {
            return static_cast<T>(std::min(std::max(a, 0.f), 255.f));
        }
    };
    auto b = clip(1.164f * c + 2.018f * d);
    auto g = clip(1.164f * c - 0.391f * d - 0.813f * e);
    auto r = clip(1.164f * c + 1.596f * e);
    return std::tuple<T, T, T>{r, g, b};
}
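As a sanity check on the constants above (BT.601 limited-range luma with centered chroma), a neutral gray input with Y=128 and U=V=128 leaves the chroma terms at zero, so all three channels come out to 1.164 * (128 - 16) = 130.368:

```cpp
// Worked example of the conversion above: for Y=128, U=V=128 the chroma
// terms vanish and r, g, b all equal 1.164f * 112 = 130.368.
#include <cstdio>

int main() {
    float y = 128.f, u = 128.f, v = 128.f;
    float c = y - 16.f, d = u - 128.f, e = v - 128.f;
    float r = 1.164f * c + 1.596f * e;               // 130.368
    float g = 1.164f * c - 0.391f * d - 0.813f * e;  // 130.368
    float b = 1.164f * c + 2.018f * d;               // 130.368
    std::printf("r=%.3f g=%.3f b=%.3f\n", r, g, b);
    return 0;
}
```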

template <typename T, typename U>
void createReferenceDataI420(const T* arg_y, const T* arg_u, const T* arg_v, U* out_ptr,
                             size_t batch_size, size_t image_h, size_t image_w,
                             size_t stride_y, size_t stride_uv, bool rgb_color_format) {
    for (size_t batch = 0; batch < batch_size; ++batch) {
        U* out = out_ptr + batch * image_w * image_h * 3;
        auto y_ptr = arg_y + batch * stride_y;
        auto u_ptr = arg_u + batch * stride_uv;
        auto v_ptr = arg_v + batch * stride_uv;
        for (size_t h = 0; h < image_h; ++h) {
            for (size_t w = 0; w < image_w; ++w) {
                auto y_index = h * image_w + w;
                auto y_val = static_cast<float>(y_ptr[y_index]);
                auto uv_index = (h / 2) * (image_w / 2) + (w / 2);
                auto u_val = static_cast<float>(u_ptr[uv_index]);
                auto v_val = static_cast<float>(v_ptr[uv_index]);
                T r, g, b;
                std::tie(r, g, b) = yuv_pixel_to_rgb<U>(y_val, u_val, v_val);
                if (rgb_color_format) {
                    out[y_index * 3] = r;
                    out[y_index * 3 + 1] = g;
                    out[y_index * 3 + 2] = b;
                } else {
                    out[y_index * 3] = b;
                    out[y_index * 3 + 1] = g;
                    out[y_index * 3 + 2] = r;
                }
            }
        }
    }
}
||||
|
||||
TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) {
|
||||
auto& engine = get_test_engine();
|
||||
int width = 224;
|
||||
int height = 448;
|
||||
|
||||
auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } });
|
||||
auto input_u = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } });
|
||||
auto input_v = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } });
|
||||
|
||||
std::vector<float> input_y_data = generate_random_1d<float>(width * height, 0, 255);
|
||||
std::vector<float> input_u_data = generate_random_1d<float>(width * height / 4, 0, 255);
|
||||
std::vector<float> input_v_data = generate_random_1d<float>(width * height / 4, 0, 255);
|
||||
|
||||
set_values(input_y, input_y_data);
|
||||
set_values(input_u, input_u_data);
|
||||
set_values(input_v, input_v_data);
|
||||
|
||||
layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height });
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input_y", input_y->get_layout()));
|
||||
topology.add(input_layout("input_u", input_u->get_layout()));
|
||||
topology.add(input_layout("input_v", input_v->get_layout()));
|
||||
topology.add(convert_color("convert_color", { "input_y", "input_u", "input_v" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB,
|
||||
cldnn::convert_color::memory_type::buffer, output_layout));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input_y", input_y);
|
||||
network.set_input_data("input_u", input_u);
|
||||
network.set_input_data("input_v", input_v);
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
std::vector<float> ref_res(width * height * 3);
|
||||
createReferenceDataI420<float, float>(input_y_data.data(), input_u_data.data(), input_v_data.data(), ref_res.data(),
|
||||
1, height, width, height * width, height * width / 2, true);
|
||||
auto output = outputs.at("convert_color").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < ref_res.size(); ++i) {
|
||||
EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(convert_color, i420_to_rgb_three_planes_surface_u8) {
    int width = 224;
    int height = 448;

    auto ocl_instance = std::make_shared<OpenCL>();
    device_query query(engine_types::ocl, runtime_types::ocl, static_cast<void*>(ocl_instance->_context.get()));
    auto devices = query.get_available_devices();

    auto engine_config = cldnn::engine_configuration();
    auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config);

    if (!engine->get_device_info().supports_image) {
        GTEST_SKIP() << "Device doesn't support images";
    }

    int data_size = width * (height + height / 2);
    std::vector<uint8_t> data = generate_random_1d<uint8_t>(data_size, 0, 255);

    cl_int err;
    cl_image_format image_format;
    image_format.image_channel_order = CL_R;
    image_format.image_channel_data_type = CL_UNORM_INT8;
    cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0,
                                 0, 0, 0, 0, 0, { nullptr } };

    cl_mem i420_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
    checkStatus(err, "Creating i420 image plane_y failed");

    image_desc.image_width = width / 2;
    image_desc.image_height = height / 2;

    cl_mem i420_image_plane_u = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
    checkStatus(err, "Creating i420 image plane_u failed");

    cl_mem i420_image_plane_v = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err);
    checkStatus(err, "Creating i420 image plane_v failed");

    size_t origin[3] = { 0, 0, 0 };
    size_t y_region[3] = { (size_t)width, (size_t)height, 1 };
    size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 };

    err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr);
    checkStatus(err, "Writing i420 image plane_y failed");

    err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_u, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr);
    checkStatus(err, "Writing i420 image plane_u failed");

    err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_v, true, origin, uv_region, 0, 0, &data[width * (height + height / 4)], 0, nullptr, nullptr);
    checkStatus(err, "Writing i420 image plane_v failed");

    auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } });
    auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } });
    auto input3 = input_layout("input3", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } });
    auto output_format = cldnn::format::byxf;
    layout output_layout(data_types::f32, output_format, { 1, 3, width, height });

    auto input_memory = engine->share_image(input.layout, i420_image_plane_y);
    auto input_memory2 = engine->share_image(input2.layout, i420_image_plane_u);
    auto input_memory3 = engine->share_image(input3.layout, i420_image_plane_v);

    topology topology;
    topology.add(input);
    topology.add(input2);
    topology.add(input3);
    topology.add(convert_color("convert_color", { "input", "input2", "input3" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB,
                               cldnn::convert_color::memory_type::image, output_layout));

    network network(*engine, topology);
    network.set_input_data("input", input_memory);
    network.set_input_data("input2", input_memory2);
    network.set_input_data("input3", input_memory3);

    auto outputs = network.execute();

    std::vector<float> reference_results(width * height * 3);
    createReferenceDataI420<uint8_t, float>(data.data(), data.data() + height * width, data.data() + width * (height + height / 4), reference_results.data(),
                                            1, height, width, height * width, height * width / 2, true);

    auto output_prim = outputs.begin()->second.get_memory();
    cldnn::mem_lock<float> output_ptr(output_prim, get_test_stream());
    for (size_t i = 0; i < reference_results.size(); i++) {
        EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f);
    }
    checkStatus(clReleaseMemObject(i420_image_plane_y), "clReleaseMemObject");
    checkStatus(clReleaseMemObject(i420_image_plane_u), "clReleaseMemObject");
    checkStatus(clReleaseMemObject(i420_image_plane_v), "clReleaseMemObject");
}

@ -1009,7 +1009,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) {
        }
    }
}

TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) {
TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) {
    // data is similar to that in basic_convolution3D
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 2, 4, 4, 4 } });
@ -1141,138 +1141,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) {
        }
    }
}

TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) {
    // data is similar to that in basic_convolution3D_split2
    auto& engine = get_test_engine();
    auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 2, 4, 4, 4 } });
    auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 1, 2, 2, 2 } });
    auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1, 1 } });

    set_values(input, {
        1.0f, 0.0f, 1.0f, 0.0f,
        1.0f, 1.0f, 3.0f, 1.0f,
        1.0f, 1.0f, 0.0f, 2.0f,
        0.0f, 2.0f, 1.0f, 1.0f,
        1.0f, 0.0f, 0.0f, 1.0f,
        2.0f, 0.0f, 1.0f, 2.0f,
        3.0f, 1.0f, 1.0f, 1.0f,
        0.0f, 0.0f, 3.0f, 1.0f,
        2.0f, 0.0f, 1.0f, 1.0f,
        3.0f, 3.0f, 1.0f, 0.0f,
        2.0f, 1.0f, 1.0f, 0.0f,
        3.0f, 2.0f, 1.0f, 2.0f,
        1.0f, 0.0f, 2.0f, 0.0f,
        1.0f, 0.0f, 3.0f, 3.0f,
        3.0f, 1.0f, 0.0f, 0.0f,
        1.0f, 1.0f, 0.0f, 2.0f,
        1.0f, 0.0f, 1.0f, 0.0f,
        1.0f, 1.0f, 3.0f, 1.0f,
        1.0f, 1.0f, 0.0f, 2.0f,
        0.0f, 2.0f, 1.0f, 1.0f,
        1.0f, 0.0f, 0.0f, 1.0f,
        2.0f, 0.0f, 1.0f, 2.0f,
        3.0f, 1.0f, 1.0f, 1.0f,
        0.0f, 0.0f, 3.0f, 1.0f,
        2.0f, 0.0f, 1.0f, 1.0f,
        3.0f, 3.0f, 1.0f, 0.0f,
        2.0f, 1.0f, 1.0f, 0.0f,
        3.0f, 2.0f, 1.0f, 2.0f,
        1.0f, 0.0f, 2.0f, 0.0f,
        1.0f, 0.0f, 3.0f, 3.0f,
        3.0f, 1.0f, 0.0f, 0.0f,
        1.0f, 1.0f, 0.0f, 2.0f,
    });

    set_values(weights, {
        0.0f, 1.0f,
        0.0f, 0.0f,
        2.0f, 1.0f,
        0.0f, 0.0f,
        0.0f, 1.0f,
        0.0f, 0.0f,
        2.0f, 1.0f,
        0.0f, 0.0f,
    });

    set_values(biases, { 1.0f, 2.0f });

    VVVVF<float> output_vec = {
        {
            {
                { 3.0f, 2.0f, 2.0f },
                { 6.0f, 5.0f, 6.0f },
                { 9.0f, 4.0f, 6.0f }
            },
            {
                { 5.0f, 2.0f, 5.0f },
                { 10.0f, 9.0f, 5.0f },
                { 7.0f, 5.0f, 4.0f }
            },
            {
                { 3.0f, 4.0f, 6.0f },
                { 6.0f, 5.0f, 10.0f },
                { 9.0f, 4.0f, 1.0f }
            },
        },
        {
            {
                { 4.0f, 3.0f, 3.0f },
                { 7.0f, 6.0f, 7.0f },
                { 10.0f, 5.0f, 7.0f }
            },
            {
                { 6.0f, 3.0f, 6.0f },
                { 11.0f, 10.0f, 6.0f },
                { 8.0f, 6.0f, 5.0f }
            },
            {
                { 4.0f, 5.0f, 7.0f },
                { 7.0f, 6.0f, 11.0f },
                { 10.0f, 5.0f, 2.0f }
            },
        }
    };

    topology topology(
        input_layout("input", input->get_layout()),
        data("weights", weights),
        data("biases", biases),
        convolution("conv", "input", { "weights" }, { "biases" }));

    network network(engine, topology);
    network.set_input_data("input", input);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "conv");

    auto output_memory = outputs.at("conv").get_memory();
    auto output_layout = output_memory->get_layout();
    cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());

    int z_size = output_layout.size.spatial[2];
    int y_size = output_layout.size.spatial[1];
    int x_size = output_layout.size.spatial[0];
    int f_size = output_layout.size.feature[0];
    int b_size = output_layout.size.batch[0];
    EXPECT_EQ(output_layout.format, format::bfzyx);
    EXPECT_EQ(b_size, 1);
    EXPECT_EQ(f_size, 2);
    EXPECT_EQ(z_size, 3);
    EXPECT_EQ(y_size, 3);
    EXPECT_EQ(x_size, 3);
    for (int f = 0; f < f_size; ++f) {
        for (int z = 0; z < z_size; ++z) {
            for (int y = 0; y < y_size; ++y) {
                for (int x = 0; x < x_size; ++x) {
                    EXPECT_EQ(output_vec[f][z][y][x],
                              output_ptr[f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x]);
                }
            }
        }
    }
}

TEST(convolution_f32_fw_gpu, with_output_size_same_input) {
    auto& engine = get_test_engine();

@ -681,7 +681,7 @@ TEST_P(conv_fp32_reorder_fsv16_to_bfyx_conv, basic) {
        reorder("reorder_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32),
        convolution("conv_prim", "reorder_fsv16", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_bfyx", "conv_prim", format::bfyx, data_types::f32),
        convolution("conv_output", "reorder_bfyx", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation),
        convolution("conv_output", "reorder_bfyx", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs),
        reorder("reorder_output", "activation", p.default_format, data_types::f32)
    );
@ -3264,6 +3264,35 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_u8,
                        //gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 },
                        }));

class gemm_2in_quantize_float_in : public GemmFusingTest {};
TEST_P(gemm_2in_quantize_float_in, basic) {
    auto p = GetParam();
    create_topologies(input_layout("input0", get_input_layout(p, 0)),
                      input_layout("input1", get_input_layout(p, 1)),
                      data("in_lo", get_mem(get_per_channel_layout(p), 0)),
                      data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
                      data("out_lo", get_mem(get_single_element_layout(p), 0)),
                      data("out_hi", get_mem(get_single_element_layout(p), 255)),
                      gemm("gemm_prim", { "input0", "input1" }, data_types::f32),
                      quantize("quantize", "gemm_prim", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8),
                      reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
    );

    implementation_desc gemm_impl = { format::bfyx, "gemm_tiled_opt" };
    bo_fused.set_option(build_option::force_implementations({ {"gemm_prim", gemm_impl} }));

    tolerance = 1.0f;
    execute(p);
}

INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_float_in,
                         ::testing::ValuesIn(std::vector<gemm_test_params>{
                             gemm_test_params{ CASE_GEMM_2IN_FP16_1, 3, 4 },
                             gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 },
                             gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 4 },
                             gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 4 },
                         }));

class gemm_2in_scale : public GemmFusingTest {};
TEST_P(gemm_2in_scale, basic) {
    auto p = GetParam();
@ -10059,7 +10088,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
        data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
    );

@ -10088,7 +10117,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32),
        activation("activation_quantize", "reorder_fsv32", activation_func::relu),
        convolution("conv_output", "activation_quantize", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation),
        convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
    );

@ -10116,7 +10145,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
        data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ {0, 0, 1, 1}, 0 })),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs),
        activation("activation2", "conv_prim", activation_func::abs),
        eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum)
@ -373,7 +373,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) {
    layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }};
    auto input_1 = engine->allocate_memory(lay_batch_1);
    auto input_8 = engine->allocate_memory(lay_batch_8);
    auto weights = engine->allocate_memory({ dt, fmt, { 1, 1, 3, 2 } });
    auto weights = engine->allocate_memory({ dt, fmt, { 1, 3, 3, 2 } });

    std::vector<float> dummy_input_data_1 = generate_random_1d<float>(batch_1 * feature_num * inp_x_size * inp_y_size, 0, 1);
    std::vector<float> dummy_input_data_8 = generate_random_1d<float>(batch_8 * feature_num * inp_x_size * inp_y_size, 0, 1);
@ -396,14 +396,14 @@ TEST(memory_pool, shared_mem_pool_diff_batches) {
    auto outputs = network_first.execute();

    auto dev_info = engine->get_device_info();
    EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928);
    EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 4744);

    topo.change_input_layout("input", input_1->get_layout());//change input layout to batch=1

    network network_second(*engine, topo, bo);
    network_second.set_input_data("input", input_1);
    auto outputs_second = network_second.execute();
    EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328);
    EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 5912);
}

TEST(memory_pool, shared_dep_two_output) {
@ -9,7 +9,7 @@ file(GLOB SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/
file(GLOB HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)

# create library
add_library(${TARGET_NAME} SHARED EXCLUDE_FROM_ALL ${HEADERS} ${SOURCES})
add_library(${TARGET_NAME} SHARED ${HEADERS} ${SOURCES})

# Find OpenCV components if exist
find_package(OpenCV COMPONENTS core imgproc imgcodecs QUIET)
@ -1,31 +1,72 @@
#include "bmp_reader.h"

#include <memory.h>
#include <stdio.h>
#include <stdlib.h>

int readBmpImage(const char* fileName, BitMap* image) {
    FILE* input = fopen(fileName, "rb");
    size_t cnt;
    int status = 0;
    FILE* input = 0;

    if (input == NULL) {
        printf("[BMP] file %s is not opened\n", fileName);
        return 1;
    if (NULL == fileName || NULL == image) {
        printf("[BMP] bad arguments\n");
        status = -1;
        goto Exit;
    }

    fread(&image->header.type, 2, 1, input);
    memset(image, 0, sizeof(BitMap));

    input = fopen(fileName, "rb");
    if (input == NULL) {
        printf("[BMP] file %s is not opened\n", fileName);
        status = 1;
        goto Exit;
    }

    cnt = fread(&image->header.type, sizeof(image->header.type), sizeof(unsigned char), input);
    if (cnt != sizeof(image->header.type)) {
        printf("[BMP] file read error\n");
        status = 2;
        goto Exit;
    }

    if (image->header.type != 'M' * 256 + 'B') {
        printf("[BMP] file is not bmp type\n");
        return 2;
        status = 2;
        goto Exit;
    }

    fread(&image->header.size, 4, 1, input);
    fread(&image->header.reserved, 4, 1, input);
    fread(&image->header.offset, 4, 1, input);
    cnt = fread(&image->header.size, sizeof(image->header.size), sizeof(unsigned char), input);
    if (cnt != sizeof(image->header.size)) {
        printf("[BMP] file read error\n");
        status = 2;
        goto Exit;
    }

    fread(&image->infoHeader, sizeof(BmpInfoHeader), 1, input);
    cnt = fread(&image->header.reserved, sizeof(image->header.reserved), sizeof(unsigned char), input);
    if (cnt != sizeof(image->header.reserved)) {
        printf("[BMP] file read error\n");
        status = 2;
        goto Exit;
    }

    cnt = fread(&image->header.offset, sizeof(image->header.offset), sizeof(unsigned char), input);
    if (cnt != sizeof(image->header.offset)) {
        printf("[BMP] file read error\n");
        status = 2;
        goto Exit;
    }

    cnt = fread(&image->infoHeader, sizeof(BmpInfoHeader), sizeof(unsigned char), input);
    if (cnt != sizeof(image->header.offset)) {
        printf("[BMP] file read error\n");
        status = 2;
        goto Exit;
    }

    image->width = image->infoHeader.width;
    image->height = image->infoHeader.height;
    image->height = abs(image->infoHeader.height);

    if (image->infoHeader.bits != 24) {
        printf("[BMP] 24bpp only supported. But input has: %d\n", image->infoHeader.bits);
@ -38,21 +79,49 @@ int readBmpImage(const char* fileName, BitMap* image) {
    }

    int padSize = image->width & 3;
    size_t row_size = (size_t)image->width * 3;
    char pad[3];
    size_t size = image->width * image->height * 3;
    size_t size = row_size * image->height;

    image->data = malloc(sizeof(char) * size);
    if (NULL == image->data) {
        printf("[BMP] memory allocation failed\n");
        return 5;
    }

    fseek(input, image->header.offset, 0);
    if (0 != fseek(input, image->header.offset, SEEK_SET)) {
        printf("[BMP] file seek error\n");
        status = 2;
        goto Exit;
    }

    // reading by rows, inverted vertically
    int i;
    for (i = 0; i < image->height; i++) {
        unsigned int storeAt = image->infoHeader.height < 0 ? i : (unsigned int)image->height - 1 - i;
        fread(image->data + image->width * 3 * storeAt, image->width * 3, 1, input);
        fread(pad, padSize, 1, input);
        cnt = fread(image->data + row_size * storeAt, row_size, sizeof(unsigned char), input);
        if (cnt != row_size) {
            printf("[BMP] file read error\n");
            status = 2;
            goto Exit;
        }

    fclose(input);
    return 0;
        cnt = fread(pad, padSize, sizeof(unsigned char), input);
        if (cnt != padSize) {
            printf("[BMP] file read error\n");
            status = 2;
            goto Exit;
        }
    }

Exit:
    if (0 != status && NULL != image && NULL != image->data) {
        free(image->data);
    }

    if (NULL != input) {
        fclose(input);
    }

    return status;
}
@ -13,7 +13,7 @@ source_group("src" FILES ${LIBRARY_SRC})
source_group("include" FILES ${LIBRARY_HEADERS})

# Create library file from sources.
add_library(${TARGET_NAME} SHARED EXCLUDE_FROM_ALL ${MAIN_SRC} ${LIBRARY_HEADERS})
add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})

# Find OpenCV components if exist
find_package(OpenCV COMPONENTS core imgproc imgcodecs QUIET)
@ -26,6 +26,32 @@ def create_onnx_model():
    return make_model(graph, producer_name="ngraph ONNX Importer")


def create_onnx_model_with_subgraphs():
    A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [3])
    B = onnx.helper.make_tensor_value_info("B", onnx.TensorProto.FLOAT, [3])
    add_out = onnx.helper.make_tensor_value_info("add_out", onnx.TensorProto.FLOAT, [3])
    sub_out = onnx.helper.make_tensor_value_info("sub_out", onnx.TensorProto.FLOAT, [3])

    add = onnx.helper.make_node("Add", inputs=["A", "B"], outputs=["add_out"])
    sub = onnx.helper.make_node("Sub", inputs=["A", "B"], outputs=["sub_out"])

    then_body = make_graph([add], "then_body", [], [add_out])
    else_body = make_graph([sub], "else_body", [], [sub_out])

    if_node = onnx.helper.make_node(
        "If",
        inputs=["cond"],
        outputs=["res"],
        then_branch=then_body,
        else_branch=else_body
    )
    cond = onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, [])
    res = onnx.helper.make_tensor_value_info("res", onnx.TensorProto.FLOAT, [3])

    graph = make_graph([if_node], "graph", [cond, A, B], [res])
    return make_model(graph, producer_name="ngraph ONNX Importer")


def run_function(function, *inputs, expected):
    runtime = get_runtime()
    computation = runtime.computation(function)
@ -37,15 +63,18 @@ def run_function(function, *inputs, expected):

fem = FrontEndManager()
onnx_model_filename = "model.onnx"
onnx_model_with_subgraphs_filename = "model_subgraphs.onnx"
ONNX_FRONTEND_NAME = "onnx"


def setup_module():
    onnx.save_model(create_onnx_model(), onnx_model_filename)
    onnx.save_model(create_onnx_model_with_subgraphs(), onnx_model_with_subgraphs_filename)


def teardown_module():
    os.remove(onnx_model_filename)
    os.remove(onnx_model_with_subgraphs_filename)


def skip_if_onnx_frontend_is_disabled():
@ -72,17 +101,29 @@ def test_convert():
    run_function(function, a, b, expected=[expected])


def test_decode_and_convert():
@pytest.mark.parametrize("model_filename, inputs, expected", [
    [onnx_model_filename,
     [np.array([[1, 2], [3, 4]], dtype=np.float32),
      np.array([[2, 3], [4, 5]], dtype=np.float32)],
     np.array([[1.5, 5], [10.5, 18]], dtype=np.float32)],
    [onnx_model_with_subgraphs_filename,
     [np.array(False, dtype=bool),
      np.array([1, 2, 3], dtype=np.float32),
      np.array([2, 3, 5], dtype=np.float32)],
     np.array([-1, -1, -2], dtype=np.float32)],
])
def test_decode_and_convert(model_filename, inputs, expected):
    skip_if_onnx_frontend_is_disabled()

    fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME)
    assert fe

    model = fe.load(onnx_model_filename)
    model = fe.load(model_filename)
    assert model

    decoded_function = fe.decode(model)
    assert decoded_function

    for op in decoded_function.get_ordered_ops():
        assert op.get_type_name() in ["Parameter", "Constant", "ONNXFrameworkNode",
                                      "ONNXSubgraphFrameworkNode", "Result"]
@ -92,10 +133,7 @@ def test_decode_and_convert():
    for op in decoded_function.get_ordered_ops():
        assert op.get_type_name() not in ["ONNXFrameworkNode", "ONNXSubgraphFrameworkNode"]

    a = np.array([[1, 2], [3, 4]], dtype=np.float32)
    b = np.array([[2, 3], [4, 5]], dtype=np.float32)
    expected = np.array([[1.5, 5], [10.5, 18]], dtype=np.float32)
    run_function(decoded_function, a, b, expected=[expected])
    run_function(decoded_function, *inputs, expected=[expected])


def test_load_by_model():
@ -101,16 +101,9 @@ tests_expected_to_fail = [
    (
        xfail_issue_FLOAT_LIKE,
        "OnnxBackendNodeModelTest.test_cast_BFLOAT16_to_FLOAT_cpu",
        "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_DOUBLE_cpu",
        "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_FLOAT_cpu",
        "OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu",
        "OnnxBackendNodeModelTest.test_castlike_BFLOAT16_to_FLOAT_expanded_cpu",
        "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_DOUBLE_expanded_cpu",
        "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_FLOAT_expanded_cpu",
        "OnnxBackendNodeModelTest.test_castlike_FLOAT_to_BFLOAT16_expanded_cpu",
        "OnnxBackendNodeModelTest.test_max_float16_cpu",
        "OnnxBackendNodeModelTest.test_min_float16_cpu",
        "OnnxBackendNodeModelTest.test_mod_mixed_sign_float16_cpu",
    ),
    (
        xfail_issue_49207,
@ -171,12 +171,6 @@ if len(zoo_models) > 0:
    test_cases = backend_test.test_cases["OnnxBackendModelExecutionTest"]
    if tests.MODEL_ZOO_XFAIL:
        execution_xfail_list = [
            # New Python API - fp16 blob
            (xfail_issue_67415, "test_MSFT_opset7_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"),
            (xfail_issue_67415, "test_MSFT_opset7_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"),
            (xfail_issue_67415, "test_MSFT_opset8_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"),
            (xfail_issue_67415, "test_MSFT_opset8_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"),

            # ONNX Model Zoo
            (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_encoder_12_t5_encoder_cpu"),
            (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_decoder_with_lm_head_12_t5_decoder_with_lm_head_cpu"),
@ -1,9 +1,9 @@
# Copyright (C) 2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from openvino.runtime import Function
from openvino.runtime.impl import Shape, Type
from openvino.runtime.impl.op import Parameter
from openvino.runtime import Model
from openvino.runtime import Shape, Type
from openvino.runtime.op import Parameter
import openvino.runtime.opset8 as ops


@ -11,7 +11,7 @@ def get_test_function():
    element_type = Type.f32
    param = Parameter(element_type, Shape([1, 3, 22, 22]))
    relu = ops.relu(param)
    func = Function([relu], [param], "test")
    func = Model([relu], [param], "test")
    assert func is not None
    return func

@ -34,6 +34,8 @@ class OPENVINO_API I420toBGR : public util::ConvertColorI420Base {
public:
    OPENVINO_OP("I420toBGR", "opset8", util::ConvertColorI420Base);

    BWDCMP_RTTI_DECLARATION;

    I420toBGR() = default;

    /// \brief Constructs a conversion operation from input image in I420 format

@ -34,6 +34,8 @@ class OPENVINO_API I420toRGB : public util::ConvertColorI420Base {
public:
    OPENVINO_OP("I420toRGB", "opset8", util::ConvertColorI420Base);

    BWDCMP_RTTI_DECLARATION;

    I420toRGB() = default;

    /// \brief Constructs a conversion operation from input image in I420 format

@ -22,10 +22,6 @@ ie_faster_build(${TARGET_NAME}
    UNITY
    PCH PRIVATE "src/precomp.hpp")

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    target_compile_options(${TARGET_NAME} PUBLIC /wd4146)
endif()

target_compile_definitions(${TARGET_NAME} PRIVATE XBYAK_NO_OP_NAMES XBYAK64)

if(NOT BUILD_SHARED_LIBS)
@ -5,11 +5,17 @@
#pragma once

#include <cstddef>
#include <type_traits>

namespace ngraph {
namespace runtime {
namespace reference {
template <typename T>
template <typename T, typename std::enable_if<std::is_unsigned<T>::value, bool>::type = true>
void abs(const T* arg, T* out, size_t count) {
    std::copy(arg, arg + count, out);
}

template <typename T, typename std::enable_if<!std::is_unsigned<T>::value, bool>::type = true>
void abs(const T* arg, T* out, size_t count) {
    for (size_t i = 0; i < count; i++) {
        // TODO: generic "abs" doesn't work here for some reason.

@ -16,8 +16,7 @@ namespace runtime {
namespace reference {
template <typename T>
void max(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) {
    T minval =
        std::numeric_limits<T>::has_infinity ? T(-std::numeric_limits<T>::infinity()) : std::numeric_limits<T>::min();
    T minval = std::numeric_limits<T>::lowest();

    constexpr bool dont_keep_dims_in_output = false;
    const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output);
@ -6,11 +6,21 @@

#include <cmath>
#include <cstddef>
#include <type_traits>

namespace ngraph {
namespace runtime {
namespace reference {
template <typename T>
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
void sigmoid(const T* arg, T* out, size_t count) {
    T exp_value;
    for (size_t i = 0; i < count; i++) {
        exp_value = std::exp(-static_cast<typename std::make_signed<T>::type>(arg[i]));
        out[i] = 1 / (1 + exp_value);
    }
}

template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
void sigmoid(const T* arg, T* out, size_t count) {
    T exp_value;
    for (size_t i = 0; i < count; i++) {

@ -6,6 +6,8 @@

#include "itt.hpp"

BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toBGR);

ov::op::v8::I420toBGR::I420toBGR(const Output<Node>& arg)
    : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_BGR) {
    constructor_validate_and_infer_types();

@ -6,6 +6,8 @@

#include "itt.hpp"

BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toRGB);

ov::op::v8::I420toRGB::I420toRGB(const Output<Node>& arg)
    : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_RGB) {
    constructor_validate_and_infer_types();

@ -314,7 +314,8 @@ void static check_step(const op::v0::Range* node, T step) {

template <typename T>
static typename std::enable_if<std::is_integral<T>::value, T>::type adjust_for_step_and_sign(T span, T step) {
    return ceil_div(span < 0 ? -span : span, step < 0 ? -step : step);
    return ceil_div(span < 0 ? -static_cast<typename std::make_signed<T>::type>(span) : span,
                    step < 0 ? -static_cast<typename std::make_signed<T>::type>(step) : step);
}

template <typename T>

@ -11,7 +11,7 @@ add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${LIBRARY_SRC} ${LIBRARY_HEAD

target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../..)
target_link_libraries(${TARGET_NAME} PUBLIC frontend_common interpreter_backend engines_test_util
target_link_libraries(${TARGET_NAME} PUBLIC frontend_common engines_test_util
        ngraph cnpy commonTestUtils ngraph_test_util openvino::util)

target_compile_definitions(${TARGET_NAME}
src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt
@ -0,0 +1,56 @@
ir_version: 3
producer_name: "nGraph ONNX Importer"
graph {
  node {
    input: "x"
    output: "y"
    op_type: "Softmax"
    attribute {
      name: "axis"
      i: 1
      type: INT
    }
  }
  name: "test_softmax_axis_1"
  input {
    name: "x"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
  output {
    name: "y"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
}
opset_import {
  version: 11
}
src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt
@ -0,0 +1,56 @@
ir_version: 3
producer_name: "nGraph ONNX Importer"
graph {
  node {
    input: "x"
    output: "y"
    op_type: "Softmax"
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
  }
  name: "test_softmax_axis_0"
  input {
    name: "x"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
  output {
    name: "y"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
}
opset_import {
  version: 11
}
src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt
@ -0,0 +1,56 @@
ir_version: 3
producer_name: "nGraph ONNX Importer"
graph {
  node {
    input: "x"
    output: "y"
    op_type: "Softmax"
    attribute {
      name: "axis"
      i: -1
      type: INT
    }
  }
  name: "test_softmax_axis_0"
  input {
    name: "x"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
  output {
    name: "y"
    type {
      tensor_type {
        elem_type: 1
        shape {
          dim {
            dim_value: 3
          }
          dim {
            dim_value: 4
          }
          dim {
            dim_value: 5
          }
        }
      }
    }
  }
}
opset_import {
  version: 13
}
@ -380,7 +380,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_initializer_wo_input) {
    test_case.run();
}

NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) {
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function) {
    const auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamicquantizelinear.onnx"));

@ -392,7 +392,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) {
    test_case.run();
}

NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) {
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) {
    const auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/greater_or_equal.onnx"));

@ -403,7 +403,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_sub
    test_case.run();
}

NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_context_dependent_function) {
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) {
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/softmax_crossentropy_consumed.onnx"));

@ -690,19 +690,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) {
}
namespace {
// common input for all Softmax 3D test cases (Shape = {3,4,5})
// clang-format off
const std::vector<float> SOFTMAX_INPUT = {
    2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, 0.21004745, 1.38337255,
    1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777,
    -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233,
    2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118,
    0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657,
    -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112,
    -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233,

    0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018,
    0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 0.23727845,
    -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615,
    0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805,
    -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106,
    1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606,
    -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615,

    -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014,
    -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026,
    -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919};
    -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172,
    0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542,
    -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638,
    -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919};
} // namespace
// clang-format on

NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) {
    auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_0.onnx"));
@ -710,19 +715,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) {
    auto test_case = test::TestCase(function, s_device);
    test_case.add_input<float>(SOFTMAX_INPUT);

    // clang-format off
    test_case.add_expected_output<float>(
        Shape{3, 4, 5},
        {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, 0.00757665, 0.02449322,
         0.02019284, 0.04985249, 0.00592694, 0.00279593, 0.04505148, 0.00641108, 0.00458466,
         0.00348007, 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497,
        {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823,
         0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694,
         0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007,
         0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497,

         0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, 0.00537859, 0.00195794,
         0.00802367, 0.00254737, 0.0223216, 0.02893419, 0.0142204, 0.02053893, 0.00778581,
         0.00251907, 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084,
         0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679,
         0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216,
         0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907,
         0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084,

         0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, 0.01305647, 0.00053677,
         0.0017687, 0.08719896, 0.02028982, 0.00236265, 0.01027717, 0.0060709, 0.01216173,
         0.00388087, 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337});
         0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077,
         0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982,
         0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087,
         0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337});
    // clang-format on

    test_case.run(6);
}
@ -733,35 +743,113 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) {
    auto test_case = test::TestCase(function, s_device);
    test_case.add_input<float>(SOFTMAX_INPUT);

    // clang-format off
    test_case.add_expected_output<float>(
        Shape{3, 4, 5},
        {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, 0.0178066, 0.05756383,
         0.04745709, 0.11716303, 0.01392945, 0.00657097, 0.10587974, 0.01506727, 0.01077484,
         0.00817884, 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028,
        {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188,
         0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945,
         0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884,
         0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028,

         0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, 0.02028993, 0.00738604,
         0.03026811, 0.00960958, 0.08420492, 0.10914991, 0.05364435, 0.07748005, 0.02937079,
         0.0095028, 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678,
         0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488,
         0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492,
         0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028,
         0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678,

         0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, 0.04219705, 0.00173478,
         0.00571623, 0.2818174, 0.06557446, 0.00763582, 0.03321466, 0.01962049, 0.03930537,
         0.01254255, 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617});
         0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775,
         0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446,
         0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255,
         0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617});
    // clang-format on

    test_case.run(4);
}

NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_1D) {
    ASSERT_THROW(
        onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_1D.onnx")),
        ngraph::ngraph_error)
        << "Softmax model with invalid axis was successfully imported while it should have thrown.";
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) {
    auto function =
        onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_1_opset11.onnx"));

    auto test_case = test::TestCase(function, s_device);
    test_case.add_input<float>(SOFTMAX_INPUT);

    // clang-format off
    test_case.add_expected_output<float>(
        Shape{3, 4, 5},
        {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154,
         0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776,
         0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055,
         0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015,

         0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282,
         0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983,
         0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801,
         0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935,

         0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437,
         0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196,
         0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297,
         0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207});
    // clang-format on

    test_case.run(4);
}

NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_3D) {
    ASSERT_THROW(
        onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_3D.onnx")),
        ngraph::ngraph_error)
        << "Softmax model with invalid axis was successfully imported while it should have thrown.";
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) {
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset11.onnx"));

    auto test_case = test::TestCase(function);
    test_case.add_input<float>(SOFTMAX_INPUT);

    // clang-format off
    test_case.add_expected_output<float>(
        Shape{3, 4, 5},
        {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154,
         0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776,
         0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055,
         0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015,

         0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282,
         0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983,
         0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801,
         0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935,

         0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437,
         0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196,
         0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297,
         0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207});
    // clang-format on

    test_case.run(6);
}

NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) {
    auto function = onnx_import::import_onnx_model(
        file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset13.onnx"));

    auto test_case = test::TestCase(function);
    test_case.add_input<float>(SOFTMAX_INPUT);

    // clang-format off
    test_case.add_expected_output<float>(
        Shape{3, 4, 5},
        {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154,
         0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776,
         0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055,
         0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015,

         0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282,
         0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983,
         0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801,
         0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935,

         0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437,
         0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196,
         0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297,
         0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207});
    // clang-format on

    test_case.run(6);
}

NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub) {
@ -199,9 +199,10 @@ void Graph::decode_to_framework_nodes() {
        if (node.has_subgraphs()) {
            const auto& subgraphs = node.get_subgraphs();
            auto inputs = node.get_ng_inputs();
            std::vector<std::shared_ptr<Function>> functions;
            for (const auto& kv : subgraphs) {
                auto& subgraph = kv.second;
                subgraph->decode();
                functions.push_back(subgraph->decode());
                for (const auto& input : subgraph->get_inputs_from_parent()) {
                    const auto& name = input.get_node()->get_friendly_name();
                    if (std::find_if(inputs.begin(), inputs.end(), [&name](const Output<ngraph::Node>& n) -> bool {
@ -211,10 +212,9 @@ void Graph::decode_to_framework_nodes() {
                    }
                }
            }
            framework_node =
                std::make_shared<ngraph::frontend::ONNXSubgraphFrameworkNode>(shared_from_this(), node, inputs);
            framework_node = std::make_shared<frontend::ONNXSubgraphFrameworkNode>(node, functions, inputs);
        } else {
            framework_node = std::make_shared<ngraph::frontend::ONNXFrameworkNode>(shared_from_this(), node);
            framework_node = std::make_shared<frontend::ONNXFrameworkNode>(node);
        }
        OutputVector ng_nodes{framework_node->outputs()};
        set_friendly_names(node, ng_nodes);
@ -240,7 +240,10 @@ std::shared_ptr<Function> Graph::create_function() {

std::shared_ptr<Function> Graph::decode() {
    decode_to_framework_nodes();
    return create_function();
    auto function = create_function();
    auto& rt_info = function->get_rt_info();
    rt_info[ONNX_GRAPH_RT_ATTRIBUTE] = shared_from_this();
    return function;
}

bool Graph::is_ng_node_in_cache(const std::string& name) const {
@ -399,7 +402,8 @@ void Subgraph::find_inputs_from_parent() {
        for (const auto& out_name : node_proto.output()) {
            if (m_cache->contains(out_name)) {
                auto node_to_replace_input = m_cache->get_node(out_name).get_node();
                if (!dynamic_cast<op::util::MultiSubGraphOp*>(node_to_replace_input))
                if (!ov::is_type<op::util::MultiSubGraphOp>(node_to_replace_input) &&
                    !ov::is_type<frontend::ONNXSubgraphFrameworkNode>(node_to_replace_input))
                    continue;
                auto inputs = node_to_replace_input->input_values();
                for (size_t i = 0; i < inputs.size(); i++) {

@ -121,6 +121,8 @@ inline std::ostream& operator<<(std::ostream& outs, const Graph& graph) {
    return (outs << "<Graph: " << graph.get_name() << ">");
}

static const char* const ONNX_GRAPH_RT_ATTRIBUTE = "onnx_graph";

} // namespace onnx_import

} // namespace ngraph
@ -21,10 +21,14 @@ namespace frontend {
NGRAPH_RTTI_DEFINITION(ONNXFrameworkNode, "ONNXFrameworkNode", 1);

std::shared_ptr<Node> ONNXFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const {
    return std::make_shared<ONNXFrameworkNode>(m_graph, m_node, inputs);
    return std::make_shared<ONNXFrameworkNode>(m_node, inputs);
}

NGRAPH_RTTI_DEFINITION(ONNXSubgraphFrameworkNode, "ONNXSubgraphFrameworkNode", 1);

std::shared_ptr<Node> ONNXSubgraphFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const {
    return std::make_shared<ONNXSubgraphFrameworkNode>(m_node, m_functions, inputs);
}

} // namespace frontend
} // namespace ngraph
@ -38,20 +38,16 @@ class ONNXFrameworkNode : public ov::op::util::FrameworkNode {
public:
    NGRAPH_RTTI_DECLARATION;

    ONNXFrameworkNode(std::shared_ptr<onnx_import::Graph> graph, const onnx_import::Node& node)
    ONNXFrameworkNode(const onnx_import::Node& node)
        : ov::op::util::FrameworkNode(node.get_ng_inputs(), node.get_outputs_size()),
          m_node(node),
          m_graph(graph) {}
          m_node(node) {}

    ONNXFrameworkNode(std::shared_ptr<onnx_import::Graph> graph,
                      const onnx_import::Node& node,
                      const OutputVector& inputs)
    ONNXFrameworkNode(const onnx_import::Node& node, const OutputVector& inputs)
        : ov::op::util::FrameworkNode(inputs, node.get_outputs_size()),
          m_node(node),
          m_graph(graph) {}
          m_node(node) {}

    OutputVector get_ng_nodes() const {
        OutputVector ng_nodes{m_graph->make_ng_nodes(m_node)};
    OutputVector get_ng_nodes(const std::shared_ptr<onnx_import::Graph>& graph) const {
        OutputVector ng_nodes{graph->make_ng_nodes(m_node)};
        if (ng_nodes.size() > get_output_size()) {
            ng_nodes.resize(get_output_size());
        }
@ -71,35 +67,31 @@ public:

protected:
    onnx_import::Node m_node;

private:
    std::shared_ptr<onnx_import::Graph> m_graph;
};

class ONNXSubgraphFrameworkNode : public ONNXFrameworkNode {
public:
    NGRAPH_RTTI_DECLARATION;

    ONNXSubgraphFrameworkNode(std::shared_ptr<onnx_import::Graph> graph,
                              const onnx_import::Node& node,
    ONNXSubgraphFrameworkNode(const onnx_import::Node& node,
                              const std::vector<std::shared_ptr<Function>>& functions,
                              const OutputVector& inputs)
        : ONNXFrameworkNode(graph, node, inputs) {}
        : ONNXFrameworkNode(node, inputs),
          m_functions(functions) {}

    void infer_inputs_from_parent() {
        for (auto& subgraph : m_node.get_subgraphs())
            subgraph.second->infer_inputs_from_parent();
    }

    std::vector<std::shared_ptr<Function>> get_subgraph_functions() const {
        std::vector<std::shared_ptr<Function>> ret;
        for (const auto& kv : m_node.get_subgraphs()) {
            auto& subgraph = kv.second;
            ret.push_back(std::make_shared<Function>(subgraph->get_ng_outputs(),
                                                     subgraph->get_ng_parameters(),
                                                     subgraph->get_name()));
        }
        return ret;
    const std::vector<std::shared_ptr<Function>>& get_subgraph_functions() const {
        return m_functions;
    }

    virtual std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;

private:
    std::vector<std::shared_ptr<Function>> m_functions;
};

} // namespace frontend
@ -37,17 +37,8 @@ OutputVector softmax(const Node& node) {
        result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1});
        break;
    }
    case 1: {
        // checks if the axis belongs to the allowed values set (-1 and 0 for 1D)
        ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank());
        result = std::make_shared<default_opset::Softmax>(data, 0);
        break;
    }
    default: {
        const auto normalized_axis =
            ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank());

        result = onnx_softmax(data, normalized_axis);
        result = onnx_softmax(data, axis);
        break;
    }
    }
@ -69,17 +60,8 @@ OutputVector softmax(const Node& node) {
        result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1});
        break;
    }
    case 1: {
        // checks if the axis belongs to the allowed values set (-1 and 0 for 1D)
        ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank());
        result = std::make_shared<default_opset::Softmax>(data, 0);
        break;
    }
    default: {
        const auto normalized_axis =
            ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank());

        result = std::make_shared<default_opset::Softmax>(data, normalized_axis);
        result = std::make_shared<ov::op::v8::Softmax>(data, axis);
        break;
    }
    }
@ -92,9 +74,8 @@ OutputVector softmax(const Node& node) {
    const auto data = node.get_ng_inputs().at(0);

    const auto axis = node.get_attribute_value<int64_t>("axis", -1);
    const auto normalized_axis = ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank());

    return {std::make_shared<default_opset::Softmax>(data, normalized_axis)};
    return {std::make_shared<ov::op::v8::Softmax>(data, axis)};
}
} // namespace set_13
} // namespace op
@ -60,6 +60,12 @@ void apply_transformations(ONNX_NAMESPACE::ModelProto& model_proto, const std::s
} // namespace

void convert_decoded_function(std::shared_ptr<Function> function) {
    auto& rt_info = function->get_rt_info();
    auto it = rt_info.find(ONNX_GRAPH_RT_ATTRIBUTE);
    OPENVINO_ASSERT(it != rt_info.end(),
                    "Could not find '" + std::string(ONNX_GRAPH_RT_ATTRIBUTE) +
                        "' attribute in decoded model. Model probably wasn't created by FrontEnd::decode function.");
    auto onnx_graph = it->second.as<std::shared_ptr<onnx_import::Graph>>();
    for (const auto& node : function->get_ordered_ops()) {
        if (auto raw_node = std::dynamic_pointer_cast<frontend::ONNXFrameworkNode>(node)) {
            if (auto subgraph_node = std::dynamic_pointer_cast<frontend::ONNXSubgraphFrameworkNode>(node)) {
@ -68,7 +74,7 @@ void convert_decoded_function(std::shared_ptr<Function> function) {
                    convert_decoded_function(function);
                }
            }
            auto ng_nodes = raw_node->get_ng_nodes();
            auto ng_nodes = raw_node->get_ng_nodes(onnx_graph);
            replace_node(raw_node, ng_nodes);
        } else {
            // Have to revalidate node because new inputs can affect shape/type
@ -76,6 +82,7 @@ void convert_decoded_function(std::shared_ptr<Function> function) {
            node->revalidate_and_infer_types();
        }
    }
    rt_info.erase(it);
    detail::remove_dangling_parameters(function);
    detail::remove_dangling_results(function);
}
@ -34,5 +34,13 @@ namespace MultiDeviceConfigParams {
 */
DECLARE_MULTI_CONFIG_KEY(DEVICE_PRIORITIES);

/**
 * @brief Network priority config option. The value ranges from 0 to the maximum integer;
 * with multiple devices, a smaller value means a higher priority, and 0 is the highest
 * priority. The AUTO plugin dispatches the network to a device according to this value.
 * When all devices are free, the network is dispatched to the strongest device even if
 * its priority value is not 0.
 */
DECLARE_CONFIG_KEY(AUTO_NETWORK_PRIORITY);
} // namespace MultiDeviceConfigParams
} // namespace InferenceEngine
@ -309,16 +309,15 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr<IExecutableNetwor
    OPENVINO_ASSERT(outputsInfo.size() == function->get_output_size());

    for (const auto& param : function->get_parameters()) {
        auto new_param = param->copy_with_new_inputs({});
        auto new_param = ov::as_type_ptr<ov::op::v0::Parameter>(param->copy_with_new_inputs({}));
        new_param->set_friendly_name(param->get_friendly_name());
        if (add_operation_names)
            new_param->output(0).get_tensor().add_names({new_param->get_friendly_name()});
        // WA: use CNNNetwork's precisions since plugins sometimes override their precisions
        // after transformation pipeline is run
        new_param->set_output_type(
            0,
            InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()),
            new_param->get_output_partial_shape(0));
        new_param->set_element_type(
            InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()));
        new_param->validate_and_infer_types();
        const_params.emplace_back(new_param);
    }
    for (const auto& result : function->get_results()) {
@ -326,10 +325,9 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr<IExecutableNetwor
            result->get_output_partial_shape(0));
        const std::string param_name = ngraph::op::util::create_ie_output_name(result->input_value(0));
        fake_param->set_friendly_name(param_name);
        fake_param->set_output_type(
            0,
            InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()),
            fake_param->get_output_partial_shape(0));
        fake_param->set_element_type(
            InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()));
        fake_param->validate_and_infer_types();
        auto new_result = result->copy_with_new_inputs({fake_param});
        new_result->set_friendly_name(result->get_friendly_name());
        if (add_operation_names) {
@ -149,11 +149,13 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                                                           const std::vector<DeviceInformation>& metaDevices,
                                                           const std::string& strDevices,
                                                           MultiDeviceInferencePlugin* plugin,
                                                           const AutoContext& context,
                                                           const bool needPerfCounters)
    : _devicePriorities{metaDevices}
    , _devicePrioritiesInitial{metaDevices}
    , _needPerfCounters(needPerfCounters)
    , _multiPlugin(plugin)
    , _context(context)
    , _workModeIsAUTO(true) {
    if (_multiPlugin->GetCore() == nullptr) {
        IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object";
@ -173,7 +175,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
    _loadContext[ACTUALDEVICE].isEnabled = true;
    _loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(network);
    _loadContext[ACTUALDEVICE].metaDevices = metaDevices;
    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices, _loadContext[ACTUALDEVICE].networkPrecision);
    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices,
            _loadContext[ACTUALDEVICE].networkPrecision, _context.modelPriority);
    LOG_INFO("[AUTOPLUGIN]:select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
    bool isActualDevCPU =
        _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
@ -292,6 +295,13 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
        return;
    }

    // The network needs to be reloaded, so unregister its priority.
    // There may be a potential issue here: for example, with dGPU, VPUX and iGPU
    // present, a customer wants to LoadNetwork with priority 0 on dGPU and
    // priority 1 on VPUX. If the dGPU load fails, the result is not deterministic:
    // maybe both networks end up on VPUX, or maybe network 0 lands on VPUX
    // and network 1 on iGPU.
    _multiPlugin->UnregisterPriority(_context.modelPriority, context.deviceInfo.uniqueName);
    // remove the current device from deviceList
    auto eraseDevice = std::find_if(deviceList.begin(), deviceList.end(),
            [device](DeviceInformation& d){
@ -305,7 +315,8 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,

    // select next candidate device
    try {
        context.deviceInfo = _multiPlugin->SelectDevice(deviceList, context.networkPrecision);
        context.deviceInfo = _multiPlugin->SelectDevice(deviceList,
                context.networkPrecision, _context.modelPriority);
    }
    catch (const std::exception& e) {
        return;
@ -382,7 +393,7 @@ void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
    // for every MultiDeviceExecutableNetwork instance
    std::call_once(_oc, [this] () {
        if (_loadContext[ACTUALDEVICE].future.valid()) {
            _loadContext[ACTUALDEVICE].future.get();
            _loadContext[ACTUALDEVICE].future.wait();
        }
        // if _loadContext[ACTUALDEVICE] load failed, fall back to _loadContext[CPU]
        if (!_loadContext[ACTUALDEVICE].isAlready) {
@ -460,14 +471,18 @@ void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
}

MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
    if (_workModeIsAUTO) {
        // this is necessary to guarantee members are destroyed only after getting the future
        if (_workModeIsAUTO && _loadContext[CPU].isEnabled) {
            _loadContext[CPU].future.get();
        if (_loadContext[CPU].isEnabled) {
            _loadContext[CPU].future.wait();
            WaitActualNetworkReady();
            // it's necessary to wait for the network-loading threads to stop here.
            InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
            _executor.reset();
        }
        _multiPlugin->UnregisterPriority(_context.modelPriority,
                _loadContext[ACTUALDEVICE].deviceInfo.uniqueName);
    }
    {
        std::lock_guard<std::mutex> lock(_mutex);
        _devicePriorities.clear();
@ -615,21 +630,21 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, Inferen
            _devicePriorities = metaDevices;

            // update value in config
            _confMutex.lock();
            std::lock_guard<std::mutex> lockConf(_confMutex);
            _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
            _confMutex.unlock();
        }
    }
}

InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::string &name) const {
    _confMutex.lock();
    {
        std::lock_guard<std::mutex> lock(_confMutex);
        auto it = _config.find(name);
        if (it != _config.end()) {
            _confMutex.unlock();
            return it->second;
        } else {
            _confMutex.unlock();
        }
    }

    // find config key among networks config keys
    for (const auto& desc : _networksPerDevice) {
        const auto& execNetwork = desc.second;
@ -642,7 +657,6 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::st
        }
    IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
}
}
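The GetConfig change above swaps manual lock()/unlock() pairs for a scoped lock_guard; a minimal, self-contained sketch of the RAII pattern with hypothetical names:

#include <map>
#include <mutex>
#include <string>

std::mutex m;
std::map<std::string, std::string> cfg;

// RAII: the guard releases the mutex on every exit path, including the
// early return, so no unlock() call can be missed.
bool tryGet(const std::string& key, std::string& out) {
    std::lock_guard<std::mutex> lock(m);
    auto it = cfg.find(key);
    if (it == cfg.end())
        return false;   // unlocks here
    out = it->second;
    return true;        // and here
}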
InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {
    if (_workModeIsAUTO) {
@ -43,6 +43,12 @@ struct DeviceInformation {
    std::map<std::string, std::string> config;
    int numRequestsPerDevices;
    std::string defaultDeviceID;
    DeviceName uniqueName;
};

struct AutoContext {
    bool needPerfCounters = {false};
    unsigned int modelPriority = 0;
};

struct AutoLoadContext {
@ -153,6 +159,7 @@ public:
                                 const std::vector<DeviceInformation>& metaDevices,
                                 const std::string& strDevices,
                                 MultiDeviceInferencePlugin* plugin,
                                 const AutoContext& context,
                                 const bool needPerfCounters = false);

    void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config) override;
@ -202,6 +209,7 @@ private:
    std::shared_ptr<InferenceEngine::ICore> _core;
    InferenceEngine::IStreamsExecutor::Ptr _executor;
    MultiDeviceInferencePlugin* _multiPlugin;
    AutoContext _context;
    bool _workModeIsAUTO = {false};
    mutable std::once_flag _oc;
    std::once_flag _firstLoadOC;
@ -60,10 +60,15 @@ namespace {
        res.push_back(CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO));
        res.push_back(PluginConfigParams::KEY_PERF_COUNT);
        res.push_back(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS);
        res.push_back(MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY);
        return res;
    }();
}  // namespace

std::mutex MultiDeviceInferencePlugin::_mtx;
std::map<unsigned int, std::list<std::string>> MultiDeviceInferencePlugin::_priorityMap;

std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
    const std::map<std::string, std::string> & config, const std::string & deviceName) const {
    std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
@ -137,11 +142,31 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons

        std::string defaultDeviceID = "";
        DeviceIDParser parsed{deviceName};
        if (parsed.getDeviceID().empty())
        std::string deviceid = parsed.getDeviceID();
        if (deviceid.empty()) {
            defaultDeviceID = getDefaultDeviceID(deviceName);
            deviceid = defaultDeviceID;
        }

        std::string fullDeviceName = "";
        std::string uniqueName = "";
        if (parsed.getDeviceName() == "GPU") {
            std::vector<std::string> supportedMetrics = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS));
            if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) {
                fullDeviceName = GetCore()->GetMetric(deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
            }
        }

        if (fullDeviceName.empty()) {
            uniqueName = parsed.getDeviceName() + "_" + deviceid;
        } else {
            uniqueName = fullDeviceName + "_" + deviceid;
        }

        LOG_DEBUG("deviceName:%s, defaultDeviceID:%s, uniqueName:%s",
                deviceName.c_str(), defaultDeviceID.c_str(), uniqueName.c_str());
        // create meta device
        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID });
        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID, uniqueName});
    }

    return metaDevices;
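A hypothetical walk-through of the uniqueName logic above (device names and metric value are illustrative, not from the patch):

//   deviceName = "GPU.1"  -> parsed name "GPU", device id "1"
//   FULL_DEVICE_NAME metric reported, e.g. "Intel(R) Iris(R) Xe Graphics (iGPU)"
//   uniqueName = fullDeviceName + "_" + deviceid
//              = "Intel(R) Iris(R) Xe Graphics (iGPU)_1"
// If the metric is unavailable, it falls back to "GPU_1".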
@ -162,10 +187,9 @@ InferenceEngine::Parameter MultiDeviceInferencePlugin::GetConfig(const std::stri
}

void MultiDeviceInferencePlugin::SetConfig(const std::map<std::string, std::string> & config) {
    bool needPerfCounters = false;
    AutoContext context;
    std::map<std::string, std::string> filterConfig;
    CheckConfig(config, needPerfCounters, filterConfig);
    CheckConfig(config, context, filterConfig);
    for (auto && kvp : config) {
        const auto& name = kvp.first;
        _config[name] = kvp.second;
@ -237,10 +261,11 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
    if (workModeAuto) {
        // check the config, decide whether the PerfCounters config needs to be
        // set on the device, and set the filter config
        OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl::AutoMode");
        bool needPerfCounters = false;
        AutoContext context;
        std::map<std::string, std::string> filterConfig;
        CheckConfig(fullConfig, needPerfCounters, filterConfig);
        CheckConfig(fullConfig, context, filterConfig);
        // filter to the devices that support the filter config
        auto strDevices = GetDeviceList(fullConfig);
        auto metaDevices = ParseMetaDevices(strDevices, fullConfig);
@ -269,7 +294,7 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
            strDevices += ((iter + 1) == supportDevices.end()) ? "" : ",";
        }

        return std::make_shared<MultiDeviceExecutableNetwork>(modelPath, network, supportDevices, strDevices, this, needPerfCounters);
        return std::make_shared<MultiDeviceExecutableNetwork>(modelPath, network, supportDevices, strDevices, this, context, context.needPerfCounters);
    }
    OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl:MultiMode");
    if (priorities == fullConfig.end()) {
@ -377,20 +402,18 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork&
    return queryResult;
}

DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision) {
DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<DeviceInformation>& metaDevices,
        const std::string& networkPrecision, unsigned int priority) {
    OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::SelectDevice");
    if (metaDevices.empty()) {
        IE_THROW(NotFound) << "No available device to select in " << GetName() << " plugin";
    }
    if (metaDevices.size() == 1) {
        return metaDevices.at(0);
    }

    std::vector<DeviceInformation> CPU;
    std::vector<DeviceInformation> dGPU;
    std::vector<DeviceInformation> iGPU;
    std::vector<DeviceInformation> MYRIAD;
    std::vector<DeviceInformation> VPUX;
    std::list<DeviceInformation> CPU;
    std::list<DeviceInformation> dGPU;
    std::list<DeviceInformation> iGPU;
    std::list<DeviceInformation> MYRIAD;
    std::list<DeviceInformation> VPUX;

    for (auto& item : metaDevices) {
        if (item.deviceName.find("CPU") == 0) {
@ -406,96 +429,103 @@ DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector<Dev
            continue;
        }
        if (item.deviceName.find("GPU") == 0) {
            auto gpuFullDeviceName = GetCore()->GetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
            if (gpuFullDeviceName.find("iGPU") != std::string::npos) {
            auto& gpuUniqueName = item.uniqueName;
            if (gpuUniqueName.find("iGPU") != std::string::npos) {
                iGPU.push_back(item);
            } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) {
            } else if (gpuUniqueName.find("dGPU") != std::string::npos) {
                dGPU.push_back(item);
            }
            continue;
        }
    }

    if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) {
        IE_THROW(NotFound) << "No available device found";
    }

    // Device selection priority: dGPU > VPUX > iGPU > MYRIAD > CPU
    if (!dGPU.empty()) {
        for (auto&& item : dGPU) {
            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
            if (supportNetwork != capability.end()) {
                return item;
            }
        }
    } else if (!VPUX.empty()) {
        for (auto&& item : VPUX) {
            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
            if (supportNetwork != capability.end()) {
                return item;
            }
        }
    } else if (!iGPU.empty()) {
        for (auto&& item : iGPU) {
            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
            if (supportNetwork != capability.end()) {
                return item;
            }
        }
    } else if (!MYRIAD.empty()) {
        for (auto&& item : MYRIAD) {
            std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
            auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision);
            if (supportNetwork != capability.end()) {
                return item;
            }
        }
    }
    std::list<DeviceInformation> devices;
    devices.splice(devices.end(), dGPU);
    devices.splice(devices.end(), VPUX);
    devices.splice(devices.end(), iGPU);
    devices.splice(devices.end(), MYRIAD);

    // If the network is FP32 but no device supports FP32, offload the FP32 network to a device that supports FP16.
    std::list<DeviceInformation> validDevices;

    auto selectSupportDev = [this, &devices, &validDevices](const std::string& networkPrecision) {
        for (auto iter = devices.begin(); iter != devices.end();) {
            std::vector<std::string> capability = GetCore()->GetMetric(iter->deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
            auto supportNetwork = std::find(capability.begin(), capability.end(), (networkPrecision));
            if (supportNetwork != capability.end()) {
                validDevices.push_back(std::move(*iter));
                devices.erase(iter++);
                continue;
            }
            iter++;
        }
    };
    selectSupportDev(networkPrecision);
    // If the network is FP32, also collect the devices that support FP16 but not FP32.
    if (networkPrecision == "FP32") {
        if (!dGPU.empty()) {
            for (auto&& item : dGPU) {
                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
                if (supportNetwork != capability.end()) {
                    return item;
                }
            }
        } else if (!VPUX.empty()) {
            for (auto&& item : VPUX) {
                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
                if (supportNetwork != capability.end()) {
                    return item;
                }
            }
        } else if (!iGPU.empty()) {
            for (auto&& item : iGPU) {
                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
                if (supportNetwork != capability.end()) {
                    return item;
                }
            }
        } else if (!MYRIAD.empty()) {
            for (auto&& item : MYRIAD) {
                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
                if (supportNetwork != capability.end()) {
                    return item;
                }
            }
        }
        const std::string f16 = "FP16";
        selectSupportDev(f16);
    }
    // add CPU devices if they exist
    validDevices.splice(validDevices.end(), CPU);

    if (CPU.empty()) {
    if (validDevices.empty()) {
        IE_THROW() << "Cannot select any device";
    }
        return CPU[0];
    // All available devices are now in validDevices; those already claimed by
    // higher-priority networks need to be removed. Save the last device first.
    DeviceInformation lastDevice = validDevices.back();
    {
        // begin to filter devices
        std::lock_guard<std::mutex> lck(_mtx);
        for (auto && kvp : _priorityMap) {
            if (kvp.first >= priority) {
                continue;
            }
            auto& filterDevices = kvp.second;
            auto sd = std::remove_if(validDevices.begin(), validDevices.end(), [&filterDevices](DeviceInformation device) {
                auto iter = std::find_if(filterDevices.begin(), filterDevices.end(), [&device](std::string uniqueName) {
                    return (uniqueName == device.uniqueName);
                });
                return iter != filterDevices.end() ? true : false;
            });
            validDevices.erase(sd, validDevices.end());
        }
    }

    DeviceInformation* ptrSelectDevice = NULL;
    if (validDevices.empty()) {
        // after removing the higher-priority devices the available device list
        // is empty, so select the last of all available devices
        ptrSelectDevice = &lastDevice;
    } else {
        // select the first of the remaining available devices
        ptrSelectDevice = &validDevices.front();
    }
    // record the device priority
    RegisterPriority(priority, ptrSelectDevice->uniqueName);
    return *ptrSelectDevice;
}
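A minimal, self-contained sketch of the priority filtering performed above, with hypothetical device names (the real code keys _priorityMap by the AUTO_NETWORK_PRIORITY value and stores uniqueNames of already-selected devices):

#include <list>
#include <map>
#include <string>

int main() {
    // priority value -> uniqueNames already claimed at that priority
    std::map<unsigned int, std::list<std::string>> priorityMap = {
        {0, {"dGPU_0"}}  // a priority-0 network already took the dGPU
    };
    std::list<std::string> candidates = {"dGPU_0", "VPUX_0", "CPU_0"};

    unsigned int priority = 1;  // current network's priority
    for (auto&& kvp : priorityMap) {
        if (kvp.first >= priority)
            continue;  // only higher-priority (smaller value) entries filter us
        for (const auto& taken : kvp.second)
            candidates.remove(taken);
    }
    // candidates is now {"VPUX_0", "CPU_0"}; the first one gets selected.
}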

void MultiDeviceInferencePlugin::UnregisterPriority(const unsigned int& priority,
        const std::string& deviceName) {
    std::lock_guard<std::mutex> lck(_mtx);
    auto& priorityDevices = _priorityMap[priority];
    for (auto iter = priorityDevices.begin(); iter != priorityDevices.end();) {
        if (*iter == deviceName) {
            priorityDevices.erase(iter);
            break;
        }
        iter++;
    }
}

void MultiDeviceInferencePlugin::RegisterPriority(const unsigned int& priority,
        const std::string& deviceName) {
    std::lock_guard<std::mutex> lck(_mtx);
    auto& priorityDevices = _priorityMap[priority];
    priorityDevices.push_back(deviceName);
}
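A hedged sketch of the register/unregister lifecycle, derived only from the calls visible in this diff:

//   SelectDevice(..., priority)          -> RegisterPriority(priority, uniqueName)
//   TryToLoadNetWork() on load failure   -> UnregisterPriority(priority, uniqueName), then retry
//   ~MultiDeviceExecutableNetwork()      -> UnregisterPriority(priority, uniqueName)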

std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
@ -520,19 +550,17 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string
}

void MultiDeviceInferencePlugin::CheckConfig(const std::map<std::string, std::string>& config,
        bool& needPerfCounters, std::map<std::string, std::string>& filterConfig) {
        AutoContext& context, std::map<std::string, std::string>& filterConfig) {
    // TODO: this code needs optimizing; there is too much duplication

    const auto perf_hints_configs = PerfHintsConfig::SupportedKeys();
    for (auto&& kvp : config) {
        if (kvp.first.find("AUTO_") == 0) {
            continue;
        } else if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
        if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
            if (kvp.second == PluginConfigParams::YES) {
                needPerfCounters = true;
                context.needPerfCounters = true;
                filterConfig.insert({kvp.first, kvp.second});
            } else if (kvp.second == PluginConfigParams::NO) {
                needPerfCounters = false;
                context.needPerfCounters = false;
            } else {
                IE_THROW() << "Unsupported config value: " << kvp.second
                           << " for key: " << kvp.first;
@ -551,10 +579,24 @@ void MultiDeviceInferencePlugin::CheckConfig(const std::map<std::st
                IE_THROW() << "Unsupported config value: " << kvp.second
                           << " for key: " << kvp.first;
            }
        } else if (kvp.first == MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY) {
            try {
                int priority = std::stoi(kvp.second);
                if (priority < 0) {
                    IE_THROW() << "Unsupported config value: " << kvp.second
                               << " for key: " << kvp.first;
                }
                context.modelPriority = priority;
            } catch(...) {
                IE_THROW() << "Unsupported config value: " << kvp.second
                           << " for key: " << kvp.first;
            }
        } else if (std::find(perf_hints_configs.begin(), perf_hints_configs.end(), kvp.first) != perf_hints_configs.end()) {
            PerfHintsConfig::CheckConfigAndValue(kvp);
        } else if (supported_configKeys.end() == std::find(supported_configKeys.begin(), supported_configKeys.end(), kvp.first)) {
            IE_THROW() << "Unsupported config key: " << kvp.first;
        } else if (kvp.first.find("AUTO_") == 0) {
            continue;
        }
    }
}
@ -8,6 +8,7 @@
#include <map>
#include <vector>
#include <string>
#include <list>

#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
@ -45,7 +46,10 @@ public:
                                                       const std::map<std::string, std::string> & config) const;

    std::string GetDeviceList(const std::map<std::string, std::string>& config) const;
    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices,
            const std::string& networkPrecision = METRIC_VALUE(FP32), unsigned int priority = 0);
    void UnregisterPriority(const unsigned int& priority, const std::string& deviceName);
    void RegisterPriority(const unsigned int& priority, const std::string& deviceName);

protected:
    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
@ -56,10 +60,12 @@ private:
                                                     InferenceEngine::CNNNetwork network,
                                                     const std::map<std::string, std::string>& config,
                                                     const std::string &networkPrecision = METRIC_VALUE(FP32));
    static void CheckConfig(const std::map<std::string, std::string>& config, bool& needPerfCounters,
    static void CheckConfig(const std::map<std::string, std::string>& config, AutoContext& context,
                            std::map<std::string, std::string>& filterConfig);
    std::vector<DeviceInformation> FilterDevice(const std::vector<DeviceInformation>& metaDevices,
                                                const std::map<std::string, std::string>& config);
    static std::mutex _mtx;
    static std::map<unsigned int, std::list<std::string>> _priorityMap;
};

} // namespace MultiDevicePlugin
@ -216,6 +216,8 @@ REGISTER_FACTORY(v8, GatherND);
REGISTER_FACTORY(v8, DeformableConvolution);
REGISTER_FACTORY(v8, NV12toRGB);
REGISTER_FACTORY(v8, NV12toBGR);
REGISTER_FACTORY(v8, I420toRGB);
REGISTER_FACTORY(v8, I420toBGR);

// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
@ -52,8 +52,20 @@ static void CreateNV12toBGROp(Program& p, const std::shared_ptr<ngraph::op::v8::
    CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR);
}

static void CreateI420toRGBOp(Program& p, const std::shared_ptr<ngraph::op::v8::I420toRGB>& op) {
    p.ValidateInputs(op, {1, 3});
    CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB);
}

static void CreateI420toBGROp(Program& p, const std::shared_ptr<ngraph::op::v8::I420toBGR>& op) {
    p.ValidateInputs(op, {1, 3});
    CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::BGR);
}

REGISTER_FACTORY_IMPL(v8, NV12toRGB);
REGISTER_FACTORY_IMPL(v8, NV12toBGR);
REGISTER_FACTORY_IMPL(v8, I420toRGB);
REGISTER_FACTORY_IMPL(v8, I420toBGR);

} // namespace intel_gpu
} // namespace runtime
@ -137,11 +137,12 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ng

    auto weightsName = inputs[1];
    auto weights_node = op->get_input_node_shared_ptr(1);
    // WA: For the cases like Const(weights)->Sub(zp)->Deconv.
    bool hasConstantWeights = IsNodeOnConstPath(weights_node);
    // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights.
    // Dimensions order of weights blob is IOYX, but
    // the selected format is OIYX by default. So we need to swap (and transpose) I and O dimensions to match the format
    // For Constant node on input transpose is not needed, because the data is transposed on const node creation
    if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
    if ((hasConstantWeights && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) || !hasConstantWeights) {
        std::string permuteName = layerName + "_cldnn_weights_permute";
        auto weights_rank = op->get_input_shape(1).size();
        std::vector<uint16_t> permute_order(weights_rank);
@ -195,11 +196,12 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_p

    auto weightsName = inputs[1];
    auto weights_node = op->get_input_node_shared_ptr(1);
    // WA: For the cases like Const(weights)->Sub(zp)->Deconv.
    bool hasConstWeights = IsNodeOnConstPath(weights_node);
    // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights.
    // Dimensions order of weights blob is IOYX, but
    // the selected format is OIYX by default. So we need to swap I and O dimensions to match the format.
    // For Constant node on input transpose is not needed, because the data is transposed on const node creation
    if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
    if ((hasConstWeights && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) || !hasConstWeights) {
        std::string permuteName = layerName + "_cldnn_weights_permute";
        auto weights_rank = op->get_input_shape(1).size();
        std::vector<uint16_t> permute_order(weights_rank);
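// The diff is cut off before permute_order is filled in; a hedged sketch of
// what swapping I and O amounts to for an IOYX weights blob (helper name is
// an assumption, not the plugin's code):

#include <cstdint>
#include <numeric>
#include <utility>
#include <vector>

// Identity order with the first two dimensions swapped,
// e.g. rank 4 (IOYX -> OIYX) yields {1, 0, 2, 3}.
std::vector<uint16_t> make_io_swap_order(size_t weights_rank) {
    std::vector<uint16_t> permute_order(weights_rank);
    std::iota(permute_order.begin(), permute_order.end(), uint16_t{0});
    std::swap(permute_order[0], permute_order[1]);
    return permute_order;
}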
@ -181,7 +181,9 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
    bool is_convert_color_input = false;
    for (auto& node : op->get_users()) {
        is_convert_color_input |= ngraph::is_type<ngraph::op::v8::NV12toRGB>(node) ||
                                  ngraph::is_type<ngraph::op::v8::NV12toBGR>(node);
                                  ngraph::is_type<ngraph::op::v8::NV12toBGR>(node) ||
                                  ngraph::is_type<ngraph::op::v8::I420toRGB>(node) ||
                                  ngraph::is_type<ngraph::op::v8::I420toBGR>(node);
    }

    if (is_convert_color_input) {
@ -41,7 +41,9 @@ static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Res
    auto outputlayout = outputDesc.getLayout();

    if (ngraph::is_type<ngraph::op::v8::NV12toRGB>(prev) ||
        ngraph::is_type<ngraph::op::v8::NV12toBGR>(prev)) {
        ngraph::is_type<ngraph::op::v8::NV12toBGR>(prev) ||
        ngraph::is_type<ngraph::op::v8::I420toRGB>(prev) ||
        ngraph::is_type<ngraph::op::v8::I420toBGR>(prev)) {
        outputlayout = NHWC;
    }

@ -346,7 +346,7 @@ bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
    std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) {
        if (nodes_processed.count(node)) return true;
        nodes_processed.insert(node);
        // If input is constant, then drop if from the processing list
        // If input is constant, then drop it from the processing list
        if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(node) != nullptr)
            return true;
        // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path
@ -96,7 +96,13 @@ namespace {
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_DEBUG}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}}
         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}}
    };

    INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, CorrectConfigTests,
@ -149,17 +155,6 @@ namespace {
         {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}}
    };

    const std::vector<std::map<std::string, std::string>> multiconf = {
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}}
    };

    const std::vector<std::map<std::string, std::string>> autoinconfigs = {
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, "DOESN'T EXIST"}},
@ -175,10 +170,25 @@ namespace {
         {InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, "OFF"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}}
    };

    const std::vector<std::map<std::string, std::string>> multiconf = {
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}}
    };

    INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigTests,
        ::testing::Combine(
            ::testing::Values(CommonTestUtils::DEVICE_CPU),
@ -8,8 +8,15 @@
using namespace BehaviorTestsDefinitions;
using namespace InferenceEngine;

const std::vector<Precision> precisionSet = {Precision::FP32, Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, Precision::BOOL,
                                             Precision::I64, Precision::U64};
const std::vector<Precision> precisionSet = {
    Precision::U8,   Precision::I8,
    Precision::U16,  Precision::I16,
    Precision::U32,  Precision::I32,
    Precision::U64,  Precision::I64,
    Precision::BF16, Precision::FP16,
    Precision::FP32, Precision::FP64,
    Precision::BOOL
};

const std::vector<setType> typeSet = {setType::INPUT, setType::OUTPUT, setType::BOTH};

@ -18,24 +18,26 @@ const std::vector<ngraph::helpers::ConversionTypes> conversionOpTypes = {
const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};

const std::vector<InferenceEngine::Precision> netPrecisions = {
    // Ticket: 59594
    // InferenceEngine::Precision::I4,
    InferenceEngine::Precision::I8,
    InferenceEngine::Precision::I16,
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::I64,
    // Ticket: 59594
    // InferenceEngine::Precision::BIN,
    // InferenceEngine::Precision::BOOL,
    // InferenceEngine::Precision::U4,
    InferenceEngine::Precision::U8,
    InferenceEngine::Precision::I8,
    InferenceEngine::Precision::U16,
    // Ticket: 59594
    // InferenceEngine::Precision::U32,
    InferenceEngine::Precision::I16,
    InferenceEngine::Precision::U32,
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::U64,
    InferenceEngine::Precision::I64,
    InferenceEngine::Precision::BF16,
    InferenceEngine::Precision::FP16,
    InferenceEngine::Precision::FP32};
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP64,
    InferenceEngine::Precision::BOOL,
    InferenceEngine::Precision::MIXED,
    InferenceEngine::Precision::Q78,
    InferenceEngine::Precision::U4,
    InferenceEngine::Precision::I4,
    InferenceEngine::Precision::BIN,
    InferenceEngine::Precision::CUSTOM,
};

INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest,
                         ConversionLayerTest,
@ -104,17 +104,6 @@ std::vector<std::string> disabledTestPatterns() {

        // CPU plugin does not support some precisions
        R"(smoke_CachingSupportCase_CPU/LoadNetworkCacheTestBase.CompareWithRefImpl/ReadConcatSplitAssign_f32_batch1_CPU)",
        // CPU plugin does not support some precisions
        R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(i8|u32).*)",
        R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(f16).*)",
        R"(.*EltwiseLayerTest.*NetType=f16.*)",

        // TODO: CVS-66526 overrides i/o precisions in execution graph
        // as WA we used GetInputsInfo() precisions instead of ngraph ones
        // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i16|u16).*)",
        // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i64|u64).*)",
        // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i16|u16).*)",
        // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i64|u64).*)",

        // CPU does not support dynamic rank
        // Issue: CVS-66778
@ -152,6 +141,7 @@ std::vector<std::string> disabledTestPatterns() {
        // bad accuracy
        R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos.
            *IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",

        // Issue: 71121
        R"(.*smoke_Proposal*.*TS=\(2.*)",
        // TODO : CVS-69533
@ -165,6 +155,20 @@ std::vector<std::string> disabledTestPatterns() {
        // Failure happened on win and macos for current seeds.
        R"(.*CTCLossLayerTest.*CMR=1.*)",
        R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)",
        // Issue: 71756
        R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)",
        R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)",
        // Issue: 72150
        R"(.*smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=.*_Device=CPU_PrecisionInNet=BOOL.*)",
        // Issue: 59594
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BOOL.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*MIXED.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*Q78.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*U4.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*I4.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BIN.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*CUSTOM.*)",
        R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*UNSPECIFIED.*)",
    };

#define FIX_62820 0
@ -5,38 +5,78 @@
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "functional_test_utils/ov_tensor_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include <shared_test_classes/single_layer/convolution_backprop_data.hpp>
#include "openvino/core/preprocess/pre_post_process.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;

namespace CPULayerTestsDefinitions {
using LayerTestsDefinitions::convBackpropDataSpecificParams;
using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet;

typedef std::tuple<
        convBackpropDataLayerTestParamsSet,
        CPUSpecificParams,
using DeconvSpecParams = LayerTestsDefinitions::convBackpropDataSpecificParams;

using DeconvInputData = std::tuple<InputShape,                           // data shape
                                   ngraph::helpers::InputLayerType,      // 'output_shape' input type
                                   std::vector<std::vector<int32_t>>>;   // values for 'output_shape'

using DeconvLayerCPUTestParamsSet = std::tuple<DeconvSpecParams,
                                               DeconvInputData,
                                               ElementType,
        fusingSpecificParams,
        std::map<std::string, std::string> > deconvLayerCPUTestParamsSet;
                                               CPUSpecificParams,
                                               std::map<std::string, std::string>>;

class DeconvolutionLayerCPUTest : public testing::WithParamInterface<deconvLayerCPUTestParamsSet>,
                                  virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
class DeconvolutionLayerCPUTest : public testing::WithParamInterface<DeconvLayerCPUTestParamsSet>,
                                  virtual public SubgraphBaseTest, public CpuTestWithFusing {
public:
    static std::string getTestCaseName(testing::TestParamInfo<deconvLayerCPUTestParamsSet> obj) {
        convBackpropDataLayerTestParamsSet basicParamsSet;
        CPUSpecificParams cpuParams;
    static std::string getTestCaseName(testing::TestParamInfo<DeconvLayerCPUTestParamsSet> obj) {
        DeconvSpecParams basicParamsSet;
        DeconvInputData inputData;
        ElementType prec;
        fusingSpecificParams fusingParams;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param;
        std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = obj.param;

        ngraph::op::PadType padType;
        InferenceEngine::SizeVector kernel, stride, dilation;
        std::vector<ptrdiff_t> padBegin, padEnd, outPadding;
        size_t convOutChannels;
        std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet;

        InputShape inputShape;
        ngraph::helpers::InputLayerType outShapeType;
        std::vector<std::vector<int32_t>> outShapeData;
        std::tie(inputShape, outShapeType, outShapeData) = inputData;

        std::ostringstream result;
        result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo<convBackpropDataLayerTestParamsSet>(
                basicParamsSet, 0));
        result << "IS=";
        result << CommonTestUtils::partialShape2str({inputShape.first}) << "_";
        result << "TS=";
        for (const auto& shape : inputShape.second) {
            result << "(";
            result << CommonTestUtils::vec2str(shape);
            result << ")_";
        }
        result << "PRC=" << prec << "_";
        result << "K=" << CommonTestUtils::vec2str(kernel) << "_";
        result << "S=" << CommonTestUtils::vec2str(stride) << "_";
        result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_";
        result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_";
        result << "D=" << CommonTestUtils::vec2str(dilation) << "_";
        result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_";
        result << "O=" << convOutChannels << "_";
        result << "AP=" << padType << "_";
        result << "OUT_SH=" << outShapeType << "_";
        result << "OUT_D=";
        for (const auto& data : outShapeData) {
            result << "(";
            result << CommonTestUtils::vec2str(data);
            result << ")_";
        }

        result << CPUTestsBase::getTestCaseName(cpuParams);
        result << CpuTestWithFusing::getTestCaseName(fusingParams);
@ -50,53 +90,159 @@ public:

        return result.str();
    }

    void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
        inputs.clear();
        const auto& funcInputs = function->inputs();
        for (int i = 0; i < funcInputs.size(); ++i) {
            const auto& funcInput = funcInputs[i];
            ov::runtime::Tensor tensor;

            if (i == 1) {
                tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], outShapeData[inferRequestNum].data());
            } else {
                tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256);
            }

            inputs.insert({funcInput.get_node_shared_ptr(), tensor});
        }
        inferRequestNum++;
    }

    void init_ref_function(std::shared_ptr<ov::Model> &funcRef, const std::vector<ov::Shape>& targetInputStaticShapes) override {
        if (function->get_parameters().size() == 1) {
            ngraph::helpers::resize_function(funcRef, targetInputStaticShapes);
        } else {
            // WA: output_shape depends on the 3rd deconvolution input's data,
            // but the reference implementation doesn't implement shape inference,
            // so we need to build a new ngraph function and replace the 3rd input parameter with a constant
            // to get valid output shapes
            funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT);
        }
    }
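// The WA above rebuilds the reference graph from scratch. For illustration
// only, a hypothetical in-place variant of the same idea (helper name and
// the assumption that input 1 is 'output_shape' are mine, not the test's):
static void freeze_output_shape_input(const std::shared_ptr<ov::Model>& model,
                                      const std::vector<int32_t>& shapeValues) {
    auto param = model->get_parameters().at(1);  // assumed 'output_shape' input
    auto constant = ngraph::opset8::Constant::create(ngraph::element::i32,
                                                     {shapeValues.size()},
                                                     shapeValues);
    ngraph::replace_node(param, constant);       // swap Parameter for Constant
    model->remove_parameter(param);
    model->validate_nodes_and_infer_types();     // now yields static output shapes
}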

    void validate() override {
        if (function->get_parameters().size() == 2) {
            auto pos = std::find_if(inputs.begin(), inputs.end(),
                [](const std::pair<std::shared_ptr<ov::Node>, ov::runtime::Tensor> &params) {
                    return params.first->get_friendly_name() == "param_1";
                });
            IE_ASSERT(pos != inputs.end());
            inputs.erase(pos);
        }
        SubgraphBaseTest::validate();
    }

    void configure_model() override {
        ov::preprocess::PrePostProcessor p(function);
        {
            auto& params = function->get_parameters();
            for (size_t i = 0; i < params.size(); i++) {
                if (i > 0) {
                    continue;
                }
                if (inType != ov::element::Type_t::undefined) {
                    p.input(i).tensor().set_element_type(inType);
                }
            }
        }
        {
            auto results = function->get_results();
            for (size_t i = 0; i < results.size(); i++) {
                if (outType != ov::element::Type_t::undefined) {
                    p.output(i).tensor().set_element_type(outType);
                }
            }
        }
        function = p.build();
    }

    std::shared_ptr<ov::Model> createGraph(const std::vector<ov::PartialShape>& inShapes, ngraph::helpers::InputLayerType outShapeType) {
        auto params = ngraph::builder::makeDynamicParams(prec, {inShapes.front()});
        std::shared_ptr<ov::Node> outShapeNode;
        if (!outShapeData.empty()) {
            if (outShapeType == ngraph::helpers::InputLayerType::PARAMETER) {
                IE_ASSERT(inputDynamicShapes.size() == 2);
                auto outShapeParam = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::i32, inputDynamicShapes.back());
                params.push_back(outShapeParam);
                outShapeNode = outShapeParam;
            } else {
                outShapeNode = ngraph::opset8::Constant::create(ngraph::element::i32, {outShapeData[inferRequestNum].size()}, outShapeData[inferRequestNum]);
            }
        }

        for (size_t i = 0; i < params.size(); i++) {
            params[i]->set_friendly_name(std::string("param_") + std::to_string(i));
        }

        std::shared_ptr<ov::Node> deconv;
        if (!outShapeData.empty()) {
            IE_ASSERT(outShapeNode != nullptr);
            deconv = ngraph::builder::makeConvolutionBackpropData(params[0], outShapeNode, prec, kernel, stride, padBegin,
                                                                  padEnd, dilation, padType, convOutChannels);
        } else {
            deconv = ngraph::builder::makeConvolutionBackpropData(params[0], prec, kernel, stride, padBegin,
                                                                  padEnd, dilation, padType, convOutChannels, false, outPadding);
        }

        return makeNgraphFunction(prec, params, deconv, "DeconvCPU");
    }

protected:
    InferenceEngine::SizeVector kernel, stride;

    void SetUp() override {
        convBackpropDataLayerTestParamsSet basicParamsSet;
        CPUSpecificParams cpuParams;
        rel_threshold = 1e-4f;

        targetDevice = CommonTestUtils::DEVICE_CPU;

        DeconvSpecParams basicParamsSet;
        DeconvInputData inputData;
        fusingSpecificParams fusingParams;
        CPUSpecificParams cpuParams;
        std::map<std::string, std::string> additionalConfig;
        std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam();
        std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = this->GetParam();

        InputShape inputShape;
        ngraph::helpers::InputLayerType outShapeType;
        std::tie(inputShape, outShapeType, outShapeData) = inputData;

        configuration.insert(additionalConfig.begin(), additionalConfig.end());

        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        std::tie(postOpMgrPtr, fusedOps) = fusingParams;

        convBackpropDataSpecificParams convParams;
        std::vector<size_t> inputShape;
        std::vector<size_t> outputShape;
        auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
        std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet;
        std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet;

        if (inPrc == Precision::UNSPECIFIED) {
            selectedType += std::string("_") + Precision(Precision::FP32).name();
        if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) {
            inType = outType = prec = ElementType::bf16;
            rel_threshold = 1e-2f;
        } else {
            selectedType += std::string("_") + inPrc.name();
            inType = outType = prec;
        }

        selectedType = makeSelectedTypeStr(selectedType, prec);

        std::vector<InputShape> paramsShapes;
        paramsShapes.push_back(inputShape);
        if (!outShapeData.empty() && outShapeType == ngraph::helpers::InputLayerType::PARAMETER) {
            const auto outShapeDims = ov::Shape{outShapeData.front().size()};
            paramsShapes.push_back(InputShape{outShapeDims, std::vector<ov::Shape>(inputShape.second.size(), outShapeDims)});
        }

        init_input_shapes(paramsShapes);

        function = createGraph(inputDynamicShapes, outShapeType);
    }

private:
    ElementType prec;
    ngraph::op::PadType padType;
    InferenceEngine::SizeVector dilation;
    std::vector<ptrdiff_t> padBegin, padEnd, outPadding;
    size_t convOutChannels;
        std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convParams;
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape });
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));

        auto deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), ngPrc, kernel, stride, padBegin,
                                                                              padEnd, dilation, padType, convOutChannels, false, outPadding);

        if (!outputShape.empty()) {
            auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape);
            deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), outShape, ngPrc, kernel, stride, padBegin,
                                                                             padEnd, dilation, padType, convOutChannels);
        }

        function = makeNgraphFunction(ngPrc, inputParams, deconvolutionNode, "convolutionBackpropData");
    }
    ngraph::helpers::InputLayerType outShapeType;
    std::vector<std::vector<int32_t>> outShapeData;
    size_t inferRequestNum = 0;
};

TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) {
@ -113,7 +259,7 @@ TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) {
        }
    }

    Run();
    run();
    CheckPluginRelatedResults(executableNetwork, "Deconvolution");
}
@ -126,29 +272,29 @@ const std::vector<fusingSpecificParams> fusingParamsSet{
|
||||
};
|
||||
|
||||
const std::map<std::string, std::string> cpuEmptyPluginConfig;
|
||||
const std::map<std::string, std::string> cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } };
|
||||
const std::vector<SizeVector> emptyOutputShape = { {} };
|
||||
const std::map<std::string, std::string>cpuBF16PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
|
||||
InferenceEngine::PluginConfigParams::YES } };
|
||||
const std::vector<std::vector<ptrdiff_t>> emptyOutputPadding = { {} };
|
||||
|
||||
/* ============= Deconvolution params (planar layout) ============= */
const SizeVector numOutChannels_Planar = { 6 };
const InferenceEngine::SizeVector numOutChannels_Planar = { 6 };

/* ============= Deconvolution params (blocked layout) ============= */
const SizeVector numOutChannels_Blocked = { 64 };
const InferenceEngine::SizeVector numOutChannels_Blocked = { 64 };

/* ============= Deconvolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = { {3, 3}, {1, 1} };
const std::vector<SizeVector> strides2d = { {1, 1}, {2, 2} };
const std::vector<InferenceEngine::SizeVector> kernels2d = { {3, 3}, {1, 1} };
const std::vector<InferenceEngine::SizeVector> strides2d = { {1, 1}, {2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
const std::vector<SizeVector> dilations2d = { {1, 1} };
const std::vector<InferenceEngine::SizeVector> dilations2d = { {1, 1} };

/* ============= Deconvolution params (3D) ============= */
const std::vector<SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
const std::vector<SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<InferenceEngine::SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
const std::vector<InferenceEngine::SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins3d = { {0, 0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds3d = { {0, 0, 0} };
const std::vector<SizeVector> dilations3d = { {1, 1, 1} };
const std::vector<InferenceEngine::SizeVector> dilations3d = { {1, 1, 1} };
/* ============= */

/* INSTANCES */
@ -164,41 +310,99 @@ const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine(
    ::testing::ValuesIn(emptyOutputPadding)
);

const std::vector<DeconvInputData> Planar_2D_inputs_smoke = {
    DeconvInputData{
        InputShape{{}, {{ 2, 12, 7, 7 }}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
        {{15, 15}, {9, 10}, {9, 9}}
    }
};

const std::vector<DeconvInputData> Planar_2D_inputs_nightly = {
    DeconvInputData{
        InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{15, 15}}
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Planar_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::ValuesIn(Planar_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Planar_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::ValuesIn(Planar_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupDeconvolution (Planar 3D) ============= */
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Planar_2D,
        ::testing::ValuesIn(Planar_2D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Planar_2D,
        ::testing::ValuesIn(Planar_2D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Planar 3D) ============= */
const std::vector<DeconvInputData> Planar_3D_inputs_smoke = {
    DeconvInputData{
        InputShape{{}, {{ 2, 12, 7, 7, 7 }}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}},
        ngraph::helpers::InputLayerType::PARAMETER,
        {{15, 15, 15}, {9, 10, 10}, {9, 9, 9}}
    }
};

const std::vector<DeconvInputData> Planar_3D_inputs_nightly = {
    DeconvInputData{
        InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{15, 15, 15}}
    }
};

const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine(
    ::testing::ValuesIn(kernels3d),
    ::testing::ValuesIn(strides3d),
@ -211,40 +415,72 @@ const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine(
);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Planar_3D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::ValuesIn(Planar_3D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Planar_3D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::ValuesIn(Planar_3D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupDeconvolution (Blocked 2D) ============= */
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Planar_3D,
        ::testing::ValuesIn(Planar_3D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Planar_3D,
        ::testing::ValuesIn(Planar_3D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Blocked 2D) ============= */
const std::vector<DeconvInputData> Blocked_2D_inputs_smoke = {
    DeconvInputData{
        InputShape{{}, {{ 2, 67, 7, 7 }}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
        {{15, 15}, {9, 10}, {9, 9}}
    }
};

const std::vector<DeconvInputData> Blocked_2D_inputs_nightly = {
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{15, 15}}
    }
};

const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
    ::testing::ValuesIn(strides2d),
@ -257,133 +493,220 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Blocked_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::ValuesIn(Blocked_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Blocked_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::ValuesIn(Blocked_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupDeconvolution (Blocked 3D) ============= */
INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Blocked_2D,
        ::testing::ValuesIn(Blocked_2D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Blocked_2D,
        ::testing::ValuesIn(Blocked_2D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Blocked 3D) ============= */
const std::vector<DeconvInputData> Blocked_3D_inputs_smoke = {
    DeconvInputData{
        InputShape{{}, {{ 2, 35, 7, 7, 7 }}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}},
        ngraph::helpers::InputLayerType::PARAMETER,
        {{7, 7, 7}, {7, 9, 7}}
    }
};

const std::vector<DeconvInputData> Blocked_3D_inputs_nightly = {
    DeconvInputData{
        InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{7, 7, 7}}
    }
};

const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
    ::testing::ValuesIn(kernels3d),
    ::testing::ValuesIn(strides3d),
    ::testing::ValuesIn(padBegins3d),
    ::testing::ValuesIn(padEnds3d),
    ::testing::ValuesIn(dilations3d),
    ::testing::ValuesIn(numOutChannels_Blocked),
    ::testing::Values(32),
    ::testing::Values(ngraph::op::PadType::EXPLICIT),
    ::testing::ValuesIn(emptyOutputPadding)
);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_Blocked_3D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::ValuesIn(Blocked_3D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Blocked_3D,
        ::testing::ValuesIn(Blocked_3D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Blocked_3D,
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(Precision::BF16),
        ::testing::Values(Layout::ANY),
        ::testing::Values(Layout::ANY),
        ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7, 7 })),
        ::testing::ValuesIn(emptyOutputShape),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::ValuesIn(Blocked_3D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        convParams_ExplicitPadding_Blocked_3D,
        ::testing::ValuesIn(Blocked_3D_inputs_nightly),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Kernel_1x1 (2D) ============= */

const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
    ::testing::Values(SizeVector({1, 1})),
    ::testing::Values(SizeVector({1, 1})),
    ::testing::Values(InferenceEngine::SizeVector({1, 1})),
    ::testing::Values(InferenceEngine::SizeVector({1, 1})),
    ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
    ::testing::Values(std::vector<ptrdiff_t>({0, 0})),
    ::testing::Values(SizeVector({1, 1})),
    ::testing::Values(InferenceEngine::SizeVector({1, 1})),
    ::testing::ValuesIn(numOutChannels_Blocked),
    ::testing::Values(ngraph::op::PadType::EXPLICIT),
    ::testing::ValuesIn(emptyOutputPadding)
);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1x1_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Precision::UNSPECIFIED),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
        ::testing::ValuesIn(Blocked_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(
            convParams_ExplicitPadding_1x1_2D,
            ::testing::Values(Precision::FP32),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Precision::BF16),
            ::testing::Values(Layout::ANY),
            ::testing::Values(Layout::ANY),
            ::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
            ::testing::ValuesIn(emptyOutputShape),
            ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
        ::testing::ValuesIn(Blocked_2D_inputs_smoke),
        ::testing::Values(ElementType::f32),
        ::testing::ValuesIn(fusingParamsSet),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
        ::testing::Values(cpuBF16PluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ========= */
/* ============= Reorder + Deconvolution ============= */
INSTANTIATE_TEST_SUITE_P(smoke_reorder_Deconv_2D, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        ::testing::Combine(::testing::ValuesIn(kernels2d),
                           ::testing::Values(InferenceEngine::SizeVector{1, 1}),
                           ::testing::ValuesIn(padBegins2d),
                           ::testing::ValuesIn(padEnds2d),
                           ::testing::ValuesIn(dilations2d),
                           ::testing::ValuesIn(numOutChannels_Blocked),
                           ::testing::Values(ngraph::op::PadType::EXPLICIT),
                           ::testing::ValuesIn(emptyOutputPadding)),
        ::testing::Values(DeconvInputData{InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
                                          ngraph::helpers::InputLayerType::PARAMETER,
                                          {{15, 15}, {9, 10}, {9, 9}}}),
        ::testing::Values(ElementType::f32),
        ::testing::Values(emptyFusingSpec),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution auto padding tests ============= */
const std::vector<DeconvInputData> inputs_2D_AutoPadding = {
    DeconvInputData{
        InputShape{{}, {{ 2, 67, 7, 7 }}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {}
    },
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::CONSTANT,
        {{15, 15}}
    },
    DeconvInputData{
        InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}},
        ngraph::helpers::InputLayerType::PARAMETER,
        {{15, 15}, {9, 10}, {9, 9}}
    }
};

const auto deconvParams_AutoPadding_2D = ::testing::Combine(
    ::testing::ValuesIn(kernels2d),
    ::testing::ValuesIn(strides2d),
    ::testing::ValuesIn(padBegins2d),
    ::testing::ValuesIn(padEnds2d),
    ::testing::ValuesIn(dilations2d),
    ::testing::ValuesIn(numOutChannels_Blocked),
    ::testing::Values(ngraph::op::PadType::SAME_UPPER, ngraph::op::PadType::SAME_LOWER),
    ::testing::ValuesIn(emptyOutputPadding)
);

INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_AutoPadding_FP32, DeconvolutionLayerCPUTest,
    ::testing::Combine(
        deconvParams_AutoPadding_2D,
        ::testing::ValuesIn(inputs_2D_AutoPadding),
        ::testing::Values(ElementType::f32),
        ::testing::Values(emptyFusingSpec),
        ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D, conv_avx512_2D})),
        ::testing::Values(cpuEmptyPluginConfig)),
    DeconvolutionLayerCPUTest::getTestCaseName);

} // namespace

} // namespace CPULayerTestsDefinitions

@ -9,57 +9,55 @@
using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov;
using namespace test;

namespace CPULayerTestsDefinitions {

using cumSumShape = std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>;
using cumSumParams = std::tuple<
    ngraph::element::Type, // data precision
    cumSumShape,           // input shape
    InputShape,            // input shape
    std::int64_t,          // axis
    bool,                  // exclusive
    bool>;                 // reverse

class CumSumLayerCPUTest : public testing::WithParamInterface<cumSumParams>, public ov::test::SubgraphBaseTest, public CPUTestsBase {
class CumSumLayerCPUTest : public testing::WithParamInterface<cumSumParams>,
                           public SubgraphBaseTest, public CPUTestsBase {
public:
    static std::string getTestCaseName(testing::TestParamInfo<cumSumParams> obj) {
        ngraph::element::Type inputPrecision;
        std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> shapes;
        InputShape shapes;
        std::int64_t axis;
        bool exclusive;
        bool reverse;
        std::tie(inputPrecision, shapes, axis, exclusive, reverse) = obj.param;

        std::ostringstream result;
        result << inputPrecision << "_" << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_" << "TS=";
        for (const auto& shape : shapes.second) {
            result << "(";
            for (const auto& item : shape) {
                result << CommonTestUtils::vec2str(item) << "_";
        std::ostringstream results;
        results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_";
        results << "TS=";
        for (const auto& item : shapes.second) {
            results << CommonTestUtils::vec2str(item) << "_";
        }
            result << ")_";
        }

        result << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : "");
        return result.str();
        results << "Prc=" << inputPrecision << "_";
        results << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : "");
        return results.str();
    }

protected:
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;
        ngraph::element::Type inputPrecision;
        std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> shapes;
        InputShape shapes;
        std::int64_t axis;
        bool exclusive;
        bool reverse;
        std::tie(inputPrecision, shapes, axis, exclusive, reverse) = this->GetParam();
        std::tie(inType, shapes, axis, exclusive, reverse) = this->GetParam();
        if (inType == ElementType::bf16)
            rel_threshold = 0.05f;

        for (size_t i = 0; i < shapes.second.size(); i++) {
            targetStaticShapes.push_back(shapes.second[i]);
        }
        inputDynamicShapes = shapes.first;
        selectedType = makeSelectedTypeStr("ref_any", inType);
        init_input_shapes({shapes});

        auto params = ngraph::builder::makeDynamicParams(inputPrecision, { inputDynamicShapes.front() });
        auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
        auto axisNode = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{}, std::vector<int64_t>{axis})->output(0);
        auto cumSum = ngraph::builder::makeCumSum(params[0], axisNode, exclusive, reverse);

@ -72,15 +70,12 @@ TEST_P(CumSumLayerCPUTest, CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    run();
    // TODO: Should be uncommented after updating the CheckPluginRelatedResults() method
    //CheckPluginRelatedResults(executableNetwork, "CumSum");
    CheckPluginRelatedResults(executableNetwork, "CumSum");
}

const ngraph::element::TypeVector inputPrecision = {
    ngraph::element::i8,
    ngraph::element::u8,
    ngraph::element::i16,
    ngraph::element::i32,
    ngraph::element::bf16,
    ngraph::element::f32
};

@ -90,97 +85,33 @@ const std::vector<int64_t> negativeAxes = { -1, -2, -3, -4, -5, -6 };
const std::vector<bool> exclusive = { true, false };
const std::vector<bool> reverse = { true, false };

const std::vector<cumSumShape> inShapes = {
    {
        // dynamic
        {
            {-1}
        },
        // target
        {
            {{16}, {18}, {12}}
        }
    },
    {
        // dynamic
        {
            {-1, -1}
        },
        // target
        {
            {{9, 15}, {18, 12}, {12, 12}}
        }
    },
    {
        // dynamic
        {
            {-1, -1, -1}
        },
        // target
        {
            {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}}
        }
    },
    {
        // dynamic
        {
            {-1, -1, -1, -1}
        },
        // target
        {
            {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}}
        }
    },
    {
        // dynamic
        {
            {-1, -1, -1, -1, -1}
        },
        // target
        {
            {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}}
        }
    },
    {
        // dynamic
        {
            {-1, -1, -1, -1, -1, -1}
        },
        // target
        {
            {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}}
        }
    },
    {
        // dynamic
        {
            {-1, -1, -1, -1, -1, -1, -1}
        },
        // target
        {
            {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}}
        }
    },
    {
        // dynamic
        {
            {{2, 5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}}
        },
        // target
        {
            {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}
        }
    },
    {
        // dynamic
        {
            {{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1}
        },
        // target
        {
            {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}
        }
    },
const std::vector<InputShape> inShapes = {
    {{-1},
     {{16}, {18}, {12}}},

    {{-1, -1},
     {{9, 15}, {18, 12}, {12, 12}}},

    {{-1, -1, -1},
     {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}}},

    {{-1, -1, -1, -1},
     {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}}},

    {{-1, -1, -1, -1, -1},
     {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}}},

    {{-1, -1, -1, -1, -1, -1},
     {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}}},

    {{{-1, -1, -1, -1, -1, -1, -1}},
     {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}}},

    {{{2, 5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}},
     {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}},

    {{{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1},
     {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}}
};

const auto testCasesAxis_0 = ::testing::Combine(
@ -193,7 +124,7 @@ const auto testCasesAxis_0 = ::testing::Combine(

const auto testCasesAxis_1 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 1, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 1, inShapes.end())),
    ::testing::Values(axes[1]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -201,7 +132,7 @@ const auto testCasesAxis_1 = ::testing::Combine(

const auto testCasesAxis_2 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 2, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 2, inShapes.end())),
    ::testing::Values(axes[2]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -209,7 +140,7 @@ const auto testCasesAxis_2 = ::testing::Combine(

const auto testCasesAxis_3 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 3, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 3, inShapes.end())),
    ::testing::Values(axes[3]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -217,7 +148,7 @@ const auto testCasesAxis_3 = ::testing::Combine(

const auto testCasesAxis_4 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 4, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 4, inShapes.end())),
    ::testing::Values(axes[4]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -225,7 +156,7 @@ const auto testCasesAxis_4 = ::testing::Combine(

const auto testCasesAxis_5 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 5, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 5, inShapes.end())),
    ::testing::Values(axes[5]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -233,7 +164,7 @@ const auto testCasesAxis_5 = ::testing::Combine(

const auto testCasesAxis_6 = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 6, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),
    ::testing::Values(axes[6]),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)
@ -241,7 +172,7 @@ const auto testCasesAxis_6 = ::testing::Combine(

const auto testCasesAxis_negative = ::testing::Combine(
    ::testing::ValuesIn(inputPrecision),
    ::testing::ValuesIn(std::vector<cumSumShape>(inShapes.begin() + 6, inShapes.end())),
    ::testing::ValuesIn(std::vector<InputShape>(inShapes.begin() + 6, inShapes.end())),
    ::testing::ValuesIn(negativeAxes),
    ::testing::ValuesIn(exclusive),
    ::testing::ValuesIn(reverse)