From f8759e19821cbb61cf905fbc48e4e511d94939e3 Mon Sep 17 00:00:00 2001 From: Bartosz Lesniewski Date: Tue, 15 Jun 2021 05:55:48 +0200 Subject: [PATCH 01/43] Revise ExtractImagePatches op (#6104) * use ngraph rtti macros * Add attribute count check to visitor tests * Add sslt for ExtractImagePatches * refactoring variable names * remove whitespaces from empty line --- .../single_layer/extractimagepatches.cpp | 43 +++++++++++++++++++ .../include/ngraph/op/extractimagepatches.hpp | 4 +- ngraph/core/src/op/extractimagepatches.cpp | 36 ++++++++-------- .../test/visitors/op/extractimagepatches.cpp | 2 + 4 files changed, 65 insertions(+), 20 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/serialization/single_layer/extractimagepatches.cpp diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/extractimagepatches.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/extractimagepatches.cpp new file mode 100644 index 00000000000..7e48a0ad0e9 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/extractimagepatches.cpp @@ -0,0 +1,43 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "shared_test_classes/single_layer/extract_image_patches.hpp" + +using namespace ngraph; +using namespace LayerTestsDefinitions; + +namespace { +TEST_P(ExtractImagePatchesTest, Serialize) { + Serialize(); +} + +const std::vector> inShapes = {{2, 3, 13, 37}}; +const std::vector> kSizes = {{1, 5}, {3, 4}, {3, 1}}; +const std::vector> strides = {{1, 2}, {2, 2}, {2, 1}}; +const std::vector> rates = {{1, 3}, {3, 3}, {3, 1}}; + +const std::vector autoPads = { + ngraph::op::PadType::VALID, ngraph::op::PadType::SAME_UPPER, + ngraph::op::PadType::SAME_LOWER +}; +const std::vector netPrecision = { + InferenceEngine::Precision::I8, InferenceEngine::Precision::BF16, + InferenceEngine::Precision::FP32 +}; + +INSTANTIATE_TEST_CASE_P(smoke_ExtractImagePatchesLayerTest, ExtractImagePatchesTest, + ::testing::Combine(::testing::ValuesIn(inShapes), + ::testing::ValuesIn(kSizes), + ::testing::ValuesIn(strides), + ::testing::ValuesIn(rates), + ::testing::ValuesIn(autoPads), + ::testing::ValuesIn(netPrecision), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ExtractImagePatchesTest::getTestCaseName); + +} // namespace \ No newline at end of file diff --git a/ngraph/core/include/ngraph/op/extractimagepatches.hpp b/ngraph/core/include/ngraph/op/extractimagepatches.hpp index e9c89813d06..2cd40561876 100644 --- a/ngraph/core/include/ngraph/op/extractimagepatches.hpp +++ b/ngraph/core/include/ngraph/op/extractimagepatches.hpp @@ -15,8 +15,8 @@ namespace ngraph class NGRAPH_API ExtractImagePatches : public Op { public: - static constexpr NodeTypeInfo type_info{"ExtractImagePatches", 3}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + NGRAPH_RTTI_DECLARATION; + ExtractImagePatches() = default; /// \brief Constructs a ExtractImagePatches operation /// diff --git a/ngraph/core/src/op/extractimagepatches.cpp b/ngraph/core/src/op/extractimagepatches.cpp index 0eba4645db0..eb66bbc3848 100644 --- a/ngraph/core/src/op/extractimagepatches.cpp +++ b/ngraph/core/src/op/extractimagepatches.cpp @@ -11,7 +11,7 @@ using namespace ngraph; // 
ExtractImagePatches v3 -constexpr NodeTypeInfo op::v3::ExtractImagePatches::type_info; +NGRAPH_RTTI_DEFINITION(op::v3::ExtractImagePatches, "ExtractImagePatches", 3); op::v3::ExtractImagePatches::ExtractImagePatches(const Output& image, const Shape& sizes, @@ -30,9 +30,9 @@ op::v3::ExtractImagePatches::ExtractImagePatches(const Output& image, void op::v3::ExtractImagePatches::validate_and_infer_types() { NGRAPH_OP_SCOPE(v3_ExtractImagePatches_validate_and_infer_types); - const PartialShape input_Pshape = get_input_partial_shape(0); + const PartialShape input_pshape = get_input_partial_shape(0); - NODE_VALIDATION_CHECK(this, input_Pshape.rank() == 4, "input tensor must be 4D tensor."); + NODE_VALIDATION_CHECK(this, input_pshape.rank() == 4, "input tensor must be 4D tensor."); NODE_VALIDATION_CHECK(this, m_patch_sizes.size() == 2, @@ -60,18 +60,18 @@ void op::v3::ExtractImagePatches::validate_and_infer_types() m_padding == PadType::SAME_UPPER, "Attribute padding should be in either valid or same_lower or same_upper."); - if (input_Pshape[1].is_dynamic() || input_Pshape[2].is_dynamic() || - input_Pshape[3].is_dynamic()) + if (input_pshape[1].is_dynamic() || input_pshape[2].is_dynamic() || + input_pshape[3].is_dynamic()) { set_input_is_relevant_to_shape(0); - auto output_Pshape = PartialShape::dynamic(4); - set_output_type(0, get_input_element_type(0), output_Pshape); + auto output_pshape = PartialShape::dynamic(4); + set_output_type(0, get_input_element_type(0), output_pshape); } else { - int32_t input_depth = input_Pshape[1].get_length(); - int32_t input_rows = input_Pshape[2].get_length(); - int32_t input_cols = input_Pshape[3].get_length(); + int32_t input_depth = input_pshape[1].get_length(); + int32_t input_rows = input_pshape[2].get_length(); + int32_t input_cols = input_pshape[3].get_length(); int32_t out_rows(0); int32_t out_cols(0); @@ -113,26 +113,26 @@ void op::v3::ExtractImagePatches::validate_and_infer_types() ngraph::Dimension::value_type out_cols_cast = static_cast(out_cols); - PartialShape output_Pshape; - if (input_Pshape[0].is_dynamic()) + PartialShape output_pshape; + if (input_pshape[0].is_dynamic()) { - output_Pshape = - PartialShape{input_Pshape[0], out_depth_cast, out_rows_cast, out_cols_cast}; + output_pshape = + PartialShape{input_pshape[0], out_depth_cast, out_rows_cast, out_cols_cast}; } else { ngraph::Dimension::value_type input_batch_cast = - static_cast(input_Pshape[0].get_length()); - output_Pshape = + static_cast(input_pshape[0].get_length()); + output_pshape = PartialShape{input_batch_cast, out_depth_cast, out_rows_cast, out_cols_cast}; } if (input_rows == 0 || input_cols == 0) { - output_Pshape = input_Pshape; + output_pshape = input_pshape; } - set_output_type(0, get_input_element_type(0), output_Pshape); + set_output_type(0, get_input_element_type(0), output_pshape); } } diff --git a/ngraph/test/visitors/op/extractimagepatches.cpp b/ngraph/test/visitors/op/extractimagepatches.cpp index 6d5c99146ea..a22b0a14b03 100644 --- a/ngraph/test/visitors/op/extractimagepatches.cpp +++ b/ngraph/test/visitors/op/extractimagepatches.cpp @@ -33,6 +33,8 @@ TEST(attributes, extractimagepatches_op) NodeBuilder builder(extractimagepatches); auto g_extractimagepatches = as_type_ptr(builder.create()); + const auto expected_attr_count = 4; + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); EXPECT_EQ(g_extractimagepatches->get_sizes(), sizes); EXPECT_EQ(g_extractimagepatches->get_strides(), strides); EXPECT_EQ(g_extractimagepatches->get_rates(), rates); From 
8dff04df2882cf04a1ab984a17ae0a59a87e3a83 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Tue, 15 Jun 2021 06:04:06 +0200 Subject: [PATCH 02/43] ShuffleChannels ng op and reference implementation revision (#5764) * Unblock shuffle channels tests from ie test manifest * Add more backend tests * ShiffleChannel reference impl update * Update attr visitor test * Remove unused get_pre_shuffle_shape helper function * Update class descriprion * Add type prop shape tests * Remove NGRAPH_SUPPRESS_DEPRECATED macro * Add single layer tests * Update layer tests * Remove unused header * Move implementation to cpp file --- .../single_layer_tests/shuffle_channels.cpp | 61 ++++++ .../single_layer_tests/shuffle_channels.cpp | 37 ++++ .../include/ngraph/op/shuffle_channels.hpp | 19 +- .../runtime/reference/shuffle_channels.hpp | 27 +++ .../runtime/reference/shuffle_channels.cpp | 60 ++++++ ngraph/core/src/op/shuffle_channels.cpp | 73 +------ ngraph/test/CMakeLists.txt | 1 + ngraph/test/backend/fused_op.in.cpp | 60 ------ ngraph/test/backend/shuffle_channels.in.cpp | 192 ++++++++++++++++++ ngraph/test/runtime/ie/unit_test.manifest | 5 - ngraph/test/type_prop/shuffle_channels.cpp | 112 +++++++++- ngraph/test/visitors/op/shuffle_channels.cpp | 16 +- 12 files changed, 508 insertions(+), 155 deletions(-) create mode 100644 ngraph/core/reference/include/ngraph/runtime/reference/shuffle_channels.hpp create mode 100644 ngraph/core/reference/src/runtime/reference/shuffle_channels.cpp create mode 100644 ngraph/test/backend/shuffle_channels.in.cpp diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp index b36a9e0713b..b338d02efc1 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp @@ -36,4 +36,65 @@ INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels4D, ShuffleChannelsLayerTest, ::testing::Values(CommonTestUtils::DEVICE_CPU)), ShuffleChannelsLayerTest::getTestCaseName); +// ND support tests +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels6D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(2, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({24, 6, 12, 18, 30, 36})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels5D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(2, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({6, 12, 18, 30, 36})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels3D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(1, 3)), + ::testing::ValuesIn(netPrecisions), + 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({18, 30, 36})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels2D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(1, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({18, 30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels1D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(0, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({30})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ShuffleChannelsLayerTest::getTestCaseName); + } // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp index 6f7bdc1fcf2..0eb593e16f4 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp @@ -35,3 +35,40 @@ const auto testCases = ::testing::Combine(::testing::ValuesIn(shuffleParameters) INSTANTIATE_TEST_CASE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, testCases, ShuffleChannelsLayerTest::getTestCaseName); + +// ND support tests +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels3D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(1, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({18, 30, 36})), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels2D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(1, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({18, 30})), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ShuffleChannelsLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ShuffleChannels1D, ShuffleChannelsLayerTest, + ::testing::Combine( + ::testing::Values(std::tuple(0, 3)), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + 
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({30})), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ShuffleChannelsLayerTest::getTestCaseName); diff --git a/ngraph/core/include/ngraph/op/shuffle_channels.hpp b/ngraph/core/include/ngraph/op/shuffle_channels.hpp index fd4539b0b3a..a09878c574f 100644 --- a/ngraph/core/include/ngraph/op/shuffle_channels.hpp +++ b/ngraph/core/include/ngraph/op/shuffle_channels.hpp @@ -24,14 +24,12 @@ namespace ngraph ShuffleChannels() = default; /// \brief Constructs a ShuffleChannels node. /// - /// \param data - Node producing the input tensor - /// \param axis - channel dimension index in the data tensor. A negative value means - /// that the index should be calculated from the back of the input - /// data - /// shape. - /// \param group - number of group the channel dimension specified by axis should - /// be - /// split into + /// \param data Node producing the input tensor. + /// \param axis Channel dimension index in the data tensor. + /// A negative value means that the index should be + /// calculated from the back of the input data shape. + /// \param group Number of group the channel dimension should be split into. + /// ShuffleChannels(const Output& data, const int64_t axis = 1, const int64_t group = 1); @@ -51,11 +49,6 @@ namespace ngraph bool has_evaluate() const override; private: - /// \brief Generates a shape required to permute the data - /// - /// \param data_shape - Shape of the original input data tensor - /// \return A 4D tensor to be used to reshape the input data before shuffling it - Shape get_pre_shuffle_shape(const Shape& data_shape) const; bool evaluate_shuffle_channels(const HostTensorVector& outputs, const HostTensorVector& inputs) const; diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/shuffle_channels.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/shuffle_channels.hpp new file mode 100644 index 00000000000..0d6fe7bed51 --- /dev/null +++ b/ngraph/core/reference/include/ngraph/runtime/reference/shuffle_channels.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "ngraph/shape.hpp" + +namespace ngraph +{ + namespace runtime + { + namespace reference + { + void shuffle_channels(const char* arg, + char* out, + const Shape& data_shape, + size_t elem_size, + const int64_t axis, + const int64_t group); + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/core/reference/src/runtime/reference/shuffle_channels.cpp b/ngraph/core/reference/src/runtime/reference/shuffle_channels.cpp new file mode 100644 index 00000000000..5a12f3787bf --- /dev/null +++ b/ngraph/core/reference/src/runtime/reference/shuffle_channels.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/runtime/reference/shuffle_channels.hpp" +#include "ngraph/runtime/opt_kernel/reshape.hpp" + +namespace ngraph +{ + namespace runtime + { + namespace reference + { + void shuffle_channels(const char* arg, + char* out, + const Shape& data_shape, + size_t elem_size, + const int64_t axis, + const int64_t group) + { + // Input ND tensor of data_shape (ds) is always considered as 4D tensor with the + // following shape: + // dim 0: ds[0] * ds[1] * ... 
* ds[axis-1] (or 1 if axis == 0) + // dim 1: group + // dim 2: ds[axis] / group + // dim 3: ds[axis+1] * ds[axis+2] * ... * ds[ds.size()-1] + // (or 1 if axis points to last dimension) + + // The representation of ND tensor as 4D tensor doesn't affect flat data order + Shape reshaped_input_shape(4, 1); + const size_t axis_zb = + axis >= 0 ? axis : axis + data_shape.size(); // Allow negative indices + for (size_t i = 0; i < axis_zb; ++i) + { + // All dimensions before input channels dim axis + reshaped_input_shape[0] *= data_shape[i]; + } + reshaped_input_shape[1] = group; + reshaped_input_shape[2] = data_shape[axis_zb] / group; + for (size_t i = axis_zb + 1; i < data_shape.size(); ++i) + { + // All dimensions after input channels dim axis + reshaped_input_shape[3] *= data_shape[i]; + } + + // The two dimensions in the middle are swapped + const Shape transposed_shape{reshaped_input_shape[0], + reshaped_input_shape[2], + reshaped_input_shape[1], + reshaped_input_shape[3]}; + AxisVector axis_vector{0, 2, 1, 3}; + runtime::opt_kernel::reshape( + arg, out, reshaped_input_shape, axis_vector, transposed_shape, elem_size); + + // Reshaped 4D tensor is interpreted as ND output tensor with original shape of data + // input + } + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/core/src/op/shuffle_channels.cpp b/ngraph/core/src/op/shuffle_channels.cpp index 03859b3cb60..71683af4030 100644 --- a/ngraph/core/src/op/shuffle_channels.cpp +++ b/ngraph/core/src/op/shuffle_channels.cpp @@ -10,14 +10,13 @@ #include "ngraph/op/shuffle_channels.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/runtime/reference/shuffle_channels.hpp" #include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type_traits.hpp" using namespace std; using namespace ngraph; -NGRAPH_SUPPRESS_DEPRECATED_START - NGRAPH_RTTI_DEFINITION(op::v0::ShuffleChannels, "ShuffleChannels", 0); op::ShuffleChannels::ShuffleChannels(const Output& data, @@ -87,7 +86,8 @@ void op::ShuffleChannels::validate_and_infer_types() } else { - set_output_type(0, data_type, PartialShape::dynamic()); + const auto shape = get_input_partial_shape(0); + set_output_type(0, data_type, shape); } } @@ -103,76 +103,19 @@ shared_ptr op::ShuffleChannels::clone_with_new_inputs(const OutputVector& return make_shared(new_args.at(0), m_axis, m_group); } -Shape op::ShuffleChannels::get_pre_shuffle_shape(const Shape& data_shape) const -{ - const Shape& ds = data_shape; - - // in general the resulting shape should contain the following values: - // [0]: ds[0] * ds[1] * ... * ds[m_axis-1] (or 1 if m_axis == 0) - // [1]: m_group - // [2]: ds[axis] / m_group - // [3]: ds[axis+1] * ds[axis+2] * ... 
* ds[ds.size()-1] (or 1 if m_axis points to the last elem - // of ds) - Shape res(4, 1); - - size_t axis_zb = get_zero_based_axis(); - for (size_t i = 0; i < axis_zb; ++i) - { - res[0] *= ds[i]; - } - - res[1] = m_group; - res[2] = ds[axis_zb] / m_group; - - for (size_t i = axis_zb + 1; i < ds.size(); ++i) - { - res[3] *= ds[i]; - } - - return res; -} - bool op::ShuffleChannels::evaluate_shuffle_channels(const HostTensorVector& outputs, const HostTensorVector& inputs) const { const auto arg = inputs[0]->get_data_ptr(); auto out = outputs[0]->get_data_ptr(); - Shape data_shape = inputs[0]->get_shape(); - const Shape& ds = data_shape; - size_t elem_size = inputs[0]->get_element_type().size(); + const auto data_shape = inputs[0]->get_shape(); + const size_t elem_size = inputs[0]->get_element_type().size(); - Shape reshaped_out_shape(4, 1); - size_t axis_zb = m_axis >= 0 ? m_axis : m_axis + data_shape.size(); - for (size_t i = 0; i < axis_zb; ++i) - { - reshaped_out_shape[0] *= ds[i]; - } + outputs[0]->set_element_type(inputs[0]->get_element_type()); + outputs[0]->set_shape(data_shape); - reshaped_out_shape[1] = m_group; - reshaped_out_shape[2] = ds[axis_zb] / m_group; + runtime::reference::shuffle_channels(arg, out, data_shape, elem_size, m_axis, m_group); - for (size_t i = axis_zb + 1; i < ds.size(); ++i) - { - reshaped_out_shape[3] *= ds[i]; - } - - // first reshape from data_shape to reshaped_out_shape is skipped since it doesn't affect - // out - // data - - Shape transpose_axes_order = {0, 2, 1, 3}; - Shape transposed_shape(transpose_axes_order.size()); - - for (size_t i = 0; i < transpose_axes_order.size(); ++i) - { - transposed_shape[i] = data_shape.at(transpose_axes_order.at(i)); - } - auto axis_vector = AxisVector{begin(transpose_axes_order), end(transpose_axes_order)}; - runtime::opt_kernel::reshape( - arg, out, reshaped_out_shape, axis_vector, transposed_shape, elem_size); - - // last reshape from transposed_shape to data_shape is skipped since it doesn't affect out - // data return true; } bool op::ShuffleChannels::evaluate(const HostTensorVector& outputs, diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index c3c5ab80405..2f85b09fc60 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -442,6 +442,7 @@ set(MULTI_TEST_SRC backend/select.in.cpp backend/selu.in.cpp backend/shape_of.in.cpp + backend/shuffle_channels.in.cpp backend/sigmoid.in.cpp backend/sign.in.cpp backend/sin.in.cpp diff --git a/ngraph/test/backend/fused_op.in.cpp b/ngraph/test/backend/fused_op.in.cpp index 4b6bb15cb51..c5e89e84e40 100644 --- a/ngraph/test/backend/fused_op.in.cpp +++ b/ngraph/test/backend/fused_op.in.cpp @@ -570,66 +570,6 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_grn_2d_with_bias) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_simple) -{ - const auto data = make_shared(element::i32, Shape{1, 15, 2, 2}); - auto tested_op = make_shared(data, 1, 5); - auto function = make_shared(tested_op, ParameterVector{data}); - - auto test_case = test::TestCase(function); - - std::vector input_data(60); - std::iota(std::begin(input_data), std::end(input_data), 0); - test_case.add_input(input_data); - - test_case.add_expected_output( - Shape{1, 15, 2, 2}, - {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, - 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, - 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); - - test_case.run(); -} - -NGRAPH_TEST(${BACKEND_NAME}, 
shuffle_channels_negative_axis) -{ - // in this test the output is the same as in shuffle_channels_simple but - // the axis value is negative and the C(channels) value is in a different dimension(0) of the - // shape - const auto data = make_shared(element::i32, Shape{15, 2, 1, 2}); - auto tested_op = make_shared(data, -4, 5); - auto function = make_shared(tested_op, ParameterVector{data}); - - auto test_case = test::TestCase(function); - - std::vector input_data(60); - std::iota(std::begin(input_data), std::end(input_data), 0); - test_case.add_input(input_data); - - test_case.add_expected_output( - Shape{15, 2, 1, 2}, - {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, - 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, - 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); - - test_case.run(); -} - -NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_float) -{ - const auto data = make_shared(element::f32, Shape{6, 1, 1, 1}); - auto tested_op = make_shared(data, 0, 2); - auto function = make_shared(tested_op, ParameterVector{data}); - - auto test_case = test::TestCase(function); - - test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); - - test_case.add_expected_output(Shape{6, 1, 1, 1}, {0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f}); - - test_case.run(); -} - // TODO: Issue: 37534 NGRAPH_TEST(${BACKEND_NAME}, DISABLED_squared_difference) { diff --git a/ngraph/test/backend/shuffle_channels.in.cpp b/ngraph/test/backend/shuffle_channels.in.cpp new file mode 100644 index 00000000000..16e3afb108c --- /dev/null +++ b/ngraph/test/backend/shuffle_channels.in.cpp @@ -0,0 +1,192 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "runtime/backend.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/engine/test_engines.hpp" +#include "util/test_case.hpp" +#include "util/test_control.hpp" +#include "util/test_tools.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; +using TestEngine = test::ENGINE_CLASS_NAME(${BACKEND_NAME}); + + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_simple) +{ + const auto data = make_shared(element::i32, Shape{1, 15, 2, 2}); + auto tested_op = make_shared(data, 1, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + auto test_case = test::TestCase(function); + + std::vector input_data(60); + std::iota(std::begin(input_data), std::end(input_data), 0); + test_case.add_input(input_data); + + test_case.add_expected_output( + Shape{1, 15, 2, 2}, + {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_negative_axis) +{ + // In this test the output is the same as in shuffle_channels_simple but + // the axis value is negative and the C(channels) value is in a different dimension(0) of the + // shape + const auto data = make_shared(element::i32, Shape{15, 2, 1, 2}); + auto tested_op = make_shared(data, -4, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + + auto test_case = test::TestCase(function); + + std::vector input_data(60); + std::iota(std::begin(input_data), std::end(input_data), 0); + test_case.add_input(input_data); + + 
test_case.add_expected_output( + Shape{15, 2, 1, 2}, + {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_float) +{ + const auto data = make_shared(element::f32, Shape{6, 1, 1, 1}); + auto tested_op = make_shared(data, 0, 2); + auto function = make_shared(tested_op, ParameterVector{data}); + + auto test_case = test::TestCase(function); + + test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); + test_case.add_expected_output(Shape{6, 1, 1, 1}, {0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_1d) +{ + Shape data_shape{15}; + const auto data = make_shared(element::i32, data_shape); + auto tested_op = make_shared(data, 0, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + auto test_case = test::TestCase(function); + + std::vector input_data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; + + test_case.add_input(input_data); + test_case.add_expected_output( + data_shape, + {0, 3, 6, 9, 12, + 1, 4, 7, 10, 13, + 2, 5, 8, 11, 14}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_2d) +{ + Shape data_shape{15, 4}; + const auto data = make_shared(element::i32, data_shape); + auto tested_op = make_shared(data, 0, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + auto test_case = test::TestCase(function); + + std::vector input_data{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59}; + + test_case.add_input(input_data); + test_case.add_expected_output( + data_shape, + {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_3d) +{ + Shape data_shape{15, 2, 2}; + const auto data = make_shared(element::i32, data_shape); + auto tested_op = make_shared(data, 0, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + auto test_case = test::TestCase(function); + + + std::vector input_data{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59}; + + test_case.add_input(input_data); + test_case.add_expected_output( + data_shape, + {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, shuffle_channels_5d) +{ + Shape data_shape{2, 2, 15, 2, 2}; + const auto data = make_shared(element::i32, data_shape); + auto tested_op = make_shared(data, 2, 5); + auto function = make_shared(tested_op, ParameterVector{data}); + auto test_case = test::TestCase(function); + + std::vector input_data{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59}; + + test_case.add_input(input_data); + test_case.add_expected_output( + data_shape, + {0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59, + + 0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59, + + 0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59, + + 0, 1, 2, 3, 12, 13, 14, 15, 24, 25, 26, 27, 36, 37, 38, 39, 48, 49, 50, 51, + 4, 5, 6, 7, 16, 17, 18, 19, 28, 29, 30, 31, 40, 41, 42, 43, 52, 53, 54, 55, + 8, 9, 10, 11, 20, 21, 22, 23, 32, 33, 34, 35, 44, 45, 46, 47, 56, 57, 58, 59}); + + test_case.run(); +} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 80f2421fb41..e76ac6f2b97 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -752,11 +752,6 @@ gemm_broadcast_axes_1_input_C scale_shift_no_broadcast scale_shift -# Cannot cast ngraph node ShuffleChannels to CNNLayer! -shuffle_channels_simple -shuffle_channels_negative_axis -shuffle_channels_float - # Detected op not belonging to opset1! 
onnx_model_quant_conv_linear onnx_model_quant_conv_linear_2d diff --git a/ngraph/test/type_prop/shuffle_channels.cpp b/ngraph/test/type_prop/shuffle_channels.cpp index 95cd2fd3cca..3f18ba50230 100644 --- a/ngraph/test/type_prop/shuffle_channels.cpp +++ b/ngraph/test/type_prop/shuffle_channels.cpp @@ -9,12 +9,116 @@ using namespace std; using namespace ngraph; +TEST(type_prop, shuffle_channels_default_4D) +{ + const auto data_input_shape = Shape{3, 9, 4, 5}; + const auto data = make_shared(element::f32, data_input_shape); + const auto shuffle_channels = make_shared(data); + + EXPECT_EQ(shuffle_channels->get_element_type(), element::f32); + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); +} + +TEST(type_prop, shuffle_channels_basic_4D) +{ + const auto data_input_shape = Shape{3, 9, 4, 5}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 1; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_element_type(), element::f32); + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); +} + +TEST(type_prop, shuffle_channels_dynamic_4D) +{ + const auto data_input_shape = PartialShape{Dimension::dynamic(), Dimension(3, 9), 4, Dimension(4, 15)}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 1; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_element_type(), element::f32); + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); +} + +TEST(type_prop, shuffle_channels_dynamic_fully) +{ + const auto data_input_shape = PartialShape::dynamic(); + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 1; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_element_type(), element::f32); + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); +} + +TEST(type_prop, shuffle_channels_ND_bigger) +{ + { + // 5D + const auto data_input_shape = Shape{2, 3, 9, 4, 5}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 2; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); + } + { + // 6D + const auto data_input_shape = Shape{6, 2, 3, 9, 4, 5}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 3; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); + } +} + +TEST(type_prop, shuffle_channels_ND_smaller) +{ + { + // 3D + const auto data_input_shape = Shape{5, 4, 9}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 2; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); + } + { + // 2D + const auto data_input_shape = Shape{9, 20}; + const auto data = make_shared(element::f32, data_input_shape); + const auto axis = 0; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); + } + { + // 1D + const auto data_input_shape = Shape{9}; + const auto data = 
make_shared(element::f32, data_input_shape); + const auto axis = 0; + const auto group = 3; + const auto shuffle_channels = make_shared(data, axis, group); + + EXPECT_EQ(shuffle_channels->get_output_partial_shape(0), data_input_shape); + } +} + TEST(type_prop, shuffle_channels_axis_validation) { try { const auto data = make_shared(element::f64, Shape{1, 2, 3, 4}); - const auto shuffle_channels = make_shared(data, -5, 5); + const auto shuffle_channels = make_shared(data, -5, 5); FAIL() << "ShuffleChannels validation did not work. Op node was created with incorrect " "params."; } @@ -30,7 +134,7 @@ TEST(type_prop, shuffle_channels_negative_axis_calculation) { const auto data = make_shared(element::f64, Shape{1, 2, 3, 4}); - const auto shuffle_channels = make_shared(data, -3, 2); + const auto shuffle_channels = make_shared(data, -3, 2); EXPECT_EQ(shuffle_channels->get_zero_based_axis(), 1); } @@ -40,7 +144,7 @@ TEST(type_prop, shuffle_channels_invalid_input_shape) try { const auto data = make_shared(element::f64, Shape{}); - const auto shuffle_channels = make_shared(data, 0, 1); + const auto shuffle_channels = make_shared(data, 0, 1); FAIL() << "ShuffleChannels validation did not work. Op node was created with incorrect " "params."; } @@ -56,7 +160,7 @@ TEST(type_prop, shuffle_channels_invalid_groups_value) try { const auto data = make_shared(element::f64, Shape{1, 2, 3, 15}); - const auto shuffle_channels = make_shared(data, -1, 2); + const auto shuffle_channels = make_shared(data, -1, 2); FAIL() << "ShuffleChannels validation did not work. Op node was created with incorrect " "params."; } diff --git a/ngraph/test/visitors/op/shuffle_channels.cpp b/ngraph/test/visitors/op/shuffle_channels.cpp index ae1a5867779..b9b36ba6d73 100644 --- a/ngraph/test/visitors/op/shuffle_channels.cpp +++ b/ngraph/test/visitors/op/shuffle_channels.cpp @@ -5,28 +5,28 @@ #include "gtest/gtest.h" #include "ngraph/ngraph.hpp" -#include "ngraph/op/util/attr_types.hpp" #include "ngraph/opsets/opset1.hpp" -#include "ngraph/opsets/opset3.hpp" -#include "ngraph/opsets/opset4.hpp" -#include "ngraph/opsets/opset5.hpp" #include "util/visitor.hpp" using namespace std; using namespace ngraph; using ngraph::test::NodeBuilder; -using ngraph::test::ValueMap; TEST(attributes, shuffle_channels_op) { - NodeBuilder::get_ops().register_factory(); + using ShuffleChannels = opset1::ShuffleChannels; + + NodeBuilder::get_ops().register_factory(); auto data = make_shared(element::i32, Shape{200}); auto axis = 0; auto groups = 2; - auto shuffle_channels = make_shared(data, axis, groups); + auto shuffle_channels = make_shared(data, axis, groups); NodeBuilder builder(shuffle_channels); - auto g_shuffle_channels = as_type_ptr(builder.create()); + auto g_shuffle_channels = as_type_ptr(builder.create()); + + const auto expected_attr_count = 2; + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); EXPECT_EQ(g_shuffle_channels->get_axis(), shuffle_channels->get_axis()); EXPECT_EQ(g_shuffle_channels->get_group(), shuffle_channels->get_group()); From 6deec50b0b599568b93a8b725e2cef0fa292cd28 Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Tue, 15 Jun 2021 06:07:18 +0200 Subject: [PATCH 03/43] Reshape OP: add SLT for special `-1` value in new shape dimensions (#5648) * add test for special `-1` value in new shape dimensions * add ticket with next steps --- .../single_layer_tests/reshape.cpp | 73 +++++++++++-------- .../serialization/single_layer/reshape.cpp | 8 +- .../single_layer_tests/reshape.cpp | 28 +++++-- 
.../single_layer_tests/reshape.cpp | 26 +++++-- .../single_layer/reshape.hpp | 66 ++++++++++++----- .../src/single_layer/reshape.cpp | 47 +++++++++++- 6 files changed, 182 insertions(+), 66 deletions(-) diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp index 85313f410d3..e0c986ad8b5 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/single_layer_tests/reshape.cpp @@ -2,43 +2,58 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "single_layer_tests/reshape.hpp" + #include -#include "single_layer_tests/reshape.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; namespace { const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP32, }; -INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest, - ::testing::Combine( - ::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), - ::testing::Values(std::map({}))), - ReshapeLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P( + smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise, + ::testing::Combine( + ::testing::Values(true), ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::Values(std::map({}))), + ReshapeLayerTestRevise::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, - ::testing::Combine( - ::testing::Values(true), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({10, 10, 10, 10})), - ::testing::Values(std::vector({10, 0, 100})), - ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), - ::testing::Values(std::map({}))), - ReshapeLayerTest::getTestCaseName); -} // namespace \ No newline at end of file +INSTANTIATE_TEST_CASE_P( + smoke_ReshapeCheck, ReshapeLayerTestRevise, + ::testing::Combine( + ::testing::Values(true), ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({10, 10, 10, 10})), + ::testing::Values(std::vector({10, 0, 100})), + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::Values(std::map({}))), + ReshapeLayerTestRevise::getTestCaseName); + +INSTANTIATE_TEST_CASE_P( + 
smoke_ReshapeCheckNegative, ReshapeLayerTestRevise, + ::testing::Combine( + ::testing::Values(true), ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({10, 10, 10, 10})), + ::testing::Values(std::vector({10, -1, 100})), + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::Values(std::map({}))), + ReshapeLayerTestRevise::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/reshape.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/reshape.cpp index c7b60da690f..9ba9aec704b 100644 --- a/inference-engine/tests/functional/inference_engine/serialization/single_layer/reshape.cpp +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/reshape.cpp @@ -10,7 +10,7 @@ using namespace LayerTestsDefinitions; namespace { - TEST_P(ReshapeLayerTest, Serialize) { + TEST_P(ReshapeLayerTestRevise, Serialize) { Serialize(); } @@ -19,7 +19,7 @@ namespace { InferenceEngine::Precision::FP16 }; - INSTANTIATE_TEST_CASE_P(smoke_ReshapeSerialization, ReshapeLayerTest, + INSTANTIATE_TEST_CASE_P(smoke_ReshapeSerialization, ReshapeLayerTestRevise, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), @@ -28,8 +28,8 @@ namespace { ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(std::vector({30, 30, 30, 30})), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(std::map({{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}}))), - ReshapeLayerTest::getTestCaseName); + ReshapeLayerTestRevise::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp index 0bdef642658..28419ec971f 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/reshape.cpp @@ -15,7 +15,7 @@ const std::vector netPrecisions = { InferenceEngine::Precision::FP16 }; -INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), @@ -24,12 +24,12 @@ INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckDynBatch, ReshapeLayerTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({30, 30, 30, 30})), - ::testing::Values(std::vector({30, 30, 30, 30})), + ::testing::Values(std::vector({30, 30, 30, 30})), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(std::map({{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}}))), - ReshapeLayerTest::getTestCaseName); + ReshapeLayerTestRevise::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTestRevise, ::testing::Combine( ::testing::Values(true), 
::testing::ValuesIn(netPrecisions), @@ -38,8 +38,22 @@ INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({10, 10, 10, 10})), - ::testing::Values(std::vector({10, 0, 100})), + ::testing::Values(std::vector({10, 0, 100})), ::testing::Values(CommonTestUtils::DEVICE_CPU), ::testing::Values(std::map({}))), - ReshapeLayerTest::getTestCaseName); -} // namespace \ No newline at end of file + ReshapeLayerTestRevise::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckNegative, ReshapeLayerTestRevise, + ::testing::Combine( + ::testing::Values(true), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({10, 10, 10, 10})), + ::testing::Values(std::vector({10, -1, 100})), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(std::map({}))), + ReshapeLayerTestRevise::getTestCaseName); +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp index a304c345776..84d30807341 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reshape.cpp @@ -16,7 +16,7 @@ const std::vector netPrecisions = { }; //TODO: Issue : - 28981 -INSTANTIATE_TEST_CASE_P(DISABLE_smoke_ReshapeCheckDynBatch, ReshapeLayerTest, +INSTANTIATE_TEST_CASE_P(DISABLE_smoke_ReshapeCheckDynBatch, ReshapeLayerTestRevise, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), @@ -25,12 +25,12 @@ INSTANTIATE_TEST_CASE_P(DISABLE_smoke_ReshapeCheckDynBatch, ReshapeLayerTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({1, 16, 16, 16})), - ::testing::Values(std::vector({1, 0, 256})), + ::testing::Values(std::vector({1, 0, 256})), ::testing::Values(CommonTestUtils::DEVICE_GPU), ::testing::Values(std::map({{CONFIG_KEY(DYN_BATCH_ENABLED), CONFIG_VALUE(YES)}}))), - ReshapeLayerTest::getTestCaseName); + ReshapeLayerTestRevise::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTestRevise, ::testing::Combine( ::testing::Values(true), ::testing::ValuesIn(netPrecisions), @@ -39,8 +39,22 @@ INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheck, ReshapeLayerTest, ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(std::vector({10, 10, 10, 10})), - ::testing::Values(std::vector({10, 0, 100})), + ::testing::Values(std::vector({10, 0, 100})), ::testing::Values(CommonTestUtils::DEVICE_GPU), ::testing::Values(std::map({}))), - ReshapeLayerTest::getTestCaseName); + ReshapeLayerTestRevise::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ReshapeCheckNegative, ReshapeLayerTestRevise, + ::testing::Combine( + ::testing::Values(true), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + 
::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(std::vector({10, 10, 10, 10})), + ::testing::Values(std::vector({10, -1, 100})), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(std::map({}))), + ReshapeLayerTestRevise::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/reshape.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/reshape.hpp index d0acf57931d..9ddeeed509a 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/reshape.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/reshape.hpp @@ -4,36 +4,66 @@ #pragma once -#include -#include -#include #include +#include +#include +#include + #include "ngraph_functions/builders.hpp" #include "ngraph_functions/utils/ngraph_helpers.hpp" - #include "shared_test_classes/base/layer_test_utils.hpp" namespace LayerTestsDefinitions { -typedef std::tuple< - bool, // SpecialZero - InferenceEngine::Precision, // Network precision - InferenceEngine::Precision, // Input precision - InferenceEngine::Precision, // Output precision - InferenceEngine::Layout, // Input layout - InferenceEngine::Layout, // Output layout - std::vector, // Input shapes - std::vector, // OutForm Shapes - std::string, // Device name - std::map // Config -> reshapeParams; +//TODO: remove this alias when ticket 57975 is done - ticket: 57976 +typedef std::tuple, // Input shapes + std::vector, // OutForm Shapes + std::string, // Device name + std::map // Config + > + reshapeParams; + +//TODO: remove this class when ticket 57975 is done - ticket: 57976 class ReshapeLayerTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + static std::string getTestCaseName( + testing::TestParamInfo obj); protected: void SetUp() override; }; -} // namespace LayerTestsDefinitions \ No newline at end of file + +//TODO: use this alias in all dependencies - ticket: 57975 +typedef std::tuple, // Input shapes + std::vector, // OutForm Shapes + std::string, // Device name + std::map // Config + > + reshapeParamsRevise; + +//TODO: use this class in all dependencies - ticket: 57975 +class ReshapeLayerTestRevise + : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName( + testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/reshape.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/reshape.cpp index 3063b01b27b..708c40ebbe0 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/reshape.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/reshape.cpp @@ -5,7 +5,7 @@ #include "shared_test_classes/single_layer/reshape.hpp" namespace LayerTestsDefinitions { - std::string ReshapeLayerTest::getTestCaseName(testing::TestParamInfo obj) { +std::string ReshapeLayerTest::getTestCaseName(testing::TestParamInfo obj) { InferenceEngine::Precision netPrecision; InferenceEngine::Precision inPrc, outPrc; InferenceEngine::Layout inLayout, outLayout; @@ -44,4 +44,47 @@ void 
ReshapeLayerTest::SetUp() { ngraph::ResultVector results{std::make_shared(reshape)}; function = std::make_shared(results, paramsIn, "Reshape"); } -} // namespace LayerTestsDefinitions \ No newline at end of file + +std::string ReshapeLayerTestRevise::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + InferenceEngine::Layout inLayout, outLayout; + InferenceEngine::SizeVector inputShapes; + std::vector outFormShapes; + std::string targetDevice; + std::map config; + bool specialZero; + std::tie(specialZero, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outFormShapes, targetDevice, config) = obj.param; + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "OS=" << CommonTestUtils::vec2str(outFormShapes) << "_"; + result << "specialZero=" << specialZero << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "inPRC=" << inPrc.name() << "_"; + result << "outPRC=" << outPrc.name() << "_"; + result << "inL=" << inLayout << "_"; + result << "outL=" << outLayout << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void ReshapeLayerTestRevise::SetUp() { + InferenceEngine::SizeVector inputShapes; + std::vector outFormShapes; + bool specialZero; + InferenceEngine::Precision netPrecision; + std::tie(specialZero, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outFormShapes, targetDevice, configuration) = + this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto paramsIn = ngraph::builder::makeParams(ngPrc, {inputShapes}); + auto paramIn = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(paramsIn)); + auto constNode = std::make_shared( + ngraph::element::Type_t::i64, ngraph::Shape{outFormShapes.size()}, outFormShapes); + auto reshape = std::dynamic_pointer_cast( + std::make_shared(paramIn[0], constNode, specialZero)); + ngraph::ResultVector results{std::make_shared(reshape)}; + function = std::make_shared(results, paramsIn, "Reshape"); +} + +} // namespace LayerTestsDefinitions From 772465da1ebd72f95298bf876e649e5057f4329d Mon Sep 17 00:00:00 2001 From: Szymon Durawa Date: Tue, 15 Jun 2021 06:08:10 +0200 Subject: [PATCH 04/43] Add output shape and output padding for Convolution Backprop SLTs. (#5576) * Create output shape for Convoution Backprop SLTs. * Add output_padding attribute to SLT scope. * Introduce SLT for Serializaton. * Introduce new test layer class ConvolutionBackpropLayerTest which contains output_padding attribute and output_shape input. Old one is deprecated, but cannot be removed due to kmb plugin dependency. * Add ConvolutionBackpropDataLayerTest into TEST_P. * ConvolutionBackpropDataLayerTest left as legacy class used by kmb_plugin. * Remove redundant variables. * Switch to new API for gpu SLTs. * Remove legacy API. * Introduce legacy API to match dependency for KMB and ARM plugins. * Create test cases for output_padding attribute. * Fixing smoke_Deconv tests. 
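For context: when no explicit output shape is passed to ConvolutionBackpropData, `output_padding` simply enlarges each spatial dimension of the inferred result. A minimal sketch of the expected size per spatial axis (hypothetical helper, assuming the standard transposed-convolution shape formula and the explicit-padding parameters exercised by these tests):

    #include <cstddef>

    // out = stride * (in - 1) + dilation * (kernel - 1) + 1 - pad_begin - pad_end + output_padding
    inline std::size_t deconv_spatial_dim(std::size_t in, std::size_t kernel, std::size_t stride,
                                          std::size_t dilation, std::size_t pad_begin,
                                          std::size_t pad_end, std::size_t output_padding) {
        return stride * (in - 1) + dilation * (kernel - 1) + 1 - pad_begin - pad_end + output_padding;
    }

    // E.g. the serialization case below (20x20 input, 3x3 kernel, stride 1, dilation 1, zero pads):
    // output_padding {0, 0} -> 22x22, output_padding {1, 1} -> 23x23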
--- .../single_layer/convolution_backprop.cpp | 58 ++++++ .../convolution_backprop_data.cpp | 168 ++++++++++++++++-- .../convolution_backprop_data.cpp | 58 ++++-- .../convolution_backprop_data.cpp | 136 ++++++++++++-- .../convolution_backprop.hpp | 15 ++ .../convolution_backprop_data.hpp | 1 + .../single_layer/convolution_backprop.hpp | 49 +++++ .../convolution_backprop_data.hpp | 2 + .../src/single_layer/convolution_backprop.cpp | 71 ++++++++ .../convolution_backprop_data.cpp | 4 +- .../include/ngraph_functions/builders.hpp | 17 ++ .../src/convolution_backprop_data.cpp | 45 ++++- 12 files changed, 579 insertions(+), 45 deletions(-) create mode 100644 inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop.hpp create mode 100644 inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp diff --git a/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp new file mode 100644 index 00000000000..2f9383ad914 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/serialization/single_layer/convolution_backprop.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "shared_test_classes/single_layer/convolution_backprop.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +TEST_P(ConvolutionBackpropLayerTest, Serialize) { + Serialize(); +} + +const std::vector precisions = { + InferenceEngine::Precision::FP64, InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, InferenceEngine::Precision::BF16, + InferenceEngine::Precision::I8, InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, InferenceEngine::Precision::I64, + InferenceEngine::Precision::U8, InferenceEngine::Precision::U16, + InferenceEngine::Precision::U32, InferenceEngine::Precision::U64, +}; +const std::vector> kernels = {{3, 3}}; +const std::vector> strides = {{1, 1}}; +const std::vector> padBegins = {{0, 0}}; +const std::vector> padEnds = {{0, 0}}; +const std::vector> dilations = {{1, 1}}; +const std::vector> outPadding = {{}, {1, 1}}; +const std::vector numOutChannels = {8, 16}; +const std::vector pad_types = { + ngraph::op::PadType::EXPLICIT, ngraph::op::PadType::VALID, + ngraph::op::PadType::SAME_LOWER, ngraph::op::PadType::SAME_UPPER}; +const auto inputShapes = std::vector({1, 16, 20, 20}); +const std::vector> emptyOutputShape = {{}}; + +const auto convolutionBackpropData2DParams = ::testing::Combine( + ::testing::ValuesIn(kernels), ::testing::ValuesIn(strides), + ::testing::ValuesIn(padBegins), ::testing::ValuesIn(padEnds), + ::testing::ValuesIn(dilations), ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(pad_types), ::testing::ValuesIn(outPadding)); + +INSTANTIATE_TEST_CASE_P( + smoke_convolutionBackpropData2D_Serialization, ConvolutionBackpropLayerTest, + ::testing::Combine( + convolutionBackpropData2DParams, + ::testing::ValuesIn(precisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + 
::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(inputShapes), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index a8b4c01497f..1a5f3885c93 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -4,7 +4,7 @@ #include -#include "single_layer_tests/convolution_backprop_data.hpp" +#include "single_layer_tests/convolution_backprop.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -17,6 +17,8 @@ const std::vector netPrecisions = { }; const std::vector numOutChannels = {1, 5, 16}; +const std::vector> emptyOutputShape = {{}}; +const std::vector> emptyOutputPadding = {{}}; /* ============= 2D ConvolutionBackpropData ============= */ const std::vector> inputShapes2D = {{1, 3, 30, 30}, @@ -35,7 +37,8 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(kernels2D), @@ -44,10 +47,11 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::Values(std::vector({0, 0})), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID) + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), @@ -56,10 +60,11 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convolu ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -68,8 +73,75 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convolutio ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + 
ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> inputShape2D = {{1, 3, 9, 12}}; +const std::vector> outputShapes2D = {{6, 6}, {4, 9}}; + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_OutputShapeDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape2D), + ::testing::ValuesIn(outputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> outputPadding2D = {{1, 1}, {2, 2}}; +const std::vector> testStrides2D = {{3, 3}}; + +const auto conv2DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(testStrides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding2D) +); +const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(testStrides2D), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(outputPadding2D) +); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); /* ============= 3D ConvolutionBackpropData ============= */ const std::vector> inputShapes3D = {{1, 3, 10, 10, 10}, @@ -88,7 +160,8 @@ const auto conv3DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(padEnds3D), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); const auto conv3DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(kernels3D), @@ -97,10 +170,11 @@ const auto conv3DParams_AutoPadValid = ::testing::Combine( 
::testing::Values(std::vector({0, 0, 0})), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID) + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions), @@ -109,10 +183,11 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convolu ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropLayerTest, ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions), @@ -121,7 +196,74 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convolutio ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> inputShape3D = {{1, 3, 10, 10, 10}}; +const std::vector> outputShapes3D = {{8, 8, 8}, {10, 10, 10}}; + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_OutputShapeDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShape3D), + ::testing::ValuesIn(outputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> outputPadding3D = {{1, 1, 1}, {2, 2, 2}}; +const std::vector> testStrides3D = {{3, 3, 3}}; + +const auto conv3DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(testStrides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding3D) +); +const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(testStrides3D), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(outputPadding3D) +); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid_output_padding, + 
::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv3DParams_ExplicitPadding_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp index 5fdcbef5747..f1d144f666e 100755 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -8,18 +8,18 @@ #include "shared_test_classes/base/layer_test_utils.hpp" #include "ngraph_functions/utils/ngraph_helpers.hpp" #include "ngraph_functions/builders.hpp" -#include +#include using namespace InferenceEngine; using namespace CPUTestUtils; namespace CPULayerTestsDefinitions { -using LayerTestsDefinitions::convBackpropDataSpecificParams; -using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet; +using LayerTestsDefinitions::convBackpropSpecificParams; +using LayerTestsDefinitions::convBackpropLayerTestParamsSet; typedef std::tuple< - convBackpropDataLayerTestParamsSet, + convBackpropLayerTestParamsSet, CPUSpecificParams, fusingSpecificParams, std::map > deconvLayerCPUTestParamsSet; @@ -28,14 +28,14 @@ class DeconvolutionLayerCPUTest : public testing::WithParamInterface obj) { - convBackpropDataLayerTestParamsSet basicParamsSet; + convBackpropLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; std::ostringstream result; - result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( + result << LayerTestsDefinitions::ConvolutionBackpropLayerTest::getTestCaseName(testing::TestParamInfo( basicParamsSet, 0)); result << CPUTestsBase::getTestCaseName(cpuParams); @@ -52,7 +52,7 @@ public: } protected: void SetUp() override { - convBackpropDataLayerTestParamsSet basicParamsSet; + convBackpropLayerTestParamsSet basicParamsSet; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; @@ -63,10 +63,11 @@ protected: std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = fusingParams; - convBackpropDataSpecificParams convParams; + convBackpropSpecificParams convParams; std::vector inputShape; + std::vector outputShape; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(convParams, 
netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = basicParamsSet; + std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet; if (inPrc == Precision::UNSPECIFIED) { selectedType += std::string("_") + Precision(Precision::FP32).name(); @@ -76,16 +77,22 @@ protected: ngraph::op::PadType padType; InferenceEngine::SizeVector kernel, stride, dilation; - std::vector padBegin, padEnd; + std::vector padBegin, padEnd, outPadding; size_t convOutChannels; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convParams; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape }); auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); auto deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels); + padEnd, dilation, padType, convOutChannels, false, outPadding); + + if (!outputShape.empty()) { + auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); + deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), outShape, ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + } function = makeNgraphFunction(ngPrc, inputParams, deconvolutionNode, "convolutionBackpropData"); } @@ -108,6 +115,8 @@ const std::vector fusingParamsSet{ const std::map cpuEmptyPluginConfig; const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; +const std::vector emptyOutputShape = { {} }; +const std::vector> emptyOutputPadding = { {} }; /* ============= Deconvolution params (planar layout) ============= */ const SizeVector numOutChannels_Planar = { 6 }; @@ -139,7 +148,8 @@ const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine( ::testing::ValuesIn(padEnds2d), ::testing::ValuesIn(dilations2d), ::testing::ValuesIn(numOutChannels_Planar), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, @@ -152,6 +162,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::ValuesIn(fusingParamsSet), @@ -168,6 +179,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 12, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::ValuesIn(fusingParamsSet), @@ -182,7 +194,8 @@ const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( ::testing::ValuesIn(padEnds3d), ::testing::ValuesIn(dilations3d), ::testing::ValuesIn(numOutChannels_Planar), - 
::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, @@ -195,6 +208,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::ValuesIn(fusingParamsSet), @@ -211,6 +225,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::ValuesIn(fusingParamsSet), @@ -225,7 +240,8 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( ::testing::ValuesIn(padEnds2d), ::testing::ValuesIn(dilations2d), ::testing::ValuesIn(numOutChannels_Blocked), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, @@ -238,6 +254,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::ValuesIn(fusingParamsSet), @@ -254,6 +271,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::ValuesIn(fusingParamsSet), @@ -268,7 +286,8 @@ const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine( ::testing::ValuesIn(padEnds3d), ::testing::ValuesIn(dilations3d), ::testing::ValuesIn(numOutChannels_Blocked), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, @@ -281,6 +300,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::ValuesIn(fusingParamsSet), @@ -297,6 +317,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::ValuesIn(fusingParamsSet), @@ -312,7 +333,8 @@ const auto 
convParams_ExplicitPadding_1x1_2D = ::testing::Combine( ::testing::Values(std::vector({0, 0})), ::testing::Values(SizeVector({1, 1})), ::testing::ValuesIn(numOutChannels_Blocked), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest, @@ -325,6 +347,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::ValuesIn(fusingParamsSet), @@ -341,6 +364,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest, ::testing::Values(Layout::ANY), ::testing::Values(Layout::ANY), ::testing::Values(std::vector({ 2, 67, 7, 7 })), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::ValuesIn(fusingParamsSet), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp index f18346cde9f..f404d932bc8 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -4,7 +4,7 @@ #include -#include "single_layer_tests/convolution_backprop_data.hpp" +#include "single_layer_tests/convolution_backprop.hpp" #include "common_test_utils/test_constants.hpp" using namespace LayerTestsDefinitions; @@ -17,6 +17,8 @@ const std::vector netPrecisions = { }; const std::vector numOutChannels = {1, 5, 16}; +const std::vector> emptyOutputShape = {{}}; +const std::vector> emptyOutputPadding = {{}}; /* ============= 2D ConvolutionBackpropData ============= */ const std::vector netPrecisions2D = { @@ -40,7 +42,8 @@ const auto conv2DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(kernels2D), @@ -49,10 +52,11 @@ const auto conv2DParams_AutoPadValid = ::testing::Combine( ::testing::Values(std::vector({0, 0})), ::testing::ValuesIn(dilations2D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID) + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropLayerTest, ::testing::Combine( conv2DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions2D), @@ -61,10 +65,11 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding, Convolu ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), 
::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropLayerTest, ::testing::Combine( conv2DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions2D), @@ -73,8 +78,59 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadValid, Convolutio ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> outputPadding2D = {{1, 1}, {2, 2}}; +const std::vector> testStrides2D = {{3, 3}}; + +const auto conv2DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(testStrides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding2D) +); +const auto conv2DParams_AutoPadValid_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(testStrides2D), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(outputPadding2D) +); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes2D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); /* ============= 3D ConvolutionBackpropData ============= */ const std::vector netPrecisions3D = { @@ -96,7 +152,8 @@ const auto conv3DParams_ExplicitPadding = ::testing::Combine( ::testing::ValuesIn(padEnds3D), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::EXPLICIT) + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding) ); const 
auto conv3DParams_AutoPadValid = ::testing::Combine( ::testing::ValuesIn(kernels3D), @@ -105,10 +162,11 @@ const auto conv3DParams_AutoPadValid = ::testing::Combine( ::testing::Values(std::vector({0, 0, 0})), ::testing::ValuesIn(dilations3D), ::testing::ValuesIn(numOutChannels), - ::testing::Values(ngraph::op::PadType::VALID) + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(emptyOutputPadding) ); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropLayerTest, ::testing::Combine( conv3DParams_ExplicitPadding, ::testing::ValuesIn(netPrecisions3D), @@ -117,10 +175,11 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding, Convolu ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); -INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropLayerTest, ::testing::Combine( conv3DParams_AutoPadValid, ::testing::ValuesIn(netPrecisions3D), @@ -129,7 +188,58 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadValid, Convolutio ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), ::testing::Values(CommonTestUtils::DEVICE_GPU)), - ConvolutionBackpropDataLayerTest::getTestCaseName); + ConvolutionBackpropLayerTest::getTestCaseName); + +const std::vector> outputPadding3D = {{1, 1, 1}, {2, 2, 2}}; +const std::vector> testStrides3D = {{3, 3, 3}}; + +const auto conv3DParams_ExplicitPadding_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(testStrides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(outputPadding3D) +); +const auto conv3DParams_AutoPadValid_output_padding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(testStrides3D), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID), + ::testing::ValuesIn(outputPadding3D) +); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_ExplicitPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_ConvolutionBackpropData3D_AutoPadding_OutputPaddingDefined, ConvolutionBackpropLayerTest, + 
::testing::Combine( + conv3DParams_ExplicitPadding_output_padding, + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::Values(InferenceEngine::Layout::ANY), + ::testing::ValuesIn(inputShapes3D), + ::testing::ValuesIn(emptyOutputShape), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropLayerTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop.hpp new file mode 100644 index 00000000000..45563d86a34 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/single_layer/convolution_backprop.hpp" + +namespace LayerTestsDefinitions { + +TEST_P(ConvolutionBackpropLayerTest, CompareWithRefs) { + Run(); +} + +} diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp index 67fb0c56efd..3b2947db121 100644 --- a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +// DEPRECATED, can't be removed currently due to arm and kmb-plugin dependency (#55568) #pragma once #include "shared_test_classes/single_layer/convolution_backprop_data.hpp" diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop.hpp new file mode 100644 index 00000000000..794782396da --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::SizeVector, // Kernel size + InferenceEngine::SizeVector, // Strides + std::vector, // Pad begin + std::vector, // Pad end + InferenceEngine::SizeVector, // Dilation + size_t, // Num out channels + ngraph::op::PadType, // Padding type + std::vector // Output padding +> convBackpropSpecificParams; +typedef std::tuple< + convBackpropSpecificParams, + InferenceEngine::Precision, // Net precision + InferenceEngine::Precision, // Input precision + InferenceEngine::Precision, // Output precision + InferenceEngine::Layout, // Input layout + InferenceEngine::Layout, // Output layout + InferenceEngine::SizeVector, // Input shapes + InferenceEngine::SizeVector, // Output shapes + LayerTestsUtils::TargetDevice // Device name +> convBackpropLayerTestParamsSet; + +class 
ConvolutionBackpropLayerTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp index ecfd6e4f1f7..9aeb9a1a2be 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/convolution_backprop_data.hpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +// DEPRECATED, can't be removed currently due to arm and kmb-plugin dependency (#55568) + #pragma once #include diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp new file mode 100644 index 00000000000..55aae5e0a21 --- /dev/null +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/convolution_backprop.hpp" + +namespace LayerTestsDefinitions { + +std::string ConvolutionBackpropLayerTest::getTestCaseName(testing::TestParamInfo obj) { + convBackpropSpecificParams convBackpropDataParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::Precision inPrc, outPrc; + InferenceEngine::Layout inLayout, outLayout; + InferenceEngine::SizeVector inputShapes; + InferenceEngine::SizeVector outputShapes; + std::string targetDevice; + std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShapes, outputShapes, targetDevice) = obj.param; + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convBackpropDataParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "OS=" << CommonTestUtils::vec2str(outputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "inPRC=" << inPrc.name() << "_"; + result << "outPRC=" << outPrc.name() << "_"; + result << "inL=" << inLayout << "_"; + result << "outL=" << outLayout << "_"; + result << "trgDev=" << targetDevice; + return result.str(); +} + +void ConvolutionBackpropLayerTest::SetUp() { + convBackpropSpecificParams convBackpropDataParams; + std::vector inputShape; + std::vector outputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + 
std::tie(convBackpropDataParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = this->GetParam(); + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = convBackpropDataParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + auto convBackpropData = std::dynamic_pointer_cast( + ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, false, outPadding)); + if (!outputShape.empty()) { + auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); + convBackpropData = std::dynamic_pointer_cast( + ngraph::builder::makeConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels)); + } + ngraph::ResultVector results{std::make_shared(convBackpropData)}; + function = std::make_shared(results, params, "convolutionBackpropData"); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp index c6730a3aaec..f2656a3c2ab 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/convolution_backprop_data.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +// DEPRECATED, can't be removed currently due to arm and kmb-plugin dependency (#55568) + #include "shared_test_classes/single_layer/convolution_backprop_data.hpp" namespace LayerTestsDefinitions { @@ -54,7 +56,7 @@ void ConvolutionBackpropDataLayerTest::SetUp() { ngraph::helpers::castOps2Nodes(params)); auto convBackpropData = std::dynamic_pointer_cast( ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels)); + padEnd, dilation, padType, convOutChannels)); ngraph::ResultVector results{std::make_shared(convBackpropData)}; function = std::make_shared(results, params, "convolutionBackpropData"); } diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index 802535430cd..1643f31f761 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -125,6 +125,7 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &outputPadding = {}, const std::vector &filterWeights = {}, const std::vector &biasesWeights = {}); @@ -137,6 +138,22 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &dilations, const op::PadType &autoPad, bool addBiases = false, + const std::vector &outputPadding = {}, + const std::vector &biasesWeights = {}); + +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + 
const ngraph::Output &outputShape, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + bool addBiases = false, + const std::vector &outputPadding = {}, + const std::vector &filterWeights = {}, const std::vector &biasesWeights = {}); std::shared_ptr makeCTCGreedyDecoder( diff --git a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp index 0edf339ce7f..91d6c0fc085 100644 --- a/inference-engine/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp +++ b/inference-engine/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp @@ -20,6 +20,7 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const op::PadType &autoPad, size_t numOutChannels, bool addBiases, + const std::vector &outputPadding, const std::vector &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); @@ -28,7 +29,7 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); - return makeConvolutionBackpropData(in, filterWeightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, biasesWeights); + return makeConvolutionBackpropData(in, filterWeightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, outputPadding, biasesWeights); } std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, @@ -40,9 +41,51 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const std::vector &dilations, const op::PadType &autoPad, bool addBiases, + const std::vector &outputPadding, const std::vector &biasesWeights) { auto deconv = std::make_shared(in, weights, strides, padsBegin, padsEnd, dilations, autoPad); + if (!outputPadding.empty()) { + deconv = std::make_shared(in, weights, strides, padsBegin, padsEnd, dilations, autoPad, outputPadding); + } + + if (addBiases) { + bool randomBiases = biasesWeights.empty(); + auto biasesWeightsNode = makeConstant(type, {}, biasesWeights, randomBiases); + auto add = std::make_shared(deconv, biasesWeightsNode); + return add; + } else { + return deconv; + } +} + +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + const ngraph::Output &outputShape, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + bool addBiases, + const std::vector &outputPadding, + const std::vector &filterWeights, + const std::vector &biasesWeights) { + bool randomFilterWeights = filterWeights.empty(); + auto shape = in.get_shape(); + std::vector filterWeightsShape = {shape[1], numOutChannels}; + filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); + auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); + + auto deconv = std::make_shared(in, filterWeightsNode, outputShape, strides, padsBegin, padsEnd, dilations, autoPad); + + if (!outputPadding.empty()) { + deconv = 
std::make_shared(in, filterWeightsNode, outputShape, strides, padsBegin, + padsEnd, dilations, autoPad, outputPadding); + } + if (addBiases) { bool randomBiases = biasesWeights.empty(); auto biasesWeightsNode = makeConstant(type, {}, biasesWeights, randomBiases); From b4a4c9110c4c5e629277dae02d4ab8d41670edcc Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Tue, 15 Jun 2021 06:18:57 +0200 Subject: [PATCH 05/43] Revise Reference Implementations ReduceLp operations (#6065) * Revise reference implementation for ReduceL1 operation * Revise reference implementation for ReduceL2 operation * Move op_eval tests to backend unit tests * Added minor changes * Replace CoordinateTransform for CoordinateTransformBasic * Added constant expression to set keep_dims as false * Add const qualifier to local variables * Use rank from host tensor to normalize axes --- .../ngraph/runtime/reference/reduce_l1.hpp | 29 ++++----- .../ngraph/runtime/reference/reduce_l2.hpp | 37 ++++++----- ngraph/core/src/op/reduce_l1.cpp | 12 +++- ngraph/core/src/op/reduce_l2.cpp | 13 +++- ngraph/test/CMakeLists.txt | 4 +- ngraph/test/backend/reduce_l1.in.cpp | 61 +++++++++++++++++++ ngraph/test/backend/reduce_l2.in.cpp | 61 +++++++++++++++++++ ngraph/test/op_eval/reduce_l1.cpp | 59 ------------------ ngraph/test/op_eval/reduce_l2.cpp | 61 ------------------- 9 files changed, 176 insertions(+), 161 deletions(-) create mode 100644 ngraph/test/backend/reduce_l1.in.cpp create mode 100644 ngraph/test/backend/reduce_l2.in.cpp delete mode 100644 ngraph/test/op_eval/reduce_l1.cpp delete mode 100644 ngraph/test/op_eval/reduce_l2.cpp diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp index 86eac8ceb26..83a3fbce98c 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l1.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape_util.hpp" @@ -19,27 +20,27 @@ namespace ngraph void reduce_l1(const T* arg, T* out, const Shape& in_shape, - const AxisSet& reduction_axes, - bool keep_dims) + const AxisSet& reduction_axes) { - auto out_shape = reduce(in_shape, reduction_axes, keep_dims); - CoordinateTransform output_transform(out_shape); + constexpr bool dont_keep_dims_in_output = false; + const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); + std::fill(out, out + shape_size(out_shape), 0); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = 0; - } - - CoordinateTransform input_transform(in_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); + const Coordinate output_coord = + reduce(input_coord, reduction_axes, dont_keep_dims_in_output); - size_t output_index = output_transform.index(output_coord); + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); - out[output_index] = - out[output_index] + std::abs(arg[input_transform.index(input_coord)]); + out[out_idx] = out[out_idx] + 
std::abs(arg[in_idx]); } } } // namespace reference diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp index b7ae96586b2..aeb4eecbe0f 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/reduce_l2.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape_util.hpp" @@ -19,34 +20,30 @@ namespace ngraph void reduce_l2(const T* arg, T* out, const Shape& in_shape, - const AxisSet& reduction_axes, - bool keep_dims) + const AxisSet& reduction_axes) { - auto out_shape = reduce(in_shape, reduction_axes, keep_dims); - CoordinateTransform output_transform(out_shape); + constexpr bool dont_keep_dims_in_output = false; + const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); + std::fill(out, out + shape_size(out_shape), 0); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = 0; - } - - CoordinateTransform input_transform(in_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); + const Coordinate output_coord = + reduce(input_coord, reduction_axes, dont_keep_dims_in_output); - size_t output_index = output_transform.index(output_coord); + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); - out[output_index] = - out[output_index] + arg[input_transform.index(input_coord)] * - arg[input_transform.index(input_coord)]; - } - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = - sqrt(out[output_transform.index(output_coord)]); + out[out_idx] = out[out_idx] + arg[in_idx] * arg[in_idx]; } + std::transform( + out, out + shape_size(out_shape), out, [](T elem) { return sqrt(elem); }); } } // namespace reference } // namespace runtime diff --git a/ngraph/core/src/op/reduce_l1.cpp b/ngraph/core/src/op/reduce_l1.cpp index 29de7e4e03f..02a336ac60d 100644 --- a/ngraph/core/src/op/reduce_l1.cpp +++ b/ngraph/core/src/op/reduce_l1.cpp @@ -3,8 +3,10 @@ // #include "ngraph/op/reduce_l1.hpp" +#include #include "itt.hpp" #include "ngraph/graph_util.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/reduce_l1.hpp" #include "ngraph/shape_util.hpp" @@ -44,7 +46,7 @@ namespace reduce_l1 { out->set_shape(reduce(arg->get_shape(), axes, keep_dims)); runtime::reference::reduce_l1( - arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes, keep_dims); + arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes); return true; } @@ -71,7 +73,13 @@ bool op::v4::ReduceL1::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v4_ReduceL1_evaluate); - return reduce_l1::evaluate_sum(inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); + NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); + + const auto reduction_axes = 
get_normalized_axes_from_tensor( + inputs[1], inputs[0]->get_partial_shape().rank(), get_friendly_name()); + + return reduce_l1::evaluate_sum(inputs[0], outputs[0], reduction_axes, get_keep_dims()); } bool op::v4::ReduceL1::has_evaluate() const diff --git a/ngraph/core/src/op/reduce_l2.cpp b/ngraph/core/src/op/reduce_l2.cpp index e3ee81b1875..7ffecb90377 100644 --- a/ngraph/core/src/op/reduce_l2.cpp +++ b/ngraph/core/src/op/reduce_l2.cpp @@ -3,8 +3,10 @@ // #include "ngraph/op/reduce_l2.hpp" +#include #include "itt.hpp" #include "ngraph/graph_util.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/reduce_l2.hpp" #include "ngraph/shape_util.hpp" @@ -44,7 +46,7 @@ namespace reduce_l2 { out->set_shape(reduce(arg->get_shape(), axes, keep_dims)); runtime::reference::reduce_l2( - arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes, keep_dims); + arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes); return true; } @@ -69,8 +71,13 @@ bool op::v4::ReduceL2::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v4_ReduceL2_evaluate); - return reduce_l2::evaluate_reduce_l2( - inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); + NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); + + const auto reduction_axes = get_normalized_axes_from_tensor( + inputs[1], inputs[0]->get_partial_shape().rank(), get_friendly_name()); + + return reduce_l2::evaluate_reduce_l2(inputs[0], outputs[0], reduction_axes, get_keep_dims()); } bool op::v4::ReduceL2::has_evaluate() const diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 2f85b09fc60..77903f5fc25 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -71,8 +71,6 @@ set(SRC op_eval/memory.cpp op_eval/mish.cpp op_eval/non_zero.cpp - op_eval/reduce_l1.cpp - op_eval/reduce_l2.cpp op_eval/reduce_prod.cpp op_eval/reduce_sum.cpp op_eval/roi_align.cpp @@ -423,6 +421,8 @@ set(MULTI_TEST_SRC backend/psroi_pooling.in.cpp backend/range.in.cpp backend/recurrent_cells.in.cpp + backend/reduce_l1.in.cpp + backend/reduce_l2.in.cpp backend/reduce_max.in.cpp backend/reduce_mean.in.cpp backend/reduce_min.in.cpp diff --git a/ngraph/test/backend/reduce_l1.in.cpp b/ngraph/test/backend/reduce_l1.in.cpp new file mode 100644 index 00000000000..d421ebc443d --- /dev/null +++ b/ngraph/test/backend/reduce_l1.in.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/test_control.hpp" +#include "util/test_tools.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; + +NGRAPH_TEST(${BACKEND_NAME}, reduce_l1_one_axis_keep_dims) +{ + auto data = make_shared(element::f32, Shape{3, 2, 2}); + auto axes = op::Constant::create(element::i32, Shape{1}, {2}); + auto reduce_l1 = make_shared(data, axes, true); + auto f = make_shared(OutputVector{reduce_l1}, ParameterVector{data}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create tensors for input/output + std::vector input{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + std::vector expected_result{3.0, 7.0, 11.0, 15.0, 19.0, 23.0}; + + auto data_tensor = backend->create_tensor(element::f32, Shape{3, 2, 2}); + 
copy_data(data_tensor, input); + + auto result_tensor = backend->create_tensor(element::f32, Shape{3, 2, 1}); + + auto handle = backend->compile(f); + handle->call_with_validate({result_tensor}, {data_tensor}); + EXPECT_TRUE(test::all_close_f((expected_result), read_vector(result_tensor))); +} + +NGRAPH_TEST(${BACKEND_NAME}, reduce_l1_one_axis_do_not_keep_dims) +{ + auto data = make_shared(element::f32, Shape{3, 2, 2}); + auto axes = op::Constant::create(element::i32, Shape{1}, {2}); + auto reduce_l1 = make_shared(data, axes, false); + auto f = make_shared(OutputVector{reduce_l1}, ParameterVector{data}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + // Create tensors for input/output + std::vector input{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + std::vector expected_result{3.0, 7.0, 11.0, 15.0, 19.0, 23.0}; + + auto data_tensor = backend->create_tensor(element::f32, Shape{3, 2, 2}); + copy_data(data_tensor, input); + + auto result_tensor = backend->create_tensor(element::f32, Shape{3, 2}); + + auto handle = backend->compile(f); + handle->call_with_validate({result_tensor}, {data_tensor}); + EXPECT_TRUE(test::all_close_f((expected_result), read_vector(result_tensor))); +} diff --git a/ngraph/test/backend/reduce_l2.in.cpp b/ngraph/test/backend/reduce_l2.in.cpp new file mode 100644 index 00000000000..a1cd3eebab6 --- /dev/null +++ b/ngraph/test/backend/reduce_l2.in.cpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/test_control.hpp" +#include "util/test_tools.hpp" + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; + +NGRAPH_TEST(${BACKEND_NAME}, reduce_l2_one_axis_keep_dims) +{ + auto data = make_shared(element::f32, Shape{3, 2, 2}); + auto axes = op::Constant::create(element::i32, Shape{1}, {2}); + auto reduce_l2 = make_shared(data, axes, true); + auto f = make_shared(OutputVector{reduce_l2}, ParameterVector{data}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + std::vector input{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + std::vector expected_result{ + 2.23606798, 5.0, 7.81024968, 10.63014581, 13.45362405, 16.2788206}; + + auto data_tensor = backend->create_tensor(element::f32, Shape{3, 2, 2}); + copy_data(data_tensor, input); + + auto result_tensor = backend->create_tensor(element::f32, Shape{3, 2, 1}); + + auto handle = backend->compile(f); + handle->call_with_validate({result_tensor}, {data_tensor}); + EXPECT_TRUE(test::all_close_f((expected_result), read_vector(result_tensor))); +} + +NGRAPH_TEST(${BACKEND_NAME}, reduce_l2_one_axis_do_not_keep_dims) +{ + auto data = make_shared(element::f32, Shape{3, 2, 2}); + auto axes = op::Constant::create(element::i32, Shape{1}, {2}); + auto reduce_l2 = make_shared(data, axes, false); + auto f = make_shared(OutputVector{reduce_l2}, ParameterVector{data}); + + auto backend = runtime::Backend::create("${BACKEND_NAME}"); + + std::vector input{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + std::vector expected_result{ + 2.23606798, 5.0, 7.81024968, 10.63014581, 13.45362405, 16.2788206}; + + auto data_tensor = backend->create_tensor(element::f32, Shape{3, 2, 2}); + copy_data(data_tensor, input); + + auto result_tensor = backend->create_tensor(element::f32, Shape{3, 2}); + + auto handle = backend->compile(f); + 
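+    // ReduceL2 over axes = {2} takes the square root of the sum of squares along the innermost axis,
+    // e.g. sqrt(1*1 + 2*2) = 2.23606798 and sqrt(11*11 + 12*12) = 16.2788206;
+    // keep_dims = false drops the reduced axis, so the expected output shape is {3, 2}.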
handle->call_with_validate({result_tensor}, {data_tensor}); + EXPECT_TRUE(test::all_close_f((expected_result), read_vector(result_tensor))); +} diff --git a/ngraph/test/op_eval/reduce_l1.cpp b/ngraph/test/op_eval/reduce_l1.cpp deleted file mode 100644 index 0f64ee495bf..00000000000 --- a/ngraph/test/op_eval/reduce_l1.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "gtest/gtest.h" - -#include "ngraph/opsets/opset4.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" -#include "runtime/backend.hpp" -#include "util/test_tools.hpp" - -using namespace std; -using namespace ngraph; - -TEST(op_eval, reduce_l1_one_axis_keep_dims) -{ - auto data = make_shared(element::f32, Shape{3, 2, 2}); - auto axes = opset4::Constant::create(element::i32, Shape{1}, {2}); - auto reduce = make_shared(data, axes, true); - auto fun = make_shared(OutputVector{reduce}, ParameterVector{data}); - - std::vector inputs{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; - std::vector expected_result{3.0, 7.0, 11.0, 15.0, 19.0, 23.0}; - - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{3, 2, 2}, inputs), - make_host_tensor(Shape{1}, {2})})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({3, 2, 1})}); - auto result_data = read_vector(result); - for (size_t i = 0; i < expected_result.size(); i++) - EXPECT_NEAR(result_data[i], expected_result[i], 0.000001); -} - -TEST(op_eval, reduce_l1_one_axis_do_not_keep_dims) -{ - auto data = make_shared(element::f32, Shape{3, 2, 2}); - auto axes = opset4::Constant::create(element::i32, Shape{1}, {2}); - auto reduce = make_shared(data, axes, false); - auto fun = make_shared(OutputVector{reduce}, ParameterVector{data}); - - std::vector inputs{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; - std::vector expected_result{3.0, 7.0, 11.0, 15.0, 19.0, 23.0}; - - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{3, 2, 2}, inputs), - make_host_tensor(Shape{1}, {2})})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({3, 2})}); - auto result_data = read_vector(result); - for (size_t i = 0; i < expected_result.size(); i++) - EXPECT_NEAR(result_data[i], expected_result[i], 0.000001); -} diff --git a/ngraph/test/op_eval/reduce_l2.cpp b/ngraph/test/op_eval/reduce_l2.cpp deleted file mode 100644 index 7aab1907c4a..00000000000 --- a/ngraph/test/op_eval/reduce_l2.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "gtest/gtest.h" - -#include "ngraph/opsets/opset4.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" -#include "runtime/backend.hpp" -#include "util/test_tools.hpp" - -using namespace std; -using namespace ngraph; - -TEST(op_eval, reduce_l2_one_axis_keep_dims) -{ - auto data = make_shared(element::f32, Shape{3, 2, 2}); - auto axes = opset4::Constant::create(element::i32, Shape{1}, {2}); - auto reduce = make_shared(data, axes, true); - auto fun = make_shared(OutputVector{reduce}, ParameterVector{data}); - - std::vector inputs{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; - std::vector expected_result{ - 2.23606798, 5.0, 7.81024968, 10.63014581, 13.45362405, 
16.2788206}; - - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{3, 2, 2}, inputs), - make_host_tensor(Shape{1}, {2})})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({3, 2, 1})}); - auto result_data = read_vector(result); - for (size_t i = 0; i < expected_result.size(); i++) - EXPECT_NEAR(result_data[i], expected_result[i], 0.000001); -} - -TEST(op_eval, reduce_l2_one_axis_do_not_keep_dims) -{ - auto data = make_shared(element::f32, Shape{3, 2, 2}); - auto axes = opset4::Constant::create(element::i32, Shape{1}, {2}); - auto reduce = make_shared(data, axes, false); - auto fun = make_shared(OutputVector{reduce}, ParameterVector{data}); - - std::vector inputs{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; - std::vector expected_result{ - 2.23606798, 5.0, 7.81024968, 10.63014581, 13.45362405, 16.2788206}; - - auto result = make_shared(); - ASSERT_TRUE(fun->evaluate({result}, - {make_host_tensor(Shape{3, 2, 2}, inputs), - make_host_tensor(Shape{1}, {2})})); - EXPECT_EQ(result->get_element_type(), element::f32); - EXPECT_EQ(result->get_shape(), Shape{std::vector({3, 2})}); - auto result_data = read_vector(result); - for (size_t i = 0; i < expected_result.size(); i++) - EXPECT_NEAR(result_data[i], expected_result[i], 0.000001); -} From 766d011b06af935f5e2efcb5411bf848856e9029 Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Tue, 15 Jun 2021 06:25:19 +0200 Subject: [PATCH 06/43] reshape reference implementation - new implementation (#5559) --- .../ngraph/runtime/opt_kernel/reshape.hpp | 1 - .../ngraph/runtime/reference/reshape.hpp | 6 +- .../src/runtime/opt_kernel/reshape.cpp | 18 +- .../src/runtime/reference/reshape.cpp | 91 ++++--- ngraph/test/CMakeLists.txt | 1 + ngraph/test/reshape_opt_kernel.cpp | 232 ++++++++++++++++++ ngraph/test/visitors/op/reshape.cpp | 4 + 7 files changed, 310 insertions(+), 43 deletions(-) create mode 100644 ngraph/test/reshape_opt_kernel.cpp diff --git a/ngraph/core/reference/include/ngraph/runtime/opt_kernel/reshape.hpp b/ngraph/core/reference/include/ngraph/runtime/opt_kernel/reshape.hpp index 30ba487ecc6..27d960ee6db 100644 --- a/ngraph/core/reference/include/ngraph/runtime/opt_kernel/reshape.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/opt_kernel/reshape.hpp @@ -5,7 +5,6 @@ #pragma once #include "ngraph/axis_vector.hpp" -#include "ngraph/runtime/reference/reshape.hpp" #include "ngraph/shape.hpp" namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/reshape.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/reshape.hpp index d6696b8feeb..ee1c2f998ed 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/reshape.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/reshape.hpp @@ -4,12 +4,8 @@ #pragma once -#include - #include "ngraph/axis_vector.hpp" -#include "ngraph/check.hpp" -#include "ngraph/coordinate_transform.hpp" -#include "ngraph/type/element_type.hpp" +#include "ngraph/shape.hpp" namespace ngraph { diff --git a/ngraph/core/reference/src/runtime/opt_kernel/reshape.cpp b/ngraph/core/reference/src/runtime/opt_kernel/reshape.cpp index f0d559f1847..38c29f9387c 100644 --- a/ngraph/core/reference/src/runtime/opt_kernel/reshape.cpp +++ b/ngraph/core/reference/src/runtime/opt_kernel/reshape.cpp @@ -2,11 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include +#include #include "ngraph/check.hpp" #include 
"ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/runtime/reference/reshape.hpp" using namespace ngraph; @@ -233,6 +234,13 @@ namespace } } } + bool no_axis_reordering(const AxisVector& axis_order) + { + auto tmp = axis_order; + std::sort(begin(tmp), end(tmp)); + tmp.erase(std::unique(begin(tmp), end(tmp)), end(tmp)); + return tmp == axis_order; + } } // namespace void runtime::opt_kernel::reshape(const char* in, char* out, @@ -241,6 +249,12 @@ void runtime::opt_kernel::reshape(const char* in, const Shape& out_shape, size_t elem_size) { + if (no_axis_reordering(in_axis_order)) + { + std::memcpy(out, in, shape_size(in_shape) * elem_size); + return; + } + switch (in_shape.size()) { case 0: reshape_in0(in, out, in_shape, in_axis_order, out_shape, elem_size); break; diff --git a/ngraph/core/reference/src/runtime/reference/reshape.cpp b/ngraph/core/reference/src/runtime/reference/reshape.cpp index f4c100b27fc..27ac18e0190 100644 --- a/ngraph/core/reference/src/runtime/reference/reshape.cpp +++ b/ngraph/core/reference/src/runtime/reference/reshape.cpp @@ -2,46 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include +#include #include "ngraph/check.hpp" +#include "ngraph/coordinate_range.hpp" +#include "ngraph/coordinate_transform.hpp" #include "ngraph/runtime/reference/reshape.hpp" -using namespace ngraph; - -void runtime::reference::reshape(const char* arg, - char* out, - const Shape& in_shape, - const AxisVector& in_axis_order, - const Shape& out_shape, - size_t elem_size) +namespace ngraph { - // Unfortunately we don't yet have a constructor for CoordinateTransform that lets - // us pass only source_space_shape - // and source_axis_order so we have to construct the defaults here. - Shape in_start_corner(in_shape.size(), 0); // (0,...0) - Strides in_strides(in_shape.size(), 1); // (1,...,1) - - CoordinateTransform input_transform( - in_shape, in_start_corner, in_shape, in_strides, in_axis_order); - CoordinateTransform output_transform(out_shape); - - NGRAPH_CHECK(shape_size(input_transform.get_target_shape()) == - shape_size(output_transform.get_target_shape())); - - CoordinateTransform::Iterator output_it = output_transform.begin(); - - for (const Coordinate& input_coord : input_transform) + namespace runtime { - if (output_it == output_transform.end()) - break; - const Coordinate& output_coord = *output_it; + namespace reference + { + namespace + { + std::vector reorder(const std::vector& origin, + const AxisVector& order) + { + std::vector reordered = origin; + auto out = begin(reordered); + NGRAPH_CHECK(origin.size() <= order.size()); + for (size_t i = 0; i < origin.size(); ++i) + { + *out = origin.at(order[i]); + ++out; + } + return reordered; + } + } // namespace - memcpy(out + output_transform.index(output_coord) * elem_size, - arg + input_transform.index(input_coord) * elem_size, - elem_size); + void reshape(const char* arg, + char* out, + const Shape& in_shape, + const AxisVector& in_axis_order, + const Shape& out_shape, + size_t elem_size) + { + if (shape_size(in_shape) == 1) + { + std::memcpy(out, arg, elem_size); + return; + } - ++output_it; - } -} + char* output = out; + const char* const output_end = out + shape_size(out_shape) * elem_size; + const auto axis_strides = reorder(row_major_strides(in_shape), in_axis_order); + for (const auto& coordinate : + CoordinateTransformBasic(reorder(in_shape, in_axis_order))) + { + if (output >= output_end) + { + break; + } + const auto elem_offset = std::inner_product( + begin(coordinate), end(coordinate), 
begin(axis_strides), 0ll); + const auto input = arg + elem_offset * elem_size; + std::memcpy(output, input, elem_size); + output += elem_size; + } + } + } // namespace reference + } // namespace runtime +} // namespace ngraph diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 77903f5fc25..47dfbbd3e49 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -92,6 +92,7 @@ set(SRC pattern.cpp provenance.cpp replace_node.cpp + reshape_opt_kernel.cpp shape.cpp span.cpp specialize_function.cpp diff --git a/ngraph/test/reshape_opt_kernel.cpp b/ngraph/test/reshape_opt_kernel.cpp new file mode 100644 index 00000000000..01a2358e29e --- /dev/null +++ b/ngraph/test/reshape_opt_kernel.cpp @@ -0,0 +1,232 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "gtest/gtest.h" + +#include "ngraph/axis_vector.hpp" +#include "ngraph/runtime/opt_kernel/reshape.hpp" +#include "ngraph/shape.hpp" + +#include "util/ndarray.hpp" + +using namespace ngraph; + +namespace +{ + using ElementValue = int32_t; + enum class AxisOrder + { + straight, + reverse, + }; + + AxisVector get_axis_order(AxisOrder order, size_t size) + { + AxisVector v(size); + std::iota(begin(v), end(v), 0); + if (order == AxisOrder::reverse) + { + std::reverse(begin(v), end(v)); + } + return v; + } + + struct TestParams + { + AxisOrder order; + test::NDArrayBase input; + test::NDArrayBase output; + }; + + struct ReshapeOptKernel : ::testing::TestWithParam + { + }; + +} // namespace + +TEST_P(ReshapeOptKernel, reshape_opt_kernel) +{ + const TestParams& p = GetParam(); + + const AxisVector axis_order = get_axis_order(p.order, p.input.get_shape().size()); + std::vector output_buff(p.input.get_vector().size()); + + runtime::opt_kernel::reshape((const char*)p.input.data(), + (char*)output_buff.data(), + p.input.get_shape(), + axis_order, + p.output.get_shape(), + sizeof(ElementValue)); + EXPECT_EQ(p.output.get_vector(), output_buff); +} + +INSTANTIATE_TEST_CASE_P(reshape_opt_kernel, + ReshapeOptKernel, + ::testing::Values(TestParams{AxisOrder::straight, + test::NDArray{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + test::NDArray{ + {1, 2, 3}, + {4, 5, 6}, + }}, + TestParams{AxisOrder::straight, + test::NDArray{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + test::NDArray{ + {1, 2, 3, 4, 5, 6}, + }}, + TestParams{AxisOrder::straight, + test::NDArray{ + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + test::NDArray{ + {11, 12, 13, 14, 15, 16}, + {21, 22, 23, 24, 25, 26}, + }}, + TestParams{AxisOrder::straight, + test::NDArray{ + { + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + }, + test::NDArray{ + {11, 12, 13, 14, 15, 16}, + {21, 22, 23, 24, 25, 26}, + }}, + TestParams{AxisOrder::reverse, + test::NDArray{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + test::NDArray{ + {1, 3, 5}, + {2, 4, 6}, + }}, + TestParams{AxisOrder::reverse, + test::NDArray{ + {1, 2}, + {3, 4}, + {5, 6}, + }, + test::NDArray{ + {1, 3, 5, 2, 4, 6}, + }}, + TestParams{AxisOrder::reverse, + test::NDArray{ + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + test::NDArray{ + {11, 21, 13, 23, 15, 25}, + {12, 22, 14, 24, 16, 26}, + }}, + TestParams{AxisOrder::reverse, + test::NDArray{ + { + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + }, + test::NDArray{ + {11, 21, 13, 23, 15, 25}, + {12, 22, 14, 24, 16, 26}, + 
}})); + +// input shape with size > 6 should be covered by reference implementation: +INSTANTIATE_TEST_CASE_P(reshape_opt_kernel_ref_impl_fallback, + ReshapeOptKernel, + ::testing::Values(TestParams{AxisOrder::straight, + test::NDArray{ + { + { + { + { + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + }, + }, + }, + }, + test::NDArray{ + {11, 12, 13, 14, 15, 16}, + {21, 22, 23, 24, 25, 26}, + }}, + TestParams{AxisOrder::reverse, + test::NDArray{ + { + { + { + { + { + {11, 12}, + {13, 14}, + {15, 16}, + }, + { + {21, 22}, + {23, 24}, + {25, 26}, + }, + }, + }, + }, + }, + }, + test::NDArray{ + {11, 21, 13, 23, 15, 25}, + {12, 22, 14, 24, 16, 26}, + }})); diff --git a/ngraph/test/visitors/op/reshape.cpp b/ngraph/test/visitors/op/reshape.cpp index 8acad56678e..7d48e11442f 100644 --- a/ngraph/test/visitors/op/reshape.cpp +++ b/ngraph/test/visitors/op/reshape.cpp @@ -30,5 +30,9 @@ TEST(attributes, reshape_op) NodeBuilder builder(reshape); auto g_reshape = as_type_ptr(builder.create()); + const auto expected_attr_count = 1; + + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); + EXPECT_EQ(g_reshape->get_special_zero(), reshape->get_special_zero()); } From 134c66a9339477f3ba1e271cac6a7d7de25c5689 Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Tue, 15 Jun 2021 11:06:30 +0200 Subject: [PATCH 07/43] Reference Implementation of Logical Reduce Operations (#6004) * Remove CoordinateTransform call to index function to calculate tensor element indexes * Allow negative axis values in axes host tensor * Added constant expression to set keep_dims as false * Use rank from host tensor to normalize axes * Address minor comments * Add const qualifier to local variables * Add deprecated macro for arm plugin dependent function signatures * Remove duplicate helper functions --- .../ngraph/runtime/reference/eval_helpers.hpp | 15 ---- .../runtime/reference/logical_reduction.hpp | 83 ++++++++++++------- .../src/runtime/reference/eval_helpers.cpp | 33 -------- ngraph/core/src/op/reduce_logical_and.cpp | 32 +++---- ngraph/core/src/op/reduce_logical_or.cpp | 32 +++---- ngraph/test/eval.cpp | 22 ----- 6 files changed, 87 insertions(+), 130 deletions(-) delete mode 100644 ngraph/core/reference/include/ngraph/runtime/reference/eval_helpers.hpp delete mode 100644 ngraph/core/reference/src/runtime/reference/eval_helpers.cpp diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/eval_helpers.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/eval_helpers.hpp deleted file mode 100644 index 935237778d1..00000000000 --- a/ngraph/core/reference/include/ngraph/runtime/reference/eval_helpers.hpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "ngraph/runtime/host_tensor.hpp" - -namespace ngraph -{ - namespace eval - { - AxisSet extract_reduction_axes(const HostTensorPtr& axes, const char* op_name); - } -} // namespace ngraph diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/logical_reduction.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/logical_reduction.hpp index f9e24d89292..f2789da27de 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/logical_reduction.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/logical_reduction.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape_util.hpp" @@ -17,53 +18,75 @@ 
namespace ngraph { static inline void reduce_logical_and(const char* arg, char* out, - const Shape& input_shape, - const AxisSet& reduction_axes, - bool keep_dims) + const Shape& in_shape, + const AxisSet& reduction_axes) { - CoordinateTransform output_transform( - reduce(input_shape, reduction_axes, keep_dims)); + constexpr bool dont_keep_dims_in_output = false; + const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); + std::fill(out, out + shape_size(out_shape), 1); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = 1; - } - - CoordinateTransform input_transform(input_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); - out[output_transform.index(output_coord)] = - out[output_transform.index(output_coord)] && - arg[input_transform.index(input_coord)]; + const Coordinate output_coord = + reduce(input_coord, reduction_axes, dont_keep_dims_in_output); + + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); + + out[out_idx] = out[out_idx] && arg[in_idx]; } } + NGRAPH_DEPRECATED("Remove when arm plugin supports the new signature") + static inline void reduce_logical_and(const char* arg, + char* out, + const Shape& input_shape, + const AxisSet& reduction_axes, + bool) + { + reduce_logical_and(arg, out, input_shape, reduction_axes); + } + static inline void reduce_logical_or(const char* arg, char* out, - const Shape& input_shape, - const AxisSet& reduction_axes, - bool keep_dims) + const Shape& in_shape, + const AxisSet& reduction_axes) { - CoordinateTransform output_transform( - reduce(input_shape, reduction_axes, keep_dims)); + const auto out_shape = reduce(in_shape, reduction_axes, false); + std::fill(out, out + shape_size(out_shape), 0); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = 0; - } - - CoordinateTransform input_transform(input_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); - out[output_transform.index(output_coord)] = - out[output_transform.index(output_coord)] || - arg[input_transform.index(input_coord)]; + const Coordinate output_coord = reduce(input_coord, reduction_axes, false); + + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); + + out[out_idx] = out[out_idx] || arg[in_idx]; } } + + NGRAPH_DEPRECATED("Remove when arm plugin supports the new signature") + static inline void reduce_logical_or(const char* arg, + char* out, + const Shape& input_shape, + const AxisSet& reduction_axes, + bool) + { + reduce_logical_or(arg, out, input_shape, reduction_axes); + } } // namespace reference } // namespace runtime } // namespace ngraph diff --git 
a/ngraph/core/reference/src/runtime/reference/eval_helpers.cpp b/ngraph/core/reference/src/runtime/reference/eval_helpers.cpp deleted file mode 100644 index 9aae80310a3..00000000000 --- a/ngraph/core/reference/src/runtime/reference/eval_helpers.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "ngraph/check.hpp" -#include "ngraph/runtime/reference/eval_helpers.hpp" -#include "ngraph/util.hpp" - -namespace ngraph -{ - namespace eval - { - AxisSet extract_reduction_axes(const HostTensorPtr& axes, const char* op_name) - { - const auto axes_in_tensor = host_tensor_2_vector(axes); - - const bool negative_axis_received = - std::any_of(axes_in_tensor.begin(), axes_in_tensor.end(), [](const int64_t axis) { - return axis < 0; - }); - - NGRAPH_CHECK(!negative_axis_received, - "Negative axis value received in the ", - op_name, - " evaluation. This case is not supported."); - - return AxisSet( - std::vector(axes_in_tensor.begin(), axes_in_tensor.end())); - } - } // namespace eval -} // namespace ngraph diff --git a/ngraph/core/src/op/reduce_logical_and.cpp b/ngraph/core/src/op/reduce_logical_and.cpp index c75c244d590..6b578c894ac 100644 --- a/ngraph/core/src/op/reduce_logical_and.cpp +++ b/ngraph/core/src/op/reduce_logical_and.cpp @@ -3,10 +3,11 @@ // #include "ngraph/op/reduce_logical_and.hpp" +#include #include "itt.hpp" #include "ngraph/log.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/runtime/reference/eval_helpers.hpp" #include "ngraph/runtime/reference/logical_reduction.hpp" using namespace ngraph; @@ -32,28 +33,20 @@ shared_ptr op::v1::ReduceLogicalAnd::clone_with_new_inputs(const OutputVec return make_shared(new_args.at(0), new_args.at(1), get_keep_dims()); } -namespace +namespace reduce_and { bool evaluate_reduce_logical_and(const HostTensorPtr& data, - const HostTensorPtr& axes, const HostTensorPtr& out, + const AxisSet& reduction_axes, bool keep_dims) { - if (data->get_element_type() != element::boolean || - !axes->get_element_type().is_integral_number()) - { - return false; - } + out->set_shape(reduce(data->get_shape(), reduction_axes, keep_dims)); try { - const AxisSet reduction_axes = eval::extract_reduction_axes(axes, "ReduceLogicalAnd"); - runtime::reference::reduce_logical_and(data->get_data_ptr(), out->get_data_ptr(), data->get_shape(), - reduction_axes, - keep_dims); - + reduction_axes); return true; } catch (const ngraph_error& e) @@ -62,16 +55,25 @@ namespace return false; } } -} // namespace +} // namespace reduce_and bool op::v1::ReduceLogicalAnd::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v1_ReduceLogicalAnd_evaluate); + NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); const auto& data = inputs[0]; const auto& axes = inputs[1]; const auto& out = outputs[0]; - return evaluate_reduce_logical_and(data, axes, out, get_keep_dims()); + if (data->get_element_type() != element::boolean || + !axes->get_element_type().is_integral_number()) + { + return false; + } + const auto reduction_axes = get_normalized_axes_from_tensor( + axes, data->get_partial_shape().rank(), get_friendly_name()); + return reduce_and::evaluate_reduce_logical_and(data, out, reduction_axes, get_keep_dims()); } bool op::v1::ReduceLogicalAnd::has_evaluate() const diff --git a/ngraph/core/src/op/reduce_logical_or.cpp 
b/ngraph/core/src/op/reduce_logical_or.cpp index a6afa5f77c8..427b576bf91 100644 --- a/ngraph/core/src/op/reduce_logical_or.cpp +++ b/ngraph/core/src/op/reduce_logical_or.cpp @@ -3,10 +3,11 @@ // #include "ngraph/op/reduce_logical_or.hpp" +#include #include "itt.hpp" #include "ngraph/log.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/runtime/reference/eval_helpers.hpp" #include "ngraph/runtime/reference/logical_reduction.hpp" using namespace ngraph; @@ -32,28 +33,20 @@ shared_ptr op::v1::ReduceLogicalOr::clone_with_new_inputs(const OutputVect return make_shared(new_args.at(0), new_args.at(1), get_keep_dims()); } -namespace +namespace reduce_or { bool evaluate_reduce_logical_or(const HostTensorPtr& data, - const HostTensorPtr& axes, const HostTensorPtr& out, + const AxisSet& reduction_axes, bool keep_dims) { - if (data->get_element_type() != element::boolean || - !axes->get_element_type().is_integral_number()) - { - return false; - } + out->set_shape(reduce(data->get_shape(), reduction_axes, keep_dims)); try { - const AxisSet reduction_axes = eval::extract_reduction_axes(axes, "ReduceLogicalOr"); - runtime::reference::reduce_logical_or(data->get_data_ptr(), out->get_data_ptr(), data->get_shape(), - reduction_axes, - keep_dims); - + reduction_axes); return true; } catch (const ngraph_error& e) @@ -62,16 +55,25 @@ namespace return false; } } -} // namespace +} // namespace reduce_or bool op::v1::ReduceLogicalOr::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v1_ReduceLogicalOr_evaluate); + NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); const auto& data = inputs[0]; const auto& axes = inputs[1]; const auto& out = outputs[0]; - return evaluate_reduce_logical_or(data, axes, out, get_keep_dims()); + if (data->get_element_type() != element::boolean || + !axes->get_element_type().is_integral_number()) + { + return false; + } + const auto reduction_axes = get_normalized_axes_from_tensor( + axes, data->get_partial_shape().rank(), get_friendly_name()); + return reduce_or::evaluate_reduce_logical_or(data, out, reduction_axes, get_keep_dims()); } bool op::v1::ReduceLogicalOr::has_evaluate() const diff --git a/ngraph/test/eval.cpp b/ngraph/test/eval.cpp index f5602023c5c..1ba827190ae 100644 --- a/ngraph/test/eval.cpp +++ b/ngraph/test/eval.cpp @@ -1810,28 +1810,6 @@ TEST(eval, topk_v1_param_dyn_k0) ASSERT_EQ(result1_val, expec1); } -TEST(eval, reduce_logical_and__neg_axis) -{ - const auto data = make_shared(element::boolean, Shape{2, 2, 2}); - const auto axes = make_shared(element::i64, Shape{}); - - const auto op = make_shared(data, axes); - - auto fun = make_shared(op, ParameterVector{data, axes}); - - auto result = make_shared(); - - // when ReduceLogicalAnd node evaluator returns false -> the Function object throws - EXPECT_THROW( - fun->evaluate({result}, - { - make_host_tensor( - Shape{2, 2, 2}, {true, false, true, false, true, false, true, false}), - make_host_tensor(Shape{}, {-1}), - }), - ngraph::ngraph_error); -} - TEST(eval, evaluate_static_scatter_update_basic_axes_indices_i32) { const Shape data_shape{3, 3}; From d777679de94559f4f2626a5d27caecce18ec4dc3 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Tue, 15 Jun 2021 13:19:58 +0300 Subject: [PATCH 08/43] Added copyrights note into CMakeLists (#6155) --- thirdparty/zlib/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/thirdparty/zlib/CMakeLists.txt b/thirdparty/zlib/CMakeLists.txt index b24d4abf323..33b9222ec53 100644 --- a/thirdparty/zlib/CMakeLists.txt +++ b/thirdparty/zlib/CMakeLists.txt @@ -1,3 +1,7 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + PROJECT(zlib) if(NOT WIN32) From 67c93ec6fa0954a8d9977a93b7ab56edc73858b2 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Tue, 15 Jun 2021 13:47:06 +0300 Subject: [PATCH 09/43] Return sporadic GPU test cases (cannot reproduce 54436) (#6127) --- .../plugin/gpu/shared_tests_instances/skip_tests_config.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 07bd2a26098..5863309efe4 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -52,9 +52,6 @@ std::vector disabledTestPatterns() { R"(.*(LPT/StridedSliceTransformation).*)", // TODO: Issue: 48106 R"(.*ConstantResultSubgraphTest.*inPrc=I16.*)", - // TODO: Issue: 54436 - R"(.*LSTMSequence.*CompareWithRefs.*mode=PURE_SEQ_RAND_SEQ_LEN_PARAM.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", - R"(.*LSTMSequence.*CompareWithRefs.*mode=CONVERT_TO_TI_RAND_SEQ_LEN_PARAM_seq.*direction=bidirectional_clip=0.7_netPRC=FP32.*)", // TODO: Issue: 54194 R"(.*ActivationLayerTest.*SoftPlus.*)", // need to implement Export / Import From a248d84894ddf8681192c40dbfc2a9fa90b9d5f0 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Tue, 15 Jun 2021 14:02:37 +0200 Subject: [PATCH 10/43] [Spec] ShuffleChannels spec update (#5813) --- docs/ops/movement/ShuffleChannels_1.md | 47 ++++++++++++++++++-------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/docs/ops/movement/ShuffleChannels_1.md b/docs/ops/movement/ShuffleChannels_1.md index ec7cfc75d9d..e97f3350d2a 100644 --- a/docs/ops/movement/ShuffleChannels_1.md +++ b/docs/ops/movement/ShuffleChannels_1.md @@ -8,12 +8,37 @@ **Short description**: *ShuffleChannels* permutes data in the channel dimension of the input tensor. +**Detailed description**: + +Input tensor of `data_shape` is always interpreted as 4D tensor with the following shape: + + dim 0: data_shape[0] * data_shape[1] * ... * data_shape[axis-1] + (or 1 if axis == 0) + dim 1: group + dim 2: data_shape[axis] / group + dim 3: data_shape[axis+1] * data_shape[axis+2] * ... * data_shape[data_shape.size()-1] + (or 1 if axis points to last dimension) + + +Trailing and leading to `axis` dimensions are flattened and reshaped back to the original shape after channels shuffling. + + +The operation is equivalent to the following transformation of the input tensor `x` of shape `[N, C, H, W]` and `axis = 1`: + +\f[ +x' = reshape(x, [N, group, C / group, H * W])\\ +x'' = transpose(x', [0, 2, 1, 3])\\ +y = reshape(x'', [N, C, H, W])\\ +\f] + +where `group` is the layer attribute described below. + **Attributes**: * *axis* * **Description**: *axis* specifies the index of a channel dimension. - * **Range of values**: an integer number in the range [-4, 3] + * **Range of values**: an integer number in the range `[-rank(data_shape), rank(data_shape) - 1]` * **Type**: `int` * **Default value**: 1 * **Required**: *No* @@ -21,30 +46,22 @@ * *group* * **Description**: *group* specifies the number of groups to split the channel dimension into. 
This number must evenly divide the channel dimension size. - * **Range of values**: a positive integer + * **Range of values**: a positive integer in the range `[1, data_shape[axis]]` * **Type**: `int` * **Default value**: 1 * **Required**: *No* **Inputs**: -* **1**: 4D input tensor of any supported data type. Required. +* **1**: `data` input tensor of type *T* and rank greater or equal to 1. **Required.** **Outputs**: -* **1**: 4D input tensor with shape and element type as for the input tensor. +* **1**: Output tensor with element type *T* and same shape as the input tensor. -**Mathematical Formulation** +**Types** -The operation is the equivalent with the following transformation of the input tensor *x* of shape *[N, C, H, W]*: - -``` -x' = reshape(x, [N, group, C / group, H * W]) -x'' = transpose(x', [0, 2, 1, 3]) -y = reshape(x'', [N, C, H, W]) -``` - -where `group` is the layer parameter described above and the `axis = 1`. +* *T*: any supported numeric type. **Example** @@ -68,4 +85,4 @@ where `group` is the layer parameter described above and the `axis = 1`. -``` \ No newline at end of file +``` From d7b2e4e50ec4b7ee7a7b21aae36a7be853e3b349 Mon Sep 17 00:00:00 2001 From: Bartek Szmelczynski Date: Tue, 15 Jun 2021 14:24:40 +0200 Subject: [PATCH 11/43] revise space_to_batch spec (#5767) * refactor part of the docs file to use \dots * refector docs * add function enclosure for docs * split function enclosurs across lines * add latex operations to spec * fix style * fix missing index * remove link to tensorflow operation * Remove commas from formula. Co-authored-by: jdanieck --- docs/ops/movement/SpaceToBatch_2.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/ops/movement/SpaceToBatch_2.md b/docs/ops/movement/SpaceToBatch_2.md index 66c064e27be..eea9df8ced1 100644 --- a/docs/ops/movement/SpaceToBatch_2.md +++ b/docs/ops/movement/SpaceToBatch_2.md @@ -8,20 +8,20 @@ **Detailed description**: -The *SpaceToBatch* operation is similar to the TensorFlow* operation [SpaceToBatchND](https://www.tensorflow.org/api_docs/python/tf/space_to_batch_nd) - The operation is equivalent to the following transformation of the input tensor `data` of shape `[batch, D_1, D_2 ... D_{N - 1}]` and `block_shape`, `pads_begin`, `pads_end` of shapes `[N]` to *Y* output tensor. - Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to `pads_begin` and `pads_end`: - note: P_0 for batch dimension is expected to be 0 (no-padding). - x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i = pads_begin[i] + pads_end[i] +Zero-pad the start and end of dimensions \f$[D_0, \dots, D_{N - 1}]\f$ of the input according to `pads_begin` and `pads_end`: - note: B_0 for batch is ignored. 
- x' = reshape(x, [batch, (D_1 + P_1) / B_1, B_1, (D_2 + P_2) / B_2, B_2, ..., (D_{N - 1} + P_{N - 1}) / B_{N - 1}, B_{N - 1}]), where B_i = block_shape[i] +\f[x = [batch + P_0, D_1 + P_1, D_2 + P_2, \dots, D_{N - 1} + P_{N - 1}]\f] +\f[x' = reshape(x, [batch, \frac{D_1 + P_1}{B_1}, B_1, \frac{D_2 + P_2}{B_2}, B_2, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}, B_{N - 1}])\f] +\f[x'' = transpose(x', [2, 4, \dots, (N - 1) + (N - 1), 0, 1, 3, \dots, N + (N - 1)])\f] +\f[y = reshape(x'', [batch \times B_1 \times \dots \times B_{N - 1}, \frac{D_1 + P_1}{B_1}, \frac{D_2 + P_2}{B_2}, \dots, \frac{D_{N - 1} + P_{N - 1}}{B_{N - 1}}]\f] - x'' = transpose(x', [2, 4, ..., (N - 1) + (N - 1), 0, 1, 3, ..., N + (N - 1)]) - - y = reshape(x'', [batch * B_1 * ... * B_{N - 1}, (D_1 + P_1) / B_1, (D_2 + P_2) / B_2, ... , (D_{N - 1} + P_{N - 1}) / B_{N - 1}]) +where +- \f$P_i\f$ = pads_begin[i] + pads_end[i] +- \f$B_i\f$ = block_shape[i] +- \f$P_0\f$ for batch dimension is expected to be 0 (no-padding) +- \f$B_0\f$ for batch is ignored **Attributes** @@ -36,7 +36,7 @@ The operation is equivalent to the following transformation of the input tensor **Outputs** -* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (pads_begin[1] + D_1 + pads_end[1]) / block_shape[1], (pads_begin[2] + D_2 + pads_end[2]) / block_shape[2], ..., (pads_begin[N - 1] + D_{N - 1} + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input. +* **1**: N-D tensor with shape `[batch * block_shape[0] * block_shape[1] * ... * block_shape[N - 1], (D_1 + pads_begin[1] + pads_end[1]) / block_shape[1], (D_2 + pads_begin[2] + pads_end[2]) / block_shape[2], ..., (D_{N -1} + pads_begin[N - 1] + pads_end[N - 1]) / block_shape[N - 1]` of the same type as `data` input. **Types** From 5ebdcfe15b36d587e323da949f3deaaea21b78c4 Mon Sep 17 00:00:00 2001 From: Jozef Daniecki Date: Tue, 15 Jun 2021 14:35:47 +0200 Subject: [PATCH 12/43] StridedSlice operation specification refactoring (#5887) * StridedSlice spec refactored against explicit type indication. * Add name to data input. * Add new examples. * Changed T to 'any supported type'. * Remove mention about 'generalized python indexing' from short description. --- docs/ops/movement/StridedSlice_1.md | 97 +++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 18 deletions(-) diff --git a/docs/ops/movement/StridedSlice_1.md b/docs/ops/movement/StridedSlice_1.md index 6c07665d8f9..41742e20652 100644 --- a/docs/ops/movement/StridedSlice_1.md +++ b/docs/ops/movement/StridedSlice_1.md @@ -4,14 +4,13 @@ **Category**: Data movement operation -**Short description**: *StridedSlice* extracts a strided slice of a tensor. - It is similar to generalized array indexing in Python\*. +**Short description**: *StridedSlice* extracts a strided slice of a tensor. **Attributes** * *begin_mask* - * **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to 1 means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension. + * **Description**: *begin_mask* is a bit mask. *begin_mask[i]* equal to `1` means that the corresponding dimension of the `begin` input is ignored and the 'real' beginning of the tensor is used along corresponding dimension. * **Range of values**: a list of `0`s and `1`s * **Type**: `int[]` * **Default value**: None @@ -19,7 +18,7 @@ * *end_mask* - * **Description**: *end_mask* is a bit mask. 
If *end_mask[i]* is 1, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension. + * **Description**: *end_mask* is a bit mask. If *end_mask[i]* is `1`, the corresponding dimension of the `end` input is ignored and the real 'end' of the tensor is used along corresponding dimension. * **Range of values**: a list of `0`s and `1`s * **Type**: `int[]` * **Default value**: None @@ -27,7 +26,7 @@ * *new_axis_mask* - * **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is 1, a length 1 dimension is inserted on the `i`-th position of input tensor. + * **Description**: *new_axis_mask* is a bit mask. If *new_axis_mask[i]* is `1`, a length 1 dimension is inserted on the `i`-th position of input tensor. * **Range of values**: a list of `0`s and `1`s * **Type**: `int[]` * **Default value**: `[0]` @@ -35,7 +34,7 @@ * *shrink_axis_mask* - * **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is 1, the dimension on the `i`-th position is deleted. + * **Description**: *shrink_axis_mask* is a bit mask. If *shrink_axis_mask[i]* is `1`, the dimension on the `i`-th position is deleted. * **Range of values**: a list of `0`s and `1`s * **Type**: `int[]` * **Default value**: `[0]` @@ -51,21 +50,83 @@ **Inputs**: -* **1**: Multidimensional input tensor to be sliced. Required. +* **1**: `data` - input tensor to be sliced of type `T` and arbitrary shape. **Required.** -* **2**: `begin` input - 1D input tensor with begin indexes for input tensor slicing. Required. - Out-of-bounds values are silently clamped. If `begin_mask[i]` is 1, the value of `begin[i]` is ignored - and the range of the appropriate dimension starts from 0. - Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`. +* **2**: `begin` - 1D tensor of type `T_IND` with begin indexes for input tensor slicing. **Required.** + Out-of-bounds values are silently clamped. If `begin_mask[i]` is `1`, the value of `begin[i]` is ignored and the range of the appropriate dimension starts from `0`. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `begin[0]=-1` means `begin[0]=3`. -* **3**: `end` input - 1D input tensor with end indexes for input tensor slicing. Required. - Out-of-bounds values will be silently clamped. If `end_mask[i]` is 1, the value of `end[i]` is ignored - and the full range of the appropriate dimension is used instead. - Negative values mean indexing starts from the end. For example, if `foo=[1,2,3]`, `end[0]=-1` means `end[0]=3`. +* **3**: `end` - 1D tensor of type `T_IND` with end indexes for input tensor slicing. **Required.** + Out-of-bounds values will be silently clamped. If `end_mask[i]` is `1`, the value of `end[i]` is ignored and the full range of the appropriate dimension is used instead. Negative values mean indexing starts from the end. For example, if `data=[1,2,3]`, `end[0]=-1` means `end[0]=3`. -* **4**: `stride` input - 1D input tensor with strides. Optional. +* **4**: `stride` - 1D tensor of type `T_IND` with strides. **Optional.** -**Example** +**Types** +* *T*: any supported type. +* *T_IND*: any supported integer type. + +**Example** +Example of `begin_mask` & `end_mask` usage. +```xml + + + + + 2 + 3 + 4 + + + 2 + + + 2 + + + 2 + + + + + 1 + 3 + 2 + + + +``` + +Example of `new_axis_mask` usage. +```xml + + + + + 2 + 3 + 4 + + + 2 + + + 2 + + + 2 + + + + + 1 + 2 + 3 + 4 + + + +``` + +Example of `shrink_axis_mask` usage. 
```xml @@ -96,4 +157,4 @@ -``` \ No newline at end of file +``` From db74707835cd2ed08e11dd38cbfac9c038b33def Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Tue, 15 Jun 2021 15:08:36 +0200 Subject: [PATCH 13/43] Enable reshape test (#6161) --- ngraph/test/runtime/ie/unit_test.manifest | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index e76ac6f2b97..1537b823c17 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -386,31 +386,12 @@ reduce_sum_large_1d_to_scalar # Doesn't throw expected exception type. unhandled_op -# Const layer Constant_6325 has incorrect dimensions in the output data 0 -reshape_t2s - # Expected equality of these values: # (vector{42}) # Which is: { '*' (42, 0x2A) } # read_vector(result) # Which is: { '\0' } reshape_s2t1 -reshape_v2m_col -reshape_v2m_row -reshape_v2t_middle -reshape_m2m_same -reshape_m2m_transpose -reshape_m2m_dim_change_transpose -reshape_3d_transpose_021 -reshape_3d_transpose_210 -reshape_3d_transpose_201 -reshape_3d_transpose_102 -reshape_3d_transpose_120 -reshape_4d_transpose -reshape_4d_no_transpose -reshape_transposed_shape_change -reshape_6d -reshape_shufflenet_5d dyn_group_convolution_backprop_data group_conv group_conv_striding From c3d1c2e420cd8f77b1e8a4946eedd1c1774c1287 Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Tue, 15 Jun 2021 15:12:12 +0200 Subject: [PATCH 14/43] Revise reference implementation for ReduceMax operation (#5792) * Revise reference implementation for ReduceMax operation * Refactor backend unit tests * Move tests with zero dims to op_eval * Remove test with double elem type * Fix code style * Added minor changes * Replace CoordinateTransform for CoordinateTransformBasic * Added constant expression to set keep_dims as false * Add const qualifier to local variables * Use host tensor to retrieve and normalize axes --- .../ngraph/runtime/reference/log_softmax.hpp | 2 +- .../include/ngraph/runtime/reference/max.hpp | 35 +- .../ngraph/runtime/reference/softmax.hpp | 2 +- ngraph/core/src/op/max.cpp | 12 +- ngraph/test/CMakeLists.txt | 1 + ngraph/test/backend/reduce_max.in.cpp | 336 ------------------ ngraph/test/op_eval/reduce_max.cpp | 312 ++++++++++++++++ ngraph/test/runtime/ie/unit_test.manifest | 14 - .../runtime/interpreter/unit_test.manifest | 2 - 9 files changed, 343 insertions(+), 373 deletions(-) create mode 100644 ngraph/test/op_eval/reduce_max.cpp diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/log_softmax.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/log_softmax.hpp index 27198f465b3..2cf445ee54f 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/log_softmax.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/log_softmax.hpp @@ -24,7 +24,7 @@ namespace ngraph auto temp_max = std::vector(temp_elements, 0); auto temp_sum = std::vector(temp_elements, 0); - max(arg, temp_max.data(), shape, axes, true); + max(arg, temp_max.data(), shape, axes); CoordinateTransform transform(shape); CoordinateTransform temp_transform(temp_shape); diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/max.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/max.hpp index d8e39fefd65..8bed66cdefb 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/max.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/max.hpp @@ -6,6 +6,7 @@ #include #include 
+#include #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape_util.hpp" @@ -17,35 +18,35 @@ namespace ngraph namespace reference { template - void max(const T* arg, - T* out, - const Shape& in_shape, - const AxisSet& reduction_axes, - bool keep_dims) + void max(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) { T minval = std::numeric_limits::has_infinity ? T(-std::numeric_limits::infinity()) : std::numeric_limits::min(); - auto out_shape = reduce(in_shape, reduction_axes, keep_dims); - CoordinateTransform output_transform(out_shape); + constexpr bool dont_keep_dims_in_output = false; + const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); + std::fill(out, out + shape_size(out_shape), minval); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = minval; - } - - CoordinateTransform input_transform(in_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); + const Coordinate output_coord = + reduce(input_coord, reduction_axes, dont_keep_dims_in_output); - T x = arg[input_transform.index(input_coord)]; - T max = out[output_transform.index(output_coord)]; + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); + + const T x = arg[in_idx]; + const T max = out[out_idx]; if (x > max) { - out[output_transform.index(output_coord)] = x; + out[out_idx] = x; } } } diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/softmax.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/softmax.hpp index 94cb0549b73..a8544177aec 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/softmax.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/softmax.hpp @@ -23,7 +23,7 @@ namespace ngraph auto temp_elements = shape_size(temp_shape); auto temp_ptr = new T[temp_elements]; - max(arg, temp_ptr, shape, axes, true); + max(arg, temp_ptr, shape, axes); CoordinateTransform transform(shape); CoordinateTransform temp_transform(temp_shape); diff --git a/ngraph/core/src/op/max.cpp b/ngraph/core/src/op/max.cpp index 318910aa70c..f4332ef9317 100644 --- a/ngraph/core/src/op/max.cpp +++ b/ngraph/core/src/op/max.cpp @@ -3,8 +3,10 @@ // #include "ngraph/op/max.hpp" +#include #include "itt.hpp" #include "ngraph/graph_util.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/max.hpp" #include "ngraph/shape_util.hpp" @@ -22,7 +24,7 @@ namespace maxop { out->set_shape(reduce(arg->get_shape(), axes, keep_dims)); runtime::reference::max( - arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes, keep_dims); + arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes); return true; } @@ -67,7 +69,13 @@ bool op::v1::ReduceMax::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { NGRAPH_OP_SCOPE(v1_ReduceMax_evaluate); - return maxop::evaluate_max(inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); + NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); + NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); + + const auto 
reduction_axes = get_normalized_axes_from_tensor( + inputs[1], inputs[0]->get_partial_shape().rank(), get_friendly_name()); + + return maxop::evaluate_max(inputs[0], outputs[0], reduction_axes, get_keep_dims()); } bool op::v1::ReduceMax::has_evaluate() const diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 47dfbbd3e49..b9e65209bb8 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -71,6 +71,7 @@ set(SRC op_eval/memory.cpp op_eval/mish.cpp op_eval/non_zero.cpp + op_eval/reduce_max.cpp op_eval/reduce_prod.cpp op_eval/reduce_sum.cpp op_eval/roi_align.cpp diff --git a/ngraph/test/backend/reduce_max.in.cpp b/ngraph/test/backend/reduce_max.in.cpp index a1288f416a8..28c1a3b97cd 100644 --- a/ngraph/test/backend/reduce_max.in.cpp +++ b/ngraph/test/backend/reduce_max.in.cpp @@ -104,123 +104,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_rows_int32) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - // Create some tensors for input/output - std::vector a{}; - - auto test_case = test::TestCase(f); - test_case.add_input({a}); - test_case.add_expected_output(shape_rt, - {-std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}); - test_case.run(); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_rows_zero_int32) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::i32, shape_a); - Shape shape_rt{3}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::i32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - int32_t minval = std::numeric_limits::has_infinity - ? -std::numeric_limits::infinity() - : std::numeric_limits::min(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{minval, minval, minval}), read_vector(result)); - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
- Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); -} - NGRAPH_TEST(${BACKEND_NAME}, reduce_max_3d_to_matrix_most_sig) { Shape shape_a{3, 3, 3}; @@ -319,55 +202,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_max_3d_to_scalar_int32) EXPECT_EQ((vector{14}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_3d_to_scalar_double) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f64, shape_a); - Shape shape_rt{}; - auto axes = make_shared(element::i32, Shape{3}, vector{0, 1, 2}); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f64, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); - auto result = backend->create_tensor(element::f64, shape_rt); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_TRUE(test::all_close_f((vector{14}), read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 2}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the - // right value. - copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float mi = -std::numeric_limits::infinity(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); -} - // ----------------------- keep dims = true ----------------------- // NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_to_scalar) @@ -473,127 +307,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_matrix_rows_int32) EXPECT_EQ((vector{2, 4, 6}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 1}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity(), - -std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_matrix_rows_zero_int32) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::i32, shape_a); - Shape shape_rt{3, 1}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::i32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::i32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - int32_t minval = std::numeric_limits::has_infinity - ? -std::numeric_limits::infinity() - : std::numeric_limits::min(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{minval, minval, minval}), read_vector(result)); - EXPECT_EQ((vector{}), read_vector(a)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
- Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1, 2}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity(), - -std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1, 1}; - auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); -} - NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_3d_to_matrix_most_sig) { Shape shape_a{3, 3, 3}; @@ -711,55 +424,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_3d_to_scalar_int32) EXPECT_EQ((vector{14}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_3d_to_scalar_double) -{ - Shape shape_a{3, 3, 3}; - auto A = make_shared(element::f64, shape_a); - Shape shape_rt{1, 1, 1}; - auto axes = make_shared(element::i32, Shape{3}, vector{0, 1, 2}); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f64, shape_a); - copy_data(a, vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}); - auto result = backend->create_tensor(element::f64, shape_rt); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_TRUE(test::all_close_f((vector{14}), read_vector(result))); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_max_keep_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 1, 2}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, true), 
ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the - // right value. - copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float mi = -std::numeric_limits::infinity(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); -} - // Dynamic NGRAPH_TEST(${BACKEND_NAME}, reduce_max_matrix_columns_dynamic) diff --git a/ngraph/test/op_eval/reduce_max.cpp b/ngraph/test/op_eval/reduce_max.cpp new file mode 100644 index 00000000000..2ce244f9eb2 --- /dev/null +++ b/ngraph/test/op_eval/reduce_max.cpp @@ -0,0 +1,312 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/test_control.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/ndarray.hpp" + + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; + +TEST(op_eval, reduce_max_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_max_matrix_rows_zero_int32) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::i32, shape_a); + Shape shape_rt{3}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::i32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + int32_t minval = std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() + : std::numeric_limits::min(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{minval, minval, minval}), read_vector(result)); + EXPECT_EQ((vector{}), read_vector(a)); +} + +TEST(op_eval, reduce_max_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
+ Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_max_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_max_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_max_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 2}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the + // right value. 
+ copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float mi = -std::numeric_limits::infinity(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); +} + +TEST(op_eval, reduce_max_keep_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 1}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_max_keep_matrix_rows_zero_int32) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::i32, shape_a); + Shape shape_rt{3, 1}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::i32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::i32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + int32_t minval = std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() + : std::numeric_limits::min(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{minval, minval, minval}), read_vector(result)); + EXPECT_EQ((vector{}), read_vector(a)); +} + +TEST(op_eval, reduce_max_keep_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
+ Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1, 2}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_max_keep_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_max_keep_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1, 1}; + auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{-std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_max_keep_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 1, 2}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the + // right value. 
+ copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float mi = -std::numeric_limits::infinity(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{mi, mi, mi, mi, mi, mi}), read_vector(result)); +} \ No newline at end of file diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 1537b823c17..a0a7c8735f5 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -474,21 +474,7 @@ reduce_mean_matrix_rows_int32 reduce_mean_keep_to_scalar_int8 reduce_mean_keep_matrix_rows_int32 reduce_max_to_scalar_int8 -reduce_max_matrix_rows_zero -reduce_max_matrix_rows_zero_int32 -reduce_max_matrix_cols_zero -reduce_max_vector_zero -reduce_max_matrix_to_scalar_zero_by_zero -reduce_max_3d_to_scalar_double -reduce_max_3d_eliminate_zero_dim reduce_max_keep_to_scalar_int8 -reduce_max_keep_matrix_rows_zero -reduce_max_keep_matrix_rows_zero_int32 -reduce_max_keep_matrix_cols_zero -reduce_max_keep_vector_zero -reduce_max_keep_matrix_to_scalar_zero_by_zero -reduce_max_keep_3d_to_scalar_double -reduce_max_keep_3d_eliminate_zero_dim # Incorrect precision f64! sum_trivial_in_double diff --git a/ngraph/test/runtime/interpreter/unit_test.manifest b/ngraph/test/runtime/interpreter/unit_test.manifest index 9e123149152..799025f3b92 100644 --- a/ngraph/test/runtime/interpreter/unit_test.manifest +++ b/ngraph/test/runtime/interpreter/unit_test.manifest @@ -41,9 +41,7 @@ INTERPRETER.reduce_min_keep_to_scalar_int8 INTERPRETER.reduce_mean_to_scalar_int8 INTERPRETER.reduce_mean_keep_to_scalar_int8 INTERPRETER.reduce_max_to_scalar_int8 -INTERPRETER.reduce_max_3d_to_scalar_double INTERPRETER.reduce_max_keep_to_scalar_int8 -INTERPRETER.reduce_max_keep_3d_to_scalar_double INTERPRETER.product_to_scalar_int8 INTERPRETER.max_pool_uint8 INTERPRETER.max_pool_int8 From b4e6028f8b4bdc8fca3bb646b31688bd0d894cf9 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 15 Jun 2021 16:29:26 +0300 Subject: [PATCH 15/43] Fixed shellcheck issues (#6141) --- CMakeLists.txt | 9 +++-- .../scripts/run_tests_myriad_multistick.sh | 2 +- scripts/demo/demo_benchmark_app.sh | 26 ++++++------- scripts/demo/demo_security_barrier_camera.sh | 20 +++++----- .../demo_squeezenet_download_convert_run.sh | 27 +++++++------ scripts/demo/utils.sh | 2 + .../install_4_14_kernel.sh | 38 +++++++++---------- .../install_NCS_udev_rules.sh | 2 - 8 files changed, 63 insertions(+), 63 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0706a72e87..3602750435c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,10 +169,11 @@ ie_shellcheck_process(DIRECTORY "${OpenVINO_MAIN_SOURCE_DIR}" "${IE_MAIN_SOURCE_DIR}/thirdparty" "${IE_MAIN_SOURCE_DIR}/temp" # TODO fix and enable back: - "${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies" - "${OpenVINO_MAIN_SOURCE_DIR}/scripts/demo" - "${OpenVINO_MAIN_SOURCE_DIR}/ngraph" - "${IE_MAIN_SOURCE_DIR}/scripts") + "${OpenVINO_MAIN_SOURCE_DIR}/inference-engine/scripts/dependencies.sh" + "${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_NEO_OCL_driver.sh" + "${OpenVINO_MAIN_SOURCE_DIR}/scripts/install_dependencies/install_openvino_dependencies.sh" + "${OpenVINO_MAIN_SOURCE_DIR}/ngraph/python/tests/test_onnx/model_zoo_preprocess.sh" + ) # # cpack diff --git a/inference-engine/scripts/run_tests_myriad_multistick.sh b/inference-engine/scripts/run_tests_myriad_multistick.sh index 468817c6f75..73befb9b366 100755 --- 
a/inference-engine/scripts/run_tests_myriad_multistick.sh +++ b/inference-engine/scripts/run_tests_myriad_multistick.sh @@ -55,7 +55,7 @@ fi if [[ "${APPS_TO_RUN}" -ge 4 ]] ; then # For more then 4 multidevice testing - for (( VAR = 4; VAR <= ${APPS_TO_RUN}; ++VAR )); do + for (( VAR = 4; VAR <= APPS_TO_RUN; ++VAR )); do ./${APP_NAME} --gtest_filter=*VPURegTest*YOLO*myriad* & pids+=" $!" done diff --git a/scripts/demo/demo_benchmark_app.sh b/scripts/demo/demo_benchmark_app.sh index 8c84e8a724e..25d1f94ae8e 100755 --- a/scripts/demo/demo_benchmark_app.sh +++ b/scripts/demo/demo_benchmark_app.sh @@ -50,7 +50,7 @@ fi target_precision="FP16" -printf "target_precision = ${target_precision}\n" +printf "target_precision = %s\n" ${target_precision} models_path="$HOME/openvino_models/models" models_cache="$HOME/openvino_models/cache" @@ -71,12 +71,12 @@ else fi if ! . "$setupvars_path" ; then - printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}" + printf "Unable to run ./setupvars.sh. Please check its presence. %s" "${run_again}" exit 1 fi # Step 1. Download the Caffe model and the prototxt of the model -printf "${dashes}" +printf "%s" "${dashes}" printf "\n\nDownloading the Caffe model and the prototxt" cur_path=$PWD @@ -119,7 +119,7 @@ elif [[ $DISTRO == "ubuntu" ]]; then python_binary=python3 pip_binary=pip3 - system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2` + system_ver=$(grep -i "DISTRIB_RELEASE" -f /etc/lsb-release | cut -d "=" -f2) if [ "$system_ver" = "16.04" ]; then sudo -E apt-get install -y libpng12-dev else @@ -143,7 +143,7 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then fi if ! command -v $python_binary &>/dev/null; then - printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}" + printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. %s" "${run_again}" exit 1 fi @@ -166,14 +166,14 @@ ir_dir="${irs_path}/${model_dir}/${target_precision}" if [ ! -e "$ir_dir" ]; then # Step 2. Configure Model Optimizer - printf "${dashes}" + printf "%s" "${dashes}" printf "Install Model Optimizer dependencies\n\n" cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites" . ./install_prerequisites.sh caffe cd "$cur_path" # Step 3. Convert a model with Model Optimizer - printf "${dashes}" + printf "%s" "${dashes}" printf "Convert a model with Model Optimizer\n\n" mo_path="${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/mo.py" @@ -181,12 +181,12 @@ if [ ! -e "$ir_dir" ]; then export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp print_and_run "$python_binary" "$downloader_dir/converter.py" --mo "$mo_path" --name "$model_name" -d "$models_path" -o "$irs_path" --precisions "$target_precision" else - printf "\n\nTarget folder ${ir_dir} already exists. Skipping IR generation with Model Optimizer." - printf "If you want to convert a model again, remove the entire ${ir_dir} folder. ${run_again}" + printf "\n\nTarget folder %s already exists. Skipping IR generation with Model Optimizer." "${ir_dir}" + printf "If you want to convert a model again, remove the entire %s folder. %s" "${ir_dir}" "${run_again}" fi # Step 4. Build samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Build Inference Engine samples\n\n" OS_PATH=$(uname -m) @@ -211,15 +211,15 @@ cmake -DCMAKE_BUILD_TYPE=Release "$samples_path" make $NUM_THREADS benchmark_app # Step 5. 
Run samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Run Inference Engine benchmark app\n\n" cd "$binaries_dir" cp -f "$ROOT_DIR/${model_name}.labels" "${ir_dir}/" -print_and_run ./benchmark_app -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" -pc ${sampleoptions} +print_and_run ./benchmark_app -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" -pc "${sampleoptions}" -printf "${dashes}" +printf "%s" "${dashes}" printf "Inference Engine benchmark app completed successfully.\n\n" diff --git a/scripts/demo/demo_security_barrier_camera.sh b/scripts/demo/demo_security_barrier_camera.sh index 4913aaf1d31..eaf6cd6784e 100755 --- a/scripts/demo/demo_security_barrier_camera.sh +++ b/scripts/demo/demo_security_barrier_camera.sh @@ -88,7 +88,7 @@ elif [[ $DISTRO == "ubuntu" ]]; then python_binary=python3 pip_binary=pip3 - system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2` + system_ver=$(grep -i "DISTRIB_RELEASE" -f /etc/lsb-release | cut -d "=" -f2) if [ "$system_ver" = "16.04" ]; then sudo -E apt-get install -y libpng12-dev else @@ -112,7 +112,7 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then fi if ! command -v $python_binary &>/dev/null; then - printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}" + printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. %s" "${run_again}" exit 1 fi @@ -128,18 +128,18 @@ else printf "Error: setupvars.sh is not found\n" fi if ! . "$setupvars_path" ; then - printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}" + printf "Unable to run ./setupvars.sh. Please check its presence. %s" "${run_again}" exit 1 fi # Step 1. Downloading Intel models -printf "${dashes}" +printf "%s" "${dashes}" printf "Downloading Intel models\n\n" target_precision="FP16" -printf "target_precision = ${target_precision}\n" +printf "target_precision = %s\n" "${target_precision}" downloader_dir="${INTEL_OPENVINO_DIR}/deployment_tools/open_model_zoo/tools/downloader" @@ -161,13 +161,13 @@ while read -r model_opt model_name; do done < "$ROOT_DIR/demo_security_barrier_camera.conf" # Step 2. Build samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Build Inference Engine demos\n\n" demos_path="${INTEL_OPENVINO_DIR}/deployment_tools/open_model_zoo/demos" if ! command -v cmake &>/dev/null; then - printf "\n\nCMAKE is not installed. It is required to build Inference Engine demos. Please install it. ${run_again}" + printf "\n\nCMAKE is not installed. It is required to build Inference Engine demos. Please install it. %s" "${run_again}" exit 1 fi @@ -189,13 +189,13 @@ cmake -DCMAKE_BUILD_TYPE=Release "$demos_path" make $NUM_THREADS security_barrier_camera_demo # Step 3. 
Run samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Run Inference Engine security_barrier_camera demo\n\n" binaries_dir="${build_dir}/${OS_PATH}/Release" cd "$binaries_dir" -print_and_run ./security_barrier_camera_demo -d "$target" -d_va "$target" -d_lpr "$target" -i "$target_image_path" "${model_args[@]}" ${sampleoptions} +print_and_run ./security_barrier_camera_demo -d "$target" -d_va "$target" -d_lpr "$target" -i "$target_image_path" "${model_args[@]}" "${sampleoptions}" -printf "${dashes}" +printf "%s" "${dashes}" printf "Demo completed successfully.\n\n" diff --git a/scripts/demo/demo_squeezenet_download_convert_run.sh b/scripts/demo/demo_squeezenet_download_convert_run.sh index 00daead548e..91a9055375b 100755 --- a/scripts/demo/demo_squeezenet_download_convert_run.sh +++ b/scripts/demo/demo_squeezenet_download_convert_run.sh @@ -46,7 +46,7 @@ done target_precision="FP16" -printf "target_precision = ${target_precision}\n" +printf "target_precision = %s\n" "${target_precision}" models_path="$HOME/openvino_models/models" models_cache="$HOME/openvino_models/cache" @@ -67,12 +67,12 @@ else fi if ! . "$setupvars_path" ; then - printf "Unable to run ./setupvars.sh. Please check its presence. ${run_again}" + printf "Unable to run ./setupvars.sh. Please check its presence. %s" "${run_again}" exit 1 fi # Step 1. Download the Caffe model and the prototxt of the model -printf "${dashes}" +printf "%s" "${dashes}" printf "\n\nDownloading the Caffe model and the prototxt" cur_path=$PWD @@ -115,7 +115,7 @@ elif [[ $DISTRO == "ubuntu" ]]; then python_binary=python3 pip_binary=pip3 - system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2` + system_ver=$(grep -i "DISTRIB_RELEASE" -f /etc/lsb-release | cut -d "=" -f2) if [ "$system_ver" = "16.04" ]; then sudo -E apt-get install -y libpng12-dev else @@ -139,7 +139,7 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then fi if ! command -v $python_binary &>/dev/null; then - printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. ${run_again}" + printf "\n\nPython 3.5 (x64) or higher is not installed. It is required to run Model Optimizer, please install it. %s" "${run_again}" exit 1 fi @@ -162,14 +162,14 @@ ir_dir="${irs_path}/${model_dir}/${target_precision}" if [ ! -e "$ir_dir" ]; then # Step 2. Configure Model Optimizer - printf "${dashes}" + printf "%s" "${dashes}" printf "Install Model Optimizer dependencies\n\n" cd "${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/install_prerequisites" . ./install_prerequisites.sh caffe cd "$cur_path" # Step 3. Convert a model with Model Optimizer - printf "${dashes}" + printf "%s" "${dashes}" printf "Convert a model with Model Optimizer\n\n" mo_path="${INTEL_OPENVINO_DIR}/deployment_tools/model_optimizer/mo.py" @@ -177,12 +177,12 @@ if [ ! -e "$ir_dir" ]; then export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp print_and_run "$python_binary" "$downloader_dir/converter.py" --mo "$mo_path" --name "$model_name" -d "$models_path" -o "$irs_path" --precisions "$target_precision" else - printf "\n\nTarget folder ${ir_dir} already exists. Skipping IR generation with Model Optimizer." - printf "If you want to convert a model again, remove the entire ${ir_dir} folder. ${run_again}" + printf "\n\nTarget folder %s already exists. Skipping IR generation with Model Optimizer." "${ir_dir}" + printf "If you want to convert a model again, remove the entire %s folder. %s" "${ir_dir}" "${run_again}" fi # Step 4. 
Build samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Build Inference Engine samples\n\n" OS_PATH=$(uname -m) @@ -207,15 +207,14 @@ cmake -DCMAKE_BUILD_TYPE=Release "$samples_path" make $NUM_THREADS classification_sample_async # Step 5. Run samples -printf "${dashes}" +printf "%s" "${dashes}" printf "Run Inference Engine classification sample\n\n" cd "$binaries_dir" cp -f "$ROOT_DIR/${model_name}.labels" "${ir_dir}/" -print_and_run ./classification_sample_async -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" ${sampleoptions} - -printf "${dashes}" +print_and_run ./classification_sample_async -d "$target" -i "$target_image_path" -m "${ir_dir}/${model_name}.xml" "${sampleoptions}" +printf "%s" "${dashes}" printf "Demo completed successfully.\n\n" diff --git a/scripts/demo/utils.sh b/scripts/demo/utils.sh index 552ba20aefa..b7349e0e3a8 100755 --- a/scripts/demo/utils.sh +++ b/scripts/demo/utils.sh @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/install_dependencies/install_4_14_kernel.sh b/scripts/install_dependencies/install_4_14_kernel.sh index a7e01309f19..bcef687d787 100755 --- a/scripts/install_dependencies/install_4_14_kernel.sh +++ b/scripts/install_dependencies/install_4_14_kernel.sh @@ -6,28 +6,28 @@ # This script installs Linux kernel 4.14 required for Intel NEO OpenCL driver on Ubuntu and CentOS if grep -i "rhel" /etc/os-release &>/dev/null; then - # Cent OS - echo "install kernel build dependencies" - sudo -E yum install -y git gcc gcc-c++ ncurses-devel openssl-devel bc xz elfutils-libelf-devel xorg-x11-drv-nouveau rpm-build + # Cent OS + echo "install kernel build dependencies" + sudo -E yum install -y git gcc gcc-c++ ncurses-devel openssl-devel bc xz elfutils-libelf-devel xorg-x11-drv-nouveau rpm-build - echo "download 4.14.20 kernel" - if [ ! -f ./linux-4.14.20.tar.xz ]; then - wget https://www.kernel.org/pub/linux/kernel/v4.x/linux-4.14.20.tar.xz - fi + echo "download 4.14.20 kernel" + if [ ! -f ./linux-4.14.20.tar.xz ]; then + wget https://www.kernel.org/pub/linux/kernel/v4.x/linux-4.14.20.tar.xz + fi - tar -xJf linux-4.14.20.tar.xz - cd linux-4.14.20 - echo "build 4.14.20 kernel" - make olddefconfig + tar -xJf linux-4.14.20.tar.xz + cd linux-4.14.20 + echo "build 4.14.20 kernel" + make olddefconfig - make -j 8 binrpm-pkg - cd ~/rpmbuild/RPMS/x86_64 - sudo -E yum -y localinstall *.rpm - sudo -E grub2-set-default 0 + make -j 8 binrpm-pkg + cd ~/rpmbuild/RPMS/x86_64 + sudo -E yum -y localinstall ./*.rpm + sudo -E grub2-set-default 0 elif grep -i "ubuntu" /etc/os-release &>/dev/null; then - # Ubuntu - sudo -E add-apt-repository ppa:teejee2008/ppa - sudo -E apt-get update && sudo apt-get install -y ukuu - sudo -E ukuu --install v4.14.20 + # Ubuntu + sudo -E add-apt-repository ppa:teejee2008/ppa + sudo -E apt-get update && sudo apt-get install -y ukuu + sudo -E ukuu --install v4.14.20 fi \ No newline at end of file diff --git a/scripts/install_dependencies/install_NCS_udev_rules.sh b/scripts/install_dependencies/install_NCS_udev_rules.sh index 3666b4502e9..7062e9d753d 100755 --- a/scripts/install_dependencies/install_NCS_udev_rules.sh +++ b/scripts/install_dependencies/install_NCS_udev_rules.sh @@ -22,5 +22,3 @@ else echo "File '97-myriad-usbboot.rules' is missing. Please make sure you installed 'Inference Engine Runtime for Intel® Movidiusâ„¢ VPU'." 
exit -1 fi - - From 58eef532e95bfc41149cb3b47cdb2b2f4fa66f2a Mon Sep 17 00:00:00 2001 From: Bartosz Lesniewski Date: Tue, 15 Jun 2021 15:54:00 +0200 Subject: [PATCH 16/43] Revise ExtractImagePatches op - specification (#6105) * Change example input matrices to tex * Remove leftover matrix template * Remove reference to TF operation --- docs/ops/movement/ExtractImagePatches_3.md | 165 +++++++++++---------- 1 file changed, 89 insertions(+), 76 deletions(-) diff --git a/docs/ops/movement/ExtractImagePatches_3.md b/docs/ops/movement/ExtractImagePatches_3.md index 3604d3b49ca..5046854ee22 100644 --- a/docs/ops/movement/ExtractImagePatches_3.md +++ b/docs/ops/movement/ExtractImagePatches_3.md @@ -8,9 +8,7 @@ **Detailed description**: -The *ExtractImagePatches* operation is similar to the TensorFlow* operation [ExtractImagePatches](https://www.tensorflow.org/api_docs/python/tf/image/extract_patches). - -This op extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions. +The *ExtractImagePatches* operation extracts patches of shape `sizes` which are `strides` apart in the input image. The output elements are taken from the input at intervals given by the `rate` argument, as in dilated convolutions. The result is a 4D tensor containing image patches with size `size[0] * size[1] * depth` vectorized in the "depth" dimension. @@ -92,20 +90,23 @@ The "auto_pad" attribute has no effect on the size of each patch, it determines Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We use the symbol `x` to mark output patches. 1. `sizes="3,3", strides="5,5", rates="1,1", auto_pad="valid"` +\f[ + \begin{bmatrix} + x & x & x & 4 & 5 & x & x & x & 9 & 10 \\ + x & x & x & 14 & 15 & x & x & x & 19 & 20 \\ + x & x & x & 24 & 25 & x & x & x & 29 & 30 \\ + 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\ + 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\ + x & x & x & 54 & 55 & x & x & x & 59 & 60 \\ + x & x & x & 64 & 65 & x & x & x & 69 & 70 \\ + x & x & x & 74 & 75 & x & x & x & 79 & 80 \\ + 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\ + 91 & 92 & 93 & 94 & 95 & 96 & 79 & 98 & 99 & 100 + \end{bmatrix} +\f] -   x   x   x    4   5   x   x   x   9 10 -   x   x   x  14 15   x   x   x 19 20 -   x   x   x  24 25   x   x   x 29 30 - 31 32 33 34 35 36 37 38 39 40 - 41 42 43 44 45 46 47 48 49 50 -   x   x   x  54 55   x   x   x 59 60 -   x   x   x  64 65   x   x   x 69 70 -   x   x   x  74 75   x   x   x 79 80 - 81 82 83 84 85 86 87 88 89 90 - 91 92 93 94 95 96 97 98 99 100 - output: - +``` [[[[ 1 6] [51 56]] @@ -132,25 +133,28 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u [[23 28] [73 78]]]] - +``` output shape: `[1, 9, 2, 2]` 2. 
`sizes="4,4", strides="8,8", rates="1,1", auto_pad="valid"` - -   x   x   x   x    5   6   7   8   9 10 -   x   x   x   x  15 16 17 18 19 20 -   x   x   x   x  25 26 27 28 29 30 -   x   x   x   x  35 36 37 38 39 40 - 41 42 43 44 45 46 47 48 49 50 - 51 52 53 54 55 56 57 58 59 60 - 61 62 63 64 65 66 67 68 69 70 - 71 72 73 74 75 76 77 78 79 80 - 81 82 83 84 85 86 87 88 89 90 - 91 92 93 94 95 96 97 98 99 100 +\f[ + \begin{bmatrix} + x & x & x & x & 5 & 6 & 7 & 8 & 9 & 10 \\ + x & x & x & x & 15 & 16 & 17 & 18 & 19 & 20 \\ + x & x & x & x & 25 & 26 & 27 & 28 & 29 & 30 \\ + x & x & x & x & 35 & 36 & 37 & 38 & 39 & 40 \\ + 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 \\ + 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 \\ + 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\ + 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 \\ + 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\ + 91 & 92 & 93 & 94 & 95 & 96 & 79 & 98 & 99 & 100 + \end{bmatrix} +\f] output: - - [[[[ 1]] +``` + [[[[ 1]] [[ 2]] @@ -180,28 +184,30 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u [[33]] - [[34]]]] - + [[34]]]] +``` output shape: `[1, 16, 1, 1]` 3. `sizes="4,4", strides="9,9", rates="1,1", auto_pad="same_upper"` - -   x   x   x   x    0   0   0   0   0   x   x   x   x -   x   x   x   x    4   5   6   7   8   x   x   x   x -   x   x   x   x  14 15 16 17 18   x   x   x   x -   x   x   x   x  24 25 26 27 28   x   x   x   x -   0 31 32 33 34 35 36 37 38 39 40   0   0 -   0 41 42 43 44 45 46 47 48 49 50   0   0 -   0 51 52 53 54 55 56 57 58 59 60   0   0 -   0 61 62 63 64 65 66 67 68 69 70   0   0 -   0 71 72 73 74 75 76 77 78 79 80   0   0 -   x   x   x   x  84 85 86 87 88   x   x   x   x -   x   x   x   x  94 95 96 97 98   x   x   x   x -   x   x   x   x    0   0   0   0   0   x   x   x   x -   x   x   x   x    0   0   0   0   0   x   x   x   x - +\f[ + \begin{bmatrix} + x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\ + x & x & x & x & 4 & 5 & 6 & 7 & 8 & x & x & x & x\\ + x & x & x & x & 14 & 15 & 16 & 17 & 18 & x & x & x & x\\ + x & x & x & x & 24 & 25 & 26 & 27 & 28 & x & x & x & x\\ + 0 & 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 & 0 & 0\\ + 0 & 41 & 42 & 43 & 44 & 45 & 46 & 47 & 48 & 49 & 50 & 0 & 0\\ + 0 & 51 & 52 & 53 & 54 & 55 & 56 & 57 & 58 & 59 & 60 & 0 & 0\\ + 0 & 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 & 0 & 0\\ + 0 & 71 & 72 & 73 & 74 & 75 & 76 & 77 & 78 & 79 & 80 & 0 & 0\\ + x & x & x & x & 84 & 85 & 86 & 87 & 88 & x & x & x & x\\ + x & x & x & x & 94 & 95 & 96 & 79 & 98 & x & x & x & x\\ + x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x\\ + x & x & x & x & 0 & 0 & 0 & 0 & 0 & x & x & x & x + \end{bmatrix} +\f] output: - +``` [[[[ 0 0] [ 0 89]] @@ -249,25 +255,28 @@ Image is a `1 x 1 x 10 x 10` array that contains the numbers 1 through 100. We u [[ 23 0] [ 0 0]]]] - +``` output shape: `[1, 16, 2, 2]` 4. 
`sizes="3,3", strides="5,5", rates="2,2", auto_pad="valid"` This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches: - -   x   2   x   4   x   y   7   y   9   y - 11 12 13 14 15 16 17 18 19 20 -   x  22   x 24   x   y 27   y 29   y - 31 32 33 34 35 36 37 38 39 40 -   x  42   x 44   x   y 47   y 49   y -   z  52   z 54   z   k 57   k 59   k - 61 62 63 64 65 66 67 68 69 70 -   z  72   z 74   z   k 77   k 79   k - 81 82 83 84 85 86 87 88 89 90 -   z  92   z 94   z   k 97   k 99   k - - output: +\f[ + \begin{bmatrix} + x & 2 & x & 4 & x & y & 7 & y & 9 & y \\ + 11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 \\ + x & 22 & x & 24 & x & y & 27 & y & 29 & y \\ + 31 & 32 & 33 & 34 & 35 & 36 & 37 & 38 & 39 & 40 \\ + x & 42 & x & 44 & x & y & 47 & y & 49 & y \\ + z & 52 & z & 54 & z & k & 57 & k & 59 & k \\ + 61 & 62 & 63 & 64 & 65 & 66 & 67 & 68 & 69 & 70 \\ + z & 72 & z & 74 & z & k & 77 & k & 79 & k \\ + 81 & 82 & 83 & 84 & 85 & 86 & 87 & 88 & 89 & 90 \\ + z & 92 & z & 94 & z & k & 79 & k & 99 & k + \end{bmatrix} +\f] + output: +``` [[[[ 1 6] [ 51 56]] @@ -294,26 +303,30 @@ This time we use the symbols `x`, `y`, `z` and `k` to distinguish the patches: [[ 45 50] [ 95 100]]]] - +``` output_shape: `[1, 9, 2, 2]` 5. `sizes="2,2", strides="3,3", rates="1,1", auto_pad="valid"` Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature map with coordinate 0 contains numbers in a range `[1, 25]` and feature map with coordinate 1 contains numbers in a range `[26, 50]` -   x   x   3   x   x -   6   7   8   x   x - 11 12 13 14 15 -   x   x  18   x   x -   x   x  23   x   x - -   x   x  28   x   x -   x   x  33   x   x - 36 37 38 39 40 -   x   x  43   x   x -   x   x  48   x   x - +\f[ + \begin{bmatrix} + x & x & 3 & x & x\\ + x & x & 8 & x & x\\ + 11 & 12 & 13 & 14 & 15\\ + x & x & 18 & x & x\\ + x & x & 23 & x & x + \end{bmatrix}\\ + \begin{bmatrix} + x & x & 28 & x & x\\ + x & x & 33 & x & x\\ + 36 & 37 & 38 & 39 & 40\\ + x & x & 43 & x & x\\ + x & x & 48 & x & x + \end{bmatrix} +\f] output: - +``` [[[[ 1 4] [16 19]] @@ -337,5 +350,5 @@ Image is a `1 x 2 x 5 x 5` array that contains two feature maps where feature ma [[32 35] [47 50]]]] - +``` output shape: `[1, 8, 2, 2]` From a16af0d2ea550c955a34659510831577e43d0b43 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Tue, 15 Jun 2021 16:59:10 +0300 Subject: [PATCH 17/43] [LPT] FakeQuantize folding fix to support ConvolutionBackpropData with FQ on weights (#6160) --- .../src/network_helper.cpp | 2 +- ...nvolution_backprop_data_transformation.cpp | 14 +++++++---- ...nvolution_backprop_data_transformation.cpp | 23 +++++++++++++++---- ...nvolution_backprop_data_transformation.hpp | 2 +- ...nvolution_backprop_data_transformation.cpp | 18 +++++++++------ .../convolution_backprop_data_function.cpp | 1 + 6 files changed, 42 insertions(+), 18 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 4a1e942e575..90995bc0ffb 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -683,7 +683,7 @@ std::shared_ptr NetworkHelper::foldFakeQuantize( auto levels_1 = fq->get_levels() - 1.f; const size_t DHW = D * H * W; - const size_t IDHW = IC * D * H * W; + const size_t IDHW = outChannelsShapeIndex == 0 ? 
IC * D * H * W : OC * D * H * W; const auto values = constant->cast_vector(); std::vector quantizedValues(OC * IC * D * H * W); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp index 44d26d231b6..a6e2e93a37e 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -15,11 +15,17 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), - LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false) + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true) }; const std::vector params = { + // FQ on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 25.5f }, { 0.f }, { 25.5f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }}, + "convolutionBackpropData_original", + "U8" + }, // FQ on weights // with zero point { @@ -87,8 +93,8 @@ const std::vector inputShapes = { - { 1, 8, 16, 16 } +const std::vector> inputShapes = { + {{ 1, 8, 16, 16 }, true} }; const std::vector outputShapes = { diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp index d33e3c42f9e..2cf625b5862 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -16,11 +16,24 @@ const std::vector netPrecisions = { }; const std::vector trasformationParamValues = { - LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true), - LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(false) + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true) }; const std::vector params = { + // FQ on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { 0.f }, { 25.5f }, { 0.f }, { 25.5f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }}, + "convolutionBackpropData_original", + "U8" + }, + // FQ on weights + { + {256ul, ngraph::Shape{1, 1, 1, 1}, { -12.8f }, { 12.7f }, { -12.8f }, { 12.7f }}, + {255ul, ngraph::Shape{1, 1, 1, 1}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }}, + "convolutionBackpropData_original", + "I8" + }, // FQ on weights // with zero point { @@ -82,9 +95,9 @@ const std::vector inputShapes = { - { 1, 8, 16, 16 }, - { 1, 32, 16, 16 } +const std::vector> inputShapes = { + {{ 1, 8, 16, 16 }, false}, + {{ 1, 32, 16, 16 }, true} }; const std::vector outputShapes = { diff --git 
a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp index 39d5ea58391..8b690e2df2e 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/convolution_backprop_data_transformation.hpp @@ -43,7 +43,7 @@ public: typedef std::tuple< ngraph::element::Type, // netPrecision - ngraph::Shape, // inputShape + std::pair, // input shape and shape support flag ngraph::Shape, // outputShape std::string, // targetDevice ngraph::pass::low_precision::LayerTransformation::Params, diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp index 951af4fdd4e..c8d15a5702e 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/convolution_backprop_data_transformation.cpp @@ -14,7 +14,7 @@ namespace LayerTestsDefinitions { std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::TestParamInfo obj) { ngraph::element::Type netPrecision; - ngraph::Shape inputShape; + std::pair inputShape; ngraph::Shape outputShape; std::string targetDevice; ngraph::pass::low_precision::LayerTransformation::Params params; @@ -22,7 +22,7 @@ std::string ConvolutionBackpropDataTransformation::getTestCaseName(testing::Test std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = obj.param; std::ostringstream result; - result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) << "_" << + result << getTestCaseNameByParams(netPrecision, inputShape.first, targetDevice, params) << "_" << outputShape << "_" << param.fakeQuantizeOnData << "_" << param.fakeQuantizeOnWeights << "_" << @@ -34,14 +34,15 @@ void ConvolutionBackpropDataTransformation::SetUp() { threshold = 0.1f; ngraph::element::Type netPrecision; - ngraph::Shape inputShape; + std::pair inputShapeAndHandling; ngraph::Shape outputShape; ngraph::pass::low_precision::LayerTransformation::Params params; ConvolutionBackpropDataTransformationParam param; - std::tie(netPrecision, inputShape, outputShape, targetDevice, params, param) = this->GetParam(); + std::tie(netPrecision, inputShapeAndHandling, outputShape, targetDevice, params, param) = this->GetParam(); std::shared_ptr weights; + const auto inputShape = inputShapeAndHandling.first; if (!param.fakeQuantizeOnWeights.empty()) { weights = ngraph::builder::subgraph::ConvolutionBackpropDataFunction::getWeights( ngraph::Shape{inputShape[1], inputShape[1] / 2, 1, 1}, @@ -65,9 +66,12 @@ void ConvolutionBackpropDataTransformation::SetUp() { void ConvolutionBackpropDataTransformation::Run() { LayerTestsCommon::Run(); - const auto params = std::get<5>(GetParam()); - const auto actualType = getRuntimePrecision(params.layerName); - EXPECT_EQ(actualType, params.expectedKernelType); + const auto inputShape = std::get<1>(GetParam()); + if (inputShape.second) { + const auto params = std::get<5>(GetParam()); + const auto 
actualType = getRuntimePrecision(params.layerName); + EXPECT_EQ(actualType, params.expectedKernelType); + } } TEST_P(ConvolutionBackpropDataTransformation, CompareWithRefImpl) { diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp index ae7d3847f69..969e1394a0f 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/convolution_backprop_data_function.cpp @@ -38,6 +38,7 @@ std::shared_ptr ConvolutionBackpropDataFunction::get( CoordinateDiff{ 0, 0 }, CoordinateDiff{ 0, 0 }, Strides{ 1, 1 }); + convolutionBackpropData->set_friendly_name("convolutionBackpropData"); ngraph::ResultVector results{ std::make_shared(convolutionBackpropData) }; return std::make_shared(results, ParameterVector{ input }, "ConvolutionBackpropDataTransformation"); From 0981a1584677d447e6997a7d86694e599431ddff Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Tue, 15 Jun 2021 22:40:46 +0300 Subject: [PATCH 18/43] [Caching] Klocwork fixes (#6157) --- inference-engine/src/inference_engine/compilation_context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference-engine/src/inference_engine/compilation_context.cpp b/inference-engine/src/inference_engine/compilation_context.cpp index ae6aa698eaf..1b656200996 100644 --- a/inference-engine/src/inference_engine/compilation_context.cpp +++ b/inference-engine/src/inference_engine/compilation_context.cpp @@ -42,7 +42,7 @@ static int32_t as_int32_t(T v) { } class OstreamHashWrapper final: public std::streambuf { - std::size_t m_res = {}; + std::size_t m_res = 0; public: std::size_t getResult() const { return m_res; } std::streamsize xsputn(const char* s, std::streamsize n) override { @@ -65,7 +65,7 @@ public: ////////////////////////////////////////////////// std::string NetworkCompilationContext::calculateFileInfo(const std::string& filePath) { - size_t seed {}; + size_t seed = 0; auto absPath = filePath; try { absPath = FileUtils::absoluteFilePath(filePath); From 613bb981ce67f8cbbdda816aa12500dd37243726 Mon Sep 17 00:00:00 2001 From: Szymon Durawa Date: Tue, 15 Jun 2021 23:44:34 +0200 Subject: [PATCH 19/43] external_port_id is calculated based on number of op inputs. (#6037) * External_port_id is calcultaed based on number of op inputs. * Add test for external_port_id serialization. * Restore data section appearance in xml file. 
--- .../src/transformations/serialize.cpp | 38 +++++++++++-------- .../ir_serialization/tensor_iterator.cpp | 38 +++++++++++++++++++ 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp index 5f5c8144f20..73b72313e5f 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -235,7 +235,7 @@ class XmlSerializer : public ngraph::AttributeVisitor { void output_descriptions_on_adapter(const std::vector>& output_descriptions, - const std::vector& parameter_mapping, + const uint32_t& input_count, const std::vector& result_mapping, pugi::xml_node& port_map) { NGRAPH_CHECK(!result_mapping.empty(), "No results found in body Function."); @@ -246,7 +246,7 @@ class XmlSerializer : public ngraph::AttributeVisitor { for (const auto& output_description : output_descriptions) { pugi::xml_node output = port_map.append_child("output"); - output.append_attribute("external_port_id").set_value(parameter_mapping.size() + output_description->m_output_index); + output.append_attribute("external_port_id").set_value(input_count + output_description->m_output_index); output.append_attribute("internal_layer_id").set_value(result_mapping[output_description->m_body_value_index].c_str()); if (auto concat_output = as_type_ptr(output_description)) { @@ -306,7 +306,11 @@ public: input_descriptions_on_adapter(a->get(), parameter_mapping, result_mapping, port_map); } else if (const auto& a = ngraph::as_type>>>(&adapter)) { - output_descriptions_on_adapter(a->get(), parameter_mapping, result_mapping, port_map); + uint32_t op_input_count = 0; + for (auto c = m_xml_node.parent().child("input").first_child(); !c.empty(); c = c.next_sibling()) { + op_input_count++; + } + output_descriptions_on_adapter(a->get(), op_input_count, result_mapping, port_map); } else if (const auto& a = ngraph::as_type>(&adapter)) { special_body_ports_on_adapter(a->get(), parameter_mapping, result_mapping, port_map); } @@ -700,19 +704,6 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, // general attributes pugi::xml_node data = layer.append_child("data"); - XmlSerializer visitor(data, node_type_name, custom_opsets, constant_node_write_handler); - NGRAPH_CHECK(node->visit_attributes(visitor), "Visitor API is not supported in ", node); - rt_info::XmlSerializer{data}.serialize(node->get_rt_info()); - - if (exec_graph) { - visit_exec_graph_node(layer, node); - } - - const bool data_attr_size = - data.attributes().begin() == data.attributes().end(); - if (data_attr_size) { - layer.remove_child(data); - } int port_id = 0; // @@ -780,6 +771,21 @@ void ngfunction_2_irv10(pugi::xml_node& netXml, layer.insert_move_after(output, layer.first_child()); } } + + // fill general attributes + XmlSerializer visitor(data, node_type_name, custom_opsets, constant_node_write_handler); + NGRAPH_CHECK(node->visit_attributes(visitor), "Visitor API is not supported in ", node); + rt_info::XmlSerializer{data}.serialize(node->get_rt_info()); + + if (exec_graph) { + visit_exec_graph_node(layer, node); + } + + const bool data_attr_size = + data.attributes().begin() == data.attributes().end(); + if (data_attr_size) { + layer.remove_child(data); + } } // const std::vector edge_mapping = create_edge_mapping(layer_ids, f); diff --git a/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp 
b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp index c63371baf89..cccad0f70a5 100644 --- a/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp +++ b/inference-engine/tests/functional/inference_engine/ir_serialization/tensor_iterator.cpp @@ -9,6 +9,7 @@ #include "ie_core.hpp" #include "ie_blob.h" #include "common_test_utils/data_utils.hpp" +#include "pugixml.hpp" #ifndef IR_SERIALIZATION_MODELS_PATH // should be already defined by cmake #define IR_SERIALIZATION_MODELS_PATH "" @@ -84,3 +85,40 @@ TEST_F(SerializationTensorIteratorTest, TiNegativeStride) { serialize_and_compare(model_path, weights); } + +TEST_F(SerializationTensorIteratorTest, SerializationExternalPortIdInXmlFile) { + const std::string model_path = IR_SERIALIZATION_MODELS_PATH "loop_2d_add.xml"; + const std::string binary_path = IR_SERIALIZATION_MODELS_PATH "loop_2d_add.bin"; + + InferenceEngine::Core ie; + InferenceEngine::CNNNetwork expected; + pugi::xml_document loop_orig; + pugi::xml_document loop_serialized; + + expected = ie.ReadNetwork(model_path, binary_path); + expected.serialize(m_out_xml_path, m_out_bin_path); + + pugi::xml_parse_result result = loop_orig.load_file(model_path.c_str()); + ASSERT_FALSE(result.status) << result.description(); + result = loop_serialized.load_file(m_out_xml_path.c_str()); + ASSERT_FALSE(result.status) << result.description(); + + auto node1 = loop_orig.child("net").child("layers").find_child_by_attribute("type", "Loop"); + auto node2 = loop_serialized.child("net").child("layers").find_child_by_attribute("type", "Loop"); + auto node2_port_map = node2.child("port_map").first_child(); + + for (auto ch = node1.child("port_map").first_child(); !ch.empty(); ch = ch.next_sibling()) { + auto node1_external_port_id = std::stoi(ch.attribute("external_port_id").value()); + auto node2_external_port_id = std::stoi(node2_port_map.attribute("external_port_id").value()); + + if (node1_external_port_id == -1) { + continue; + } + if (node2_external_port_id == -1) { + node2_external_port_id = std::stoi(node2_port_map.next_sibling().attribute("external_port_id").value()); + } + node2_port_map = node2_port_map.next_sibling(); + + EXPECT_EQ(node1_external_port_id, node2_external_port_id); + } +} From ad852f78b47e0580a7d6cefce43bcd1ff5019115 Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Wed, 16 Jun 2021 01:00:19 +0300 Subject: [PATCH 20/43] =?UTF-8?q?[=C2=A7]=20cleans=20snippets=20interface?= =?UTF-8?q?=20and=20adds=20subgraph=20tests=20(#6123)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/snippets/include/snippets/emitter.hpp | 56 ++++++++++++ .../snippets/include/snippets/generator.hpp | 89 ++++++++++--------- .../snippets/include/snippets/op/kernel.hpp | 37 ++++++++ .../src/snippets/include/snippets/op/nop.hpp | 4 + .../snippets/include/snippets/op/subgraph.hpp | 47 +++++++++- .../src/snippets/include/snippets/op/tile.hpp | 36 ++++++++ .../src/snippets/src/generator.cpp | 59 +++++++++++- .../src/snippets/src/op/kernel.cpp | 14 +++ .../src/snippets/src/op/subgraph.cpp | 20 ++++- inference-engine/src/snippets/src/op/tile.cpp | 14 +++ .../snippets/src/pass/collapse_subgraph.cpp | 25 ++++-- .../subgraph_tests/codegen_add.cpp | 29 ++++++ .../subgraph_tests/codegen_bert.cpp | 26 ++++++ .../subgraph_tests/codegen_gelu.cpp | 26 ++++++ .../include/subgraph_tests/codegen_add.hpp | 34 +++++++ .../include/subgraph_tests/codegen_bert.hpp | 34 +++++++ 
.../include/subgraph_tests/codegen_gelu.hpp | 34 +++++++ .../shared/src/subgraph_tests/codegen_add.cpp | 60 +++++++++++++ .../src/subgraph_tests/codegen_bert.cpp | 76 ++++++++++++++++ .../src/subgraph_tests/codegen_gelu.cpp | 74 +++++++++++++++ 20 files changed, 740 insertions(+), 54 deletions(-) create mode 100644 inference-engine/src/snippets/include/snippets/emitter.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/kernel.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/tile.hpp create mode 100644 inference-engine/src/snippets/src/op/kernel.cpp create mode 100644 inference-engine/src/snippets/src/op/tile.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_add.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_bert.cpp create mode 100644 inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_gelu.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_add.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_bert.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_gelu.hpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_add.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_bert.cpp create mode 100644 inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_gelu.cpp diff --git a/inference-engine/src/snippets/include/snippets/emitter.hpp b/inference-engine/src/snippets/include/snippets/emitter.hpp new file mode 100644 index 00000000000..78d54098324 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/emitter.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { + +using code = const uint8_t *; +using RegInfo = std::pair, std::vector>; + +/** + * @interface Emitter + * @brief Base class for all target specific code emitters used by generator. 
+ * @ingroup snippets + */ +class TRANSFORMATIONS_API Emitter { +public: + /** + * @brief Default constructor + */ + Emitter(const std::shared_ptr& n) { + } + + Emitter(std::vector, RegInfo>>& region) { + } + + /** + * @brief called by generator to generate code to produce target code for a specific operation + * @param in vector of vector argument registers + * @param out vector of vector resulting registers + * @param pool optional vector of free vector registers which might be used inside method + * @param gpr vector of free generam puproce registers which might be used inside method + * @return void + */ + virtual void emit_code(const std::vector& in, + const std::vector& out, + const std::vector& pool = {}, + const std::vector& gpr = {}) const = 0; + + /** + * @brief called by generator to generate data section, if needed for a specific operation + * @return void + */ + virtual void emit_data() const { + } +}; + +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/generator.hpp b/inference-engine/src/snippets/include/snippets/generator.hpp index a7408134c5e..1a7ace69854 100644 --- a/inference-engine/src/snippets/include/snippets/generator.hpp +++ b/inference-engine/src/snippets/include/snippets/generator.hpp @@ -10,49 +10,13 @@ #include #include "snippets_isa.hpp" +#include "emitter.hpp" namespace ngraph { namespace snippets { -using code = const uint8_t *; -using RegInfo = std::pair, std::vector>; - TRANSFORMATIONS_API auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; -/** - * @interface Emitter - * @brief Base class for all target specific code emitters used by generator. - * @ingroup snippets - */ -class TRANSFORMATIONS_API Emitter { -public: - /** - * @brief Default constructor - */ - Emitter(const std::shared_ptr& n) { - } - - /** - * @brief called by generator to generate code to produce target code for a specific operation - * @param in vector of vector argument registers - * @param out vector of vector resulting registers - * @param pool optional vector of free vector registers which might be used inside method - * @param gpr vector of free generam puproce registers which might be used inside method - * @return void - */ - virtual void emit_code(const std::vector& in, - const std::vector& out, - const std::vector& pool = {}, - const std::vector& gpr = {}) const = 0; - - /** - * @brief called by generator to generate data section, if needed for a specific operation - * @return void - */ - virtual void emit_data() const { - } -}; - /** * @interface TargetMachine * @brief Base class Target machine representation. 
Target derives from this class to provide generator information about supported emittors @@ -61,12 +25,45 @@ public: class TRANSFORMATIONS_API TargetMachine { public: /** - * @brief called by generator to all the emittors available for a target machine + * @brief checks if target is natively supported + * @return true, if supported + */ + virtual bool is_supported() const = 0; + + /** + * @brief finalizes code generation + * @return generated kernel binary + */ + virtual code get_snippet() const = 0; + + /** + * @brief gets number of lanes supported by target's vector ISA + * @return number of lanes + */ + virtual size_t get_lanes() const = 0; + + /** + * @brief called by generator to all the emittor for a target machine * @return a map by node's type info with callbacks to create an instance of emmitter for corresponding operation type */ - virtual auto getJitters() -> std::map(std::shared_ptr)>>{ - return {}; + std::function(std::shared_ptr)> get(const ngraph::DiscreteTypeInfo type) const { + auto jitter = jitters.find(type); + if (jitter == jitters.end()) { + throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); + } + return jitter->second; } + + /** + * @brief checks if emitter for a specific operation is supported + * @return true, if supported + */ + bool has(const ngraph::DiscreteTypeInfo type) const { + return jitters.find(type) != jitters.end(); + } + +protected: + std::map(std::shared_ptr)>> jitters; }; /** @@ -87,6 +84,12 @@ public: * @param p pointer to generated code */ Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {} + /** + * @brief Returns callable instanse of code pointer + */ + template K get_callable() const { + return reinterpret_cast(const_cast(ptr)); + } Shape work_size {}; bool is_flat {false}; @@ -103,7 +106,7 @@ public: /** * @brief Default constructor */ - Generator() = default; + Generator(const std::shared_ptr& t) : target(t) {} /** * @brief Default destructor */ @@ -113,10 +116,10 @@ public: * @param f runction in canonical for for table-based code generation * @return pointer to generated code */ - virtual code generate(std::shared_ptr& f) const = 0; + code generate(std::shared_ptr& f) const; protected: - mutable std::map(std::shared_ptr)>> jitters; + std::shared_ptr target; }; } // namespace snippets diff --git a/inference-engine/src/snippets/include/snippets/op/kernel.hpp b/inference-engine/src/snippets/include/snippets/op/kernel.hpp new file mode 100644 index 00000000000..f93b7109616 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/kernel.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" +#include "snippets/emitter.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Kernel + * @brief Generated by Canonicalization and represents compute kernel legal for sheduling + * @ingroup snippets + */ +class TRANSFORMATIONS_API Kernel : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Kernel(const std::vector, ngraph::snippets::RegInfo>>& region); + Kernel() = default; + + std::vector, ngraph::snippets::RegInfo>> region; + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { + return std::make_shared(region); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git 
a/inference-engine/src/snippets/include/snippets/op/nop.hpp b/inference-engine/src/snippets/include/snippets/op/nop.hpp index 8bf6b21a2b6..815776d59a0 100644 --- a/inference-engine/src/snippets/include/snippets/op/nop.hpp +++ b/inference-engine/src/snippets/include/snippets/op/nop.hpp @@ -23,6 +23,10 @@ public: Nop(const OutputVector& arguments, const OutputVector& results); Nop() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { + return std::make_shared(); + } }; } // namespace op diff --git a/inference-engine/src/snippets/include/snippets/op/subgraph.hpp b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp index b204fdd22fc..4b00b2bb9a0 100644 --- a/inference-engine/src/snippets/include/snippets/op/subgraph.hpp +++ b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include "snippets/generator.hpp" @@ -24,6 +25,47 @@ namespace op { */ class TRANSFORMATIONS_API Subgraph : public ngraph::op::Op { public: + // < 1, 42, 17, 15, 16> < 0, 1, 2, 3, 1> + // should be: + // A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1> + // B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 16> < 0, 1, 2, 3, 1> + // D = < 1, 42, 1, 1 > -> < 1, 3, 1, 1, 16> < 0, 1, 2, 3, 1> ??? + // C = A + B + // C = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1> + // + // how it works now (multi-demention broadcast): + // [BroadcastLoad] doesn't perform post increment + // [Load] performs += vlan + // [ScalarLoad] performs += 1 + // A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1> + // B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 1> < 0, 1, 2, 3, 1> + // [A] [B] + // [Load] [ScalarLoad] <- should consider AxisVector to choose right type of load + // [Broadcast] + // [Add] + // [Store] + // [C] + // C = A + B + // C = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1> + // + // Multiple-dimension broadcasts support? 
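Aside on the shape pairs discussed in this comment block: before blocking, they all follow ordinary numpy-style broadcasting of the original 4D shapes. Below is a minimal standalone sketch of that rule applied to the A/B pair quoted above; broadcast_shape is an invented helper for illustration and is not part of the snippets code.

// Standalone illustration only; not snippets code.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// numpy-style broadcasting: align shapes from the right; a dimension of 1
// stretches to match the other shape, otherwise the dimensions must be equal.
std::vector<std::size_t> broadcast_shape(std::vector<std::size_t> a, std::vector<std::size_t> b) {
    if (a.size() < b.size())
        std::swap(a, b);
    b.insert(b.begin(), a.size() - b.size(), std::size_t{1});  // left-pad the shorter shape with 1s
    std::vector<std::size_t> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i) {
        assert(a[i] == b[i] || a[i] == 1 || b[i] == 1);        // must be broadcastable
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

int main() {
    // A = <1, 42, 17, 15>, B = <1, 1, 17, 15>  ->  C = <1, 42, 17, 15>
    for (std::size_t d : broadcast_shape({1, 42, 17, 15}, {1, 1, 17, 15}))
        std::cout << d << ' ';
    std::cout << '\n';
    return 0;
}

The comment block continues with the mixed-broadcast cases below.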
+ // A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 4> + // B = < 1, 1, 17, 15> -> < 1, 1, 17, 15, 1> < 0, 1, 2, 3, 4> + // + // A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 4> + // B = < 1, 1, 17, 15> -> < 1, 3, 17, 15, 1> < 0, 1, 2, 3, 4> + // + // Collapse moat varying dimensions with broadcast + // A = < 1, 42, 17, 15> -> < 1, 3, 17, 15, 16> < 0, 1, 2, 3, 1> + // B = < 1, 1, 17, 15> -> < 1, 3, 17, 15, 1> < 0, 1, 2, 3, 1> + // + // Collapse for mixed broadcast + // A = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4> + // B = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4> + // C = < 1, 3, 1, 15, 32> < 0, 1, 2, 3, 4> + // + // D = < 1, 3, 17, 15, 32> < 0, 1, 2, 3, 4> + // E = < 1, 3, 17, 1, 32> < 0, 1, 2, 3, 4> using BlockedShape = std::tuple; using BlockedShapeVector = std::vector; @@ -49,7 +91,8 @@ public: std::shared_ptr make_canonical_from_this(); - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); + snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, + ngraph::pass::Manager opt = ngraph::pass::Manager()); bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; /// Set a new body for the op; body needs to satisfy requirements on inputs/outputs @@ -62,6 +105,8 @@ public: void print() const; void print_statistics(bool verbose); + void serialize() const; + static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; private: diff --git a/inference-engine/src/snippets/include/snippets/op/tile.hpp b/inference-engine/src/snippets/include/snippets/op/tile.hpp new file mode 100644 index 00000000000..874e962ef09 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/tile.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" +#include "snippets/emitter.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Tile + * @brief Generated by Canonicalization and represents Loop in affine notation + * @ingroup snippets + */ +class TRANSFORMATIONS_API Tile : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Tile(const std::vector, ngraph::snippets::RegInfo>>& region); + Tile() = default; + std::vector, ngraph::snippets::RegInfo>> region; + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { + return std::make_shared(region); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/src/generator.cpp b/inference-engine/src/snippets/src/generator.cpp index 650c9199234..23fb777d4de 100644 --- a/inference-engine/src/snippets/src/generator.cpp +++ b/inference-engine/src/snippets/src/generator.cpp @@ -4,10 +4,18 @@ #include "snippets/generator.hpp" #include "snippets/register_info.hpp" +#include "snippets/pass/assign_registers.hpp" +#include "snippets/pass/vector_to_scalar.hpp" +#include "snippets/pass/insert_load_store.hpp" +#include "snippets/op/tile.hpp" +#include "snippets/op/kernel.hpp" + +#include auto ngraph::snippets::getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo { auto rt = n->get_rt_info(); + // ToDo: change to reg_t std::vector rout; if (auto rinfo = rt["reginfo"]) { auto reginfo = ngraph::as_type_ptr>>(rinfo)->get(); @@ -27,4 +35,53 @@ auto ngraph::snippets::getRegisters(std::shared_ptr& n) -> 
ngraph: } } return std::make_pair(rin, rout); -} \ No newline at end of file +} + +ngraph::snippets::code ngraph::snippets::Generator::generate(std::shared_ptr& f) const { + if (!target->is_supported()) + throw ngraph_error("unsupported architecture for code genration"); + + auto params = f->get_parameters(); + auto results = f->get_results(); + auto nptrs = results.size() + params.size(); + + if (nptrs > 7) { + throw ngraph_error("snippet signature should not exceed 7 arguments. got " + std::to_string(nptrs)); + } + + // vector tile + std::vector, ngraph::snippets::RegInfo>> lowered; + for (auto n : f->get_ordered_ops()) { + lowered.push_back(std::make_pair(target->get(n->get_type_info())(n), ngraph::snippets::getRegisters(n))); + } + + // scalar tile + auto f_scalar = ngraph::clone_function(*f.get()); + ngraph::pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f_scalar); + + std::vector, RegInfo>> scalar_lowered; + for (auto n : f_scalar->get_ordered_ops()) { + scalar_lowered.push_back(std::make_pair(target->get(n->get_type_info())(n), ngraph::snippets::getRegisters(n))); + } + + // wrapping into tiles + std::vector, RegInfo>> tiles; + tiles.push_back(std::make_pair(target->get(ngraph::snippets::op::Tile::type_info)(std::make_shared(lowered)), + std::make_pair(std::vector({target->get_lanes(), nptrs}), std::vector{}))); + tiles.push_back(std::make_pair(target->get(ngraph::snippets::op::Tile::type_info)(std::make_shared(scalar_lowered)), + std::make_pair(std::vector{{1, nptrs}}, std::vector{}))); + + // emission + std::shared_ptr kernel = target->get(ngraph::snippets::op::Kernel::type_info)(std::make_shared(tiles)); + kernel->emit_code({params.size(), results.size()}, {}); + + lowered.insert(lowered.end(), scalar_lowered.begin(), scalar_lowered.end()); + for (auto& op : lowered) { + op.first->emit_data(); + } + + return target->get_snippet(); +} diff --git a/inference-engine/src/snippets/src/op/kernel.cpp b/inference-engine/src/snippets/src/op/kernel.cpp new file mode 100644 index 00000000000..381db1a5076 --- /dev/null +++ b/inference-engine/src/snippets/src/op/kernel.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/kernel.hpp" +#include "snippets/generator.hpp" + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Kernel, "Kernel", 0); + +snippets::op::Kernel::Kernel(const std::vector, snippets::RegInfo>>& nested) : Op(), region(nested) { +} diff --git a/inference-engine/src/snippets/src/op/subgraph.cpp b/inference-engine/src/snippets/src/op/subgraph.cpp index 544f398c21f..f58b1d383dc 100644 --- a/inference-engine/src/snippets/src/op/subgraph.cpp +++ b/inference-engine/src/snippets/src/op/subgraph.cpp @@ -12,6 +12,7 @@ #include "snippets/pass/assign_registers.hpp" #include +#include #include #include @@ -176,9 +177,7 @@ void snippets::op::Subgraph::canonicalize(const BlockedShapeVector& output_shape if (param->get_element_type() != std::get<2>(input_shapes[i])) { throw ngraph::ngraph_error("changes in presision. 
Is it legal??"); } - if (param->get_shape().size() != std::get<0>(input_shapes[i]).size()) { - m_body->replace_parameter(i, std::make_shared(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i]))); - } + m_body->replace_parameter(i, std::make_shared(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i]))); } } @@ -204,16 +203,20 @@ void snippets::op::Subgraph::convert_to_snippet_dialect() { manager.run_passes(m_body); } -snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) { +snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, + ngraph::pass::Manager opt) { INTERNAL_OP_SCOPE(Subgraph); NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); canonicalize(output_shapes, input_shapes); convert_to_snippet_dialect(); + opt.run_passes(m_body); // generation flow snippets::pass::AssignRegisters().run_on_function(m_body); + // shedule generation should go here and be target agnostic + // actual code emission ngraph::snippets::code ptr = m_generator->generate(m_body); @@ -342,3 +345,12 @@ void snippets::op::Subgraph::print_statistics(bool verbose) { this->print(); } } + +void snippets::op::Subgraph::serialize() const { + std::stringstream xmlFile, binFile; + ngraph::pass::Serialize serializer(xmlFile, xmlFile, ngraph::pass::Serialize::Version::IR_V10); + serializer.run_on_function(get_body()); + auto m_constants = binFile.str(); + auto m_model = xmlFile.str(); + std::cout << m_model << std::endl; +} diff --git a/inference-engine/src/snippets/src/op/tile.cpp b/inference-engine/src/snippets/src/op/tile.cpp new file mode 100644 index 00000000000..6e630585e65 --- /dev/null +++ b/inference-engine/src/snippets/src/op/tile.cpp @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/tile.hpp" +#include "snippets/generator.hpp" + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Tile, "Tile", 0); + +snippets::op::Tile::Tile(const std::vector, snippets::RegInfo>>& nested) : Op(), region(nested) { +} diff --git a/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp index 6c864ec5c4f..6f05719d9d5 100644 --- a/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp +++ b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp @@ -12,7 +12,6 @@ #include #include - #include #include #include @@ -66,10 +65,6 @@ auto has_cycles_of_dependencies(const std::vector n) -> bool { if (in.get_tensor().get_element_type() != ngraph::element::f32) { return false; } + + if (in.get_partial_shape().is_dynamic()) { + return false; + } + + if (in.get_partial_shape().is_static() && in.get_shape().size() > 6) { + return false; + } } for (auto out : n->outputs()) { @@ -196,10 +199,22 @@ auto has_supported_in_out(std::shared_ptr n) -> bool { return false; } + if (out.get_partial_shape().is_dynamic()) { + return false; + } + + if (out.get_partial_shape().is_static() && out.get_shape().size() > 6) { + return false; + } + for (auto in_out : out.get_target_inputs()) { if (!!as_type_ptr(in_out.get_node()->shared_from_this())) { return false; } + + if (!!as_type_ptr(in_out.get_node()->shared_from_this())) { + return false; + } } } diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_add.cpp 
b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_add.cpp new file mode 100644 index 00000000000..72c50bce2a4 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_add.cpp @@ -0,0 +1,29 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/codegen_add.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 + }; + + INSTANTIATE_TEST_CASE_P(NoReshape, CodegenAdd, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64})), + ::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64}), + InferenceEngine::SizeVector({1, 42, 16, 1}), + InferenceEngine::SizeVector({1, 42, 1, 64}), + InferenceEngine::SizeVector({1, 1, 16, 64})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CodegenAdd::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_bert.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_bert.cpp new file mode 100644 index 00000000000..97b3737d693 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_bert.cpp @@ -0,0 +1,26 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/codegen_bert.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 + }; + + INSTANTIATE_TEST_CASE_P(NoReshape, CodegenBert, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({1, 42, 16, 64})), + ::testing::Values(InferenceEngine::SizeVector({1, 42, 64, 64})), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CodegenBert::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_gelu.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_gelu.cpp new file mode 100644 index 00000000000..81b03911dab --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/codegen_gelu.cpp @@ -0,0 +1,26 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/codegen_gelu.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 + }; + + INSTANTIATE_TEST_CASE_P(NoReshape, CodegenGelu, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(InferenceEngine::SizeVector({1, 384, 4096})), + ::testing::Values(true, false), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CodegenGelu::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_add.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_add.hpp new file mode 100644 index 
00000000000..969f15c3c51 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_add.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + InferenceEngine::SizeVector, // Input 0 Shape + InferenceEngine::SizeVector, // Input 1 Shape + std::string // Target Device +> multiInputParams; + +class CodegenAdd : public testing::WithParamInterface, +public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_bert.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_bert.hpp new file mode 100644 index 00000000000..449f259f348 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_bert.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + InferenceEngine::SizeVector, // Input 0 Shape + InferenceEngine::SizeVector, // Input 1 Shape + std::string // Target Device +> multiInputParams; + +class CodegenBert : public testing::WithParamInterface, +public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_gelu.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_gelu.hpp new file mode 100644 index 00000000000..d4c90a036d4 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/codegen_gelu.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + InferenceEngine::SizeVector, // Input Shape, + bool, + std::string // Target Device +> multiInputParams; + +class CodegenGelu : public testing::WithParamInterface, +public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_add.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_add.cpp new file mode 100644 index 00000000000..662ef3313b5 --- /dev/null +++ 
b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_add.cpp @@ -0,0 +1,60 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "subgraph_tests/codegen_add.hpp" + +namespace LayerTestsDefinitions { + + std::string CodegenAdd::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes0, inputShapes1, newInputShapes; + std::string targetDevice; + std::tie(netPrecision, inputShapes0, inputShapes1, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_"; + result << "IS[1]=" << CommonTestUtils::vec2str(inputShapes1) << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); + } + + void CodegenAdd::SetUp() { + std::vector inputShape0, inputShape1; + InferenceEngine::Precision netPrecision; + std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam(); + + auto input0 = std::make_shared(ngraph::element::f32, ngraph::Shape{inputShape0}); + auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{inputShape1}); + + auto add = std::make_shared(input0, input1); + auto neg = std::make_shared(add); + auto result = std::make_shared(neg); + + function = std::make_shared( + ngraph::ResultVector{result}, + ngraph::ParameterVector{input0, input1}, + "CodegenAdd"); + } + +TEST_P(CodegenAdd, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_bert.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_bert.cpp new file mode 100644 index 00000000000..d4e328e19b8 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_bert.cpp @@ -0,0 +1,76 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "subgraph_tests/codegen_bert.hpp" + +namespace LayerTestsDefinitions { + + std::string CodegenBert::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes0, inputShapes1, newInputShapes; + std::string targetDevice; + std::tie(netPrecision, inputShapes0, inputShapes1, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_"; + result << "IS[1]=" << CommonTestUtils::vec2str(inputShapes1) << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); + } + + // the simplest possible eltwise operation with streaming access to the data + void CodegenBert::SetUp() { + std::vector inputShape0, inputShape1; + InferenceEngine::Precision netPrecision; + 
std::tie(netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam(); + + auto shape = ngraph::Shape{inputShape0}; + auto input1 = std::make_shared(ngraph::element::f32, shape); + auto input2 = std::make_shared(ngraph::element::f32, shape); + + auto shapeMM = ngraph::Shape{inputShape1}; + auto input3 = std::make_shared(ngraph::element::f32, shapeMM); + + auto add = std::make_shared(input1, input2); + auto mm = std::make_shared(add, input3); + + std::vector vals(ngraph::shape_size(shape)); + for (int i = 0; i < vals.size(); i++) { + vals[i] = static_cast(i)*vals.size(); + } + + auto c0 = std::make_shared(ngraph::element::f32, shape); + auto add2 = std::make_shared(mm, c0); + + auto add3 = std::make_shared(add, add2); + auto result = std::make_shared(add3); + + function = std::make_shared( + ngraph::ResultVector{result}, + // it should be some topological order to pass parameters for reference code to be executed correctly + ngraph::ParameterVector{input1, input2, c0, input3}, + "CodegenBert"); + } + +TEST_P(CodegenBert, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_gelu.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_gelu.cpp new file mode 100644 index 00000000000..bbf7402d4fa --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/codegen_gelu.cpp @@ -0,0 +1,74 @@ + +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +#include "subgraph_tests/codegen_gelu.hpp" + +#include +#include + +#include +#include + +namespace LayerTestsDefinitions { + + std::string CodegenGelu::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes0, newInputShapes; + bool useSubgraph; + std::string targetDevice; + std::tie(netPrecision, inputShapes0, useSubgraph, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "overSnippet=" << (useSubgraph ? 
"yes" : "no") << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); + } + + // Gelu from bert-large-uncased-whole-word-masking-squad-fp32-onnx-0001 + void CodegenGelu::SetUp() { + std::vector inputShape0; + InferenceEngine::Precision netPrecision; + bool useSubgraph; + std::tie(netPrecision, inputShape0, useSubgraph, targetDevice) = this->GetParam(); + + auto input0 = std::make_shared(ngraph::element::f32, ngraph::Shape{inputShape0}); + auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{inputShape0}); + auto add = std::make_shared(input0, input1); + + auto gelu = std::make_shared(add); + auto result = std::make_shared(gelu); + + function = std::make_shared( + ngraph::ResultVector{result}, + ngraph::ParameterVector{input0, input1}, + "CodegenGelu"); + + if (useSubgraph) { + ngraph::pass::InitNodeInfo().run_on_function(function); + ngraph::pass::ConstantFolding().run_on_function(function); + } + } + +TEST_P(CodegenGelu, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions From ea3ed8af21b78da54e8e0d89b38cd236d3b6c7ba Mon Sep 17 00:00:00 2001 From: Gleb Kazantaev Date: Wed, 16 Jun 2021 03:12:24 +0300 Subject: [PATCH 21/43] Turn on IE and NG python APIs by default inside Model Optimizer (#5721) * Turn on IE and NG python APIs by default inside Model Optimizer * Remove fallback * Fix mo_ut * Remove MO wheel tests * Add model_optimizer custom target to gather all MO deps inside single traget * Fix PR comments --- .github/workflows/mo.yml | 38 ----------- .../ie_bridges/python/CMakeLists.txt | 4 ++ model-optimizer/CMakeLists.txt | 8 +++ model-optimizer/mo/main.py | 67 ++++++------------- model-optimizer/mo/utils/check_ie_bindings.py | 8 +++ model-optimizer/mo/utils/cli_parser.py | 9 +-- .../unit_tests/mo/utils/cli_parser_test.py | 4 +- 7 files changed, 45 insertions(+), 93 deletions(-) diff --git a/.github/workflows/mo.yml b/.github/workflows/mo.yml index eb2ea91484e..ebc0827adb0 100644 --- a/.github/workflows/mo.yml +++ b/.github/workflows/mo.yml @@ -62,42 +62,4 @@ jobs: mkdir ../mo-ut-logs python3 -m xmlrunner discover -p *_test.py --output=../mo-ut-logs working-directory: model-optimizer - - build_wheel: - name: Build Python wheel - runs-on: ubuntu-18.04 - steps: - - uses: actions/checkout@v2 - - name: Install dependencies - run: | - python3 -m pip install --upgrade pip - python3 -m pip install wheel setuptools - python3 -m pip install tensorflow==2.3.0 - - - name: Build - run: | - python3 setup.py sdist bdist_wheel - working-directory: model-optimizer - - - name: Test package content - run: | - echo "src = open('openvino_mo.egg-info/SOURCES.txt', 'rt').read().split()" | tee -a test_wheel.py - echo "ref = open('automation/package_BOM.txt', 'rt').read().split()" | tee -a test_wheel.py - echo "for name in ref:" | tee -a test_wheel.py - echo " if name.endswith('.py'):" | tee -a test_wheel.py - echo " assert name in src or './' + name in src, name + ' file missed'" | tee -a test_wheel.py - python3 test_wheel.py - working-directory: model-optimizer - - - name: Test conversion - run: | - wget -q http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz - tar -xf mobilenet_v1_1.0_224.tgz - python3 -m pip install model-optimizer/dist/*.whl - python3 -m mo --input_model mobilenet_v1_1.0_224_frozen.pb --input_shape "[1,224,224,3]" - - - uses: actions/upload-artifact@v2 - with: - name: mo_wheel - path: "model-optimizer/dist/*.whl" diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt 
b/inference-engine/ie_bridges/python/CMakeLists.txt index 5c80af7f574..b8216b0cb34 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -68,6 +68,10 @@ if(ENABLE_WHEEL) add_subdirectory(wheel) endif() +if (NGRAPH_PYTHON_BUILD_ENABLE) + add_dependencies(ie_api _pyngraph) +endif() + # install ie_cpack_add_component(${PYTHON_VERSION}) diff --git a/model-optimizer/CMakeLists.txt b/model-optimizer/CMakeLists.txt index e9d54d60818..b0e75505cc2 100644 --- a/model-optimizer/CMakeLists.txt +++ b/model-optimizer/CMakeLists.txt @@ -1,6 +1,14 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +if (NOT NGRAPH_PYTHON_BUILD_ENABLE) + message(WARNING "Please enable nGraph Python API (_pyngraph) target to enable Model Optimizer target") +elseif(NOT ENABLE_PYTHON) + message(WARNING "Please enable IE Python API (ie_api and offline_transformations_api) targets to enable Model Optimizer target") +else() + add_custom_target(model_optimizer DEPENDS ie_api offline_transformations_api inference_engine_ir_reader) +endif() + # install ie_cpack_add_component(model_optimizer) diff --git a/model-optimizer/mo/main.py b/model-optimizer/mo/main.py index aa0eb628ad8..b4b32fa786e 100644 --- a/model-optimizer/mo/main.py +++ b/model-optimizer/mo/main.py @@ -144,20 +144,18 @@ def prepare_ir(argv: argparse.Namespace): # This try-except is additional reinsurance that the IE # dependency search does not break the MO pipeline - try: - argv.ie_is_available = find_ie_version(silent=argv.silent) - - if not argv.ie_is_available and not argv.silent: - print("[ WARNING ] Could not find the Inference Engine Python API. At this moment, the Inference Engine dependency is not required, but will be required in future releases.") - print("[ WARNING ] Consider building the Inference Engine Python API from sources or try to install OpenVINO (TM) Toolkit using \"install_prerequisites.{}\"".format( + def raise_ie_not_found(): + raise Error("Could not find the Inference Engine or nGraph Python API.\n" + "Consider building the Inference Engine and nGraph Python APIs from sources or try to install OpenVINO (TM) Toolkit using \"install_prerequisites.{}\"".format( "bat" if sys.platform == "windows" else "sh")) - # If the IE was not found, it will not print the MO version, so we have to print it manually - print("{}: \t{}".format("Model Optimizer version", get_version())) + try: + if not find_ie_version(silent=argv.silent): + raise_ie_not_found() except Exception as e: - argv.ie_is_available = False + raise_ie_not_found() # This is just to check that transform key is valid and transformations are available - check_available_transforms(parse_transform(argv.transform), argv.ie_is_available) + check_available_transforms(parse_transform(argv.transform)) if argv.legacy_ir_generation and len(argv.transform) != 0: raise Error("--legacy_ir_generation and --transform keys can not be used at the same time.") @@ -261,10 +259,6 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): mean_data = deepcopy(graph.graph['mf']) if 'mf' in graph.graph else None input_names = deepcopy(graph.graph['input_names']) if 'input_names' in graph.graph else [] - # Remove temporary ie_is_available key from argv no to have it in IR - ie_is_available = argv.ie_is_available - del argv.ie_is_available - prepare_emit_ir(graph=graph, data_type=graph.graph['cmd_params'].data_type, output_dir=argv.output_dir, @@ -285,7 +279,7 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): # This 
try-except is additional reinsurance that the IE # dependency search does not break the MO pipeline try: - if not argv.legacy_ir_generation and ie_is_available: + if not argv.legacy_ir_generation: path_to_offline_transformations = os.path.join(os.path.realpath(os.path.dirname(__file__)), 'back', 'offline_transformations.py') status = subprocess.run([sys.executable, path_to_offline_transformations, @@ -295,7 +289,7 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): return_code = status.returncode except Exception as e: return_code = "failed" - log.error(e, extra={'is_warning': True}) + log.error(e) message = str(dict({ "platform": platform.system(), @@ -307,39 +301,20 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): t = tm.Telemetry() t.send_event('mo', 'offline_transformations_status', message) - # if IR wasn't produced by offline_transformations step we need to fallback to IR - # produced by prepare_ir. This IR needs to be renamed from XXX_tmp.xml to XXX.xml - suffixes = [".xml", ".bin", ".mapping"] if return_code != 0: - if len(argv.transform) != 0: - # Remove temporary IR before throwing exception - for suf in suffixes: - path_to_file = orig_model_name + "_tmp" + suf - if os.path.exists(path_to_file): - os.remove(path_to_file) - raise Error("Failed to apply transformations: {}".format(argv.transform)) + raise Error("offline transformations step has failed.") - log.error("Using fallback to produce IR.", extra={'is_warning': True}) - for suf in suffixes: - # remove existing files - path_to_file = orig_model_name + suf - if os.path.exists(path_to_file): - os.remove(path_to_file) + for suf in [".xml", ".bin", ".mapping"]: + # remove existing files + path_to_file = orig_model_name + "_tmp" + suf + if os.path.exists(path_to_file): + os.remove(path_to_file) - # rename tmp IR to original name - os.rename(orig_model_name + "_tmp" + suf, orig_model_name + suf) - else: - for suf in suffixes: - # remove existing files - path_to_file = orig_model_name + "_tmp" + suf - if os.path.exists(path_to_file): - os.remove(path_to_file) - - # add meta information to IR - append_ir_info(file=orig_model_name, - meta_info=get_meta_info(argv), - mean_data=mean_data, - input_names=input_names) + # add meta information to IR + append_ir_info(file=orig_model_name, + meta_info=get_meta_info(argv), + mean_data=mean_data, + input_names=input_names) print('[ SUCCESS ] Generated IR version {} model.'.format(get_ir_version(argv))) print('[ SUCCESS ] XML file: {}.xml'.format(orig_model_name)) diff --git a/model-optimizer/mo/utils/check_ie_bindings.py b/model-optimizer/mo/utils/check_ie_bindings.py index fe03294bad1..dae7aeb9958 100644 --- a/model-optimizer/mo/utils/check_ie_bindings.py +++ b/model-optimizer/mo/utils/check_ie_bindings.py @@ -51,7 +51,13 @@ def import_core_modules(silent: bool, path_to_module: str): from openvino.offline_transformations import ApplyMOCTransformations, ApplyLowLatencyTransformation, \ GenerateMappingFile # pylint: disable=import-error,no-name-in-module + # TODO: it is temporary import to check that nGraph python API is available. 
But in future + # we need to replace it with Frontend imports + from ngraph.impl.op import Parameter # pylint: disable=import-error,no-name-in-module + from _pyngraph import PartialShape, Dimension # pylint: disable=import-error,no-name-in-module + import openvino # pylint: disable=import-error,no-name-in-module + import ngraph # pylint: disable=import-error,no-name-in-module if silent: return True @@ -60,6 +66,8 @@ def import_core_modules(silent: bool, path_to_module: str): mo_version = str(v.get_version()) # pylint: disable=no-member,no-name-in-module print("\t- {}: \t{}".format("Inference Engine found in", os.path.dirname(openvino.__file__))) + # TODO: when nGraph version will be available we need to start compare it to IE and MO versions. Ticket: 58091 + print("\t- {}: \t{}".format("nGraph found in", os.path.dirname(ngraph.__file__))) print("{}: \t{}".format("Inference Engine version", ie_version)) print("{}: \t{}".format("Model Optimizer version", mo_version)) diff --git a/model-optimizer/mo/utils/cli_parser.py b/model-optimizer/mo/utils/cli_parser.py index e6b2f2d2517..a7dd5df023b 100644 --- a/model-optimizer/mo/utils/cli_parser.py +++ b/model-optimizer/mo/utils/cli_parser.py @@ -1217,17 +1217,12 @@ def parse_transform(transform: str) -> list: return transforms -def check_available_transforms(transforms: list, ie_is_available: bool): +def check_available_transforms(transforms: list): """ This function check that transformations specified by user are available. :param transforms: list of user specified transformations - :param ie_is_available: True if IE Python API is available and False if it is not - :return: raises an Error if IE or transformation is not available + :return: raises an Error if transformation is not available """ - if not ie_is_available and len(transforms) != 0: - raise Error('Can not apply {} transformations due to missing Inference Engine Python API'.format( - ','.join([name for name, _ in transforms]))) - from mo.back.offline_transformations import get_available_transformations available_transforms = get_available_transformations() diff --git a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py index c6c1c96e46a..c4d36780a68 100644 --- a/model-optimizer/unit_tests/mo/utils/cli_parser_test.py +++ b/model-optimizer/unit_tests/mo/utils/cli_parser_test.py @@ -959,11 +959,11 @@ class TransformChecker(unittest.TestCase): def test_check_low_latency_is_available(self, available_transformations): available_transformations.return_value = {"LowLatency2": None} try: - check_available_transforms([("LowLatency2", "")], True) + check_available_transforms([("LowLatency2", "")]) except Error as e: self.assertTrue(False, "Exception \"{}\" is unexpected".format(e)) @patch("mo.back.offline_transformations.get_available_transformations") def test_check_dummy_pass_is_available(self, available_transformations): available_transformations.return_value = {"LowLatency2": None} - self.assertRaises(Error, check_available_transforms, [("DummyPass", "")], True) + self.assertRaises(Error, check_available_transforms, [("DummyPass", "")]) From 3292543252c657d49de03f2d006920e217bfad0d Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Wed, 16 Jun 2021 06:43:12 +0300 Subject: [PATCH 22/43] Fix recovery of output subscript in Einsum implicit mode (#6131) * Fix recovery of output subscript in Einsum implicit mode Signed-off-by: Roman Kazantsev * Fix code style Signed-off-by: Roman Kazantsev --- model-optimizer/extensions/ops/einsum.py | 
23 ++++++++++- .../unit_tests/extensions/ops/einsum_test.py | 5 +++ ngraph/core/src/op/einsum.cpp | 39 +++++++++++++++++-- ngraph/test/type_prop/einsum.cpp | 28 +++++++++++++ 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/model-optimizer/extensions/ops/einsum.py b/model-optimizer/extensions/ops/einsum.py index 907989216ac..a30bb96bdfc 100644 --- a/model-optimizer/extensions/ops/einsum.py +++ b/model-optimizer/extensions/ops/einsum.py @@ -28,6 +28,22 @@ class Einsum(Op): def backend_attrs(self): return ['equation'] + @staticmethod + def is_label_elsewhere(input_subscripts: list, label_to_check: str, excluded_subscript_inds: list) -> bool: + """ + Check if the given label is met in input subscripts excluding ones specified by a list of indices + excluded_subscript_inds + + :param input_subscripts: input subscripts among which to check if the label is met + :param label_to_check: a label to check + :param excluded_subscript_inds: indices of input subscripts to be excluded for this check + :return: True - met, False - otherwise + """ + for ind, input_subscript in enumerate(input_subscripts): + if ind not in excluded_subscript_inds and label_to_check in input_subscript: + return True + return False + @staticmethod def parse_equation(node_name: str, equation: str) -> (list, str): """ @@ -70,7 +86,12 @@ class Einsum(Op): "The output subscript of Einsum node {} must contain ellipsis".format(node_name) elif len(splitted_equation) == 1: # recover output subscript in case implicit mode - output_subscript = ''.join(input_subscripts_list) + output_subscript = "" + for ind, input_subscript in enumerate(input_subscripts_list): + labels = Einsum.extract_subscript_labels(node_name, input_subscript) + for label in labels: + if Einsum.is_label_elsewhere(input_subscripts_list, label, [ind]) is False: + output_subscript += label output_subscript = ''.join(sorted(list(set(output_subscript) - {'.'}))) if is_ellipsis_met: output_subscript = "..." + output_subscript diff --git a/model-optimizer/unit_tests/extensions/ops/einsum_test.py b/model-optimizer/unit_tests/extensions/ops/einsum_test.py index 9407fe63d5f..960db04310d 100644 --- a/model-optimizer/unit_tests/extensions/ops/einsum_test.py +++ b/model-optimizer/unit_tests/extensions/ops/einsum_test.py @@ -60,6 +60,11 @@ class TestEinsum(unittest.TestCase): ([int64_array([1, 3, 5])], "AbC", int64_array([1, 5, 3])), # mixed case letters and equation in implicit mode ([int64_array([3, 11, 1, 5]), int64_array([1, 3, 1, 7])], "a...b,B...", int64_array([3, 11, 7, 1, 3, 5])), + # inner product in implicit mode + ([int64_array([3]), int64_array([3])], "i,i", int64_array([])), + # equation with ellipsis and repeated labels in implicit mode + # "a...b,b..." 
is equivalent to "a...b,b...->...a" + ([int64_array([9, 1, 4, 3]), int64_array([3, 11, 7, 1])], "a...b,b...", int64_array([11, 7, 4, 9])), ]) def test_einsum(self, input_shapes, equation, ref_output_shape): graph = create_einsum_graph(input_shapes, equation) diff --git a/ngraph/core/src/op/einsum.cpp b/ngraph/core/src/op/einsum.cpp index fbf52ef888b..721a8e0211e 100644 --- a/ngraph/core/src/op/einsum.cpp +++ b/ngraph/core/src/op/einsum.cpp @@ -60,11 +60,40 @@ bool is_subscript_correct(const std::string& subscript, bool& is_ellipsis_met) return true; } +/// \brief Check if the given label is met in input subscripts excluding ones +/// specified by a vector excluded_indices +/// +/// \param input_subscripts The vector of the input subscripts +/// \param label_to_check A label to check +/// \param excluded_indices A vector of input subscript indices to be excluded +/// +/// \return true - met, false - otherwise +/// +bool is_label_elsewhere(const std::vector& input_subscripts, + const std::string& label_to_check, + const std::vector& excluded_indices) +{ + for (size_t input_ind = 0; input_ind < input_subscripts.size(); ++input_ind) + { + const auto& input_subscript = input_subscripts[input_ind]; + // the subscript is checked only if its index is not in excluded indices list + bool check_subscript = + (std::find(excluded_indices.begin(), excluded_indices.end(), input_ind) == + excluded_indices.end()); + if (check_subscript && input_subscript.find(label_to_check) != std::string::npos) + { + return true; + } + } + return false; +} + void op::v7::Einsum::parse_equation(const std::string& equation, std::vector& input_subscripts, std::string& output_subscript) { NGRAPH_OP_SCOPE(v7_Einsum_parse_equation); + constexpr char ellipsis[] = "..."; // split equation to input subscripts and an output subscript auto pos_output_delimeter = equation.find("->"); @@ -93,13 +122,15 @@ void op::v7::Einsum::parse_equation(const std::string& equation, if (pos_output_delimeter == std::string::npos) { - // recover output subscript + // equation is in implicit mode so recover output subscript output_subscript = ""; - for (auto const& input_subscript : input_subscripts) + for (size_t ind = 0; ind < input_subscripts.size(); ++ind) { - for (auto const& label : input_subscript) + auto const& input_subscript = input_subscripts[ind]; + for (auto const& label : extract_labels(input_subscript)) { - if (std::isalpha(label) && output_subscript.find(label) == std::string::npos) + if (label != ellipsis && + (is_label_elsewhere(input_subscripts, label, {ind}) == false)) { output_subscript += label; } diff --git a/ngraph/test/type_prop/einsum.cpp b/ngraph/test/type_prop/einsum.cpp index a65fb0677f4..cea89ea7076 100644 --- a/ngraph/test/type_prop/einsum.cpp +++ b/ngraph/test/type_prop/einsum.cpp @@ -186,6 +186,34 @@ TEST(type_prop, einsum_implicitmode_mixedcaseletters2) ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); } +TEST(type_prop, einsum_implicitmode_repeatedlabels) +{ + // the following equation is equivalent to "a...b,b...->...a" + std::string equation = "a...b,b..."; + const auto input1_shape = PartialShape{Dimension(3, 5), 11, 1, 3}; + const auto input2_shape = PartialShape{Dimension(1, 3), 3, 1, 7}; + const auto out_shape = PartialShape{3, 11, 7, Dimension(3, 5)}; + auto I1 = make_shared(element::f32, input1_shape); + auto I2 = make_shared(element::f32, input2_shape); + auto O = make_shared(OutputVector{I1, I2}, equation); + ASSERT_EQ(O->get_element_type(), element::f32); + 
ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); +} + +TEST(type_prop, einsum_implicitmode_innerprod) +{ + // the following equation is equivalent to "i,i->" + std::string equation = "i,i"; + const auto input1_shape = PartialShape{11}; + const auto input2_shape = PartialShape{Dimension(1, 20)}; + const auto out_shape = PartialShape{}; + auto I1 = make_shared(element::f32, input1_shape); + auto I2 = make_shared(element::f32, input2_shape); + auto O = make_shared(OutputVector{I1, I2}, equation); + ASSERT_EQ(O->get_element_type(), element::f32); + ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); +} + TEST(type_prop, einsum_dynamicrank_multimatmul) { std::string equation = "ab,bcd,bc->ca"; From 4fb2d83db9b4bd524498800e50de9cb9bc3e669e Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Wed, 16 Jun 2021 06:09:57 +0200 Subject: [PATCH 23/43] use RTTI declaration macro and add visitor API test (#6147) --- ngraph/core/include/ngraph/op/parameter.hpp | 5 ++-- ngraph/core/src/op/parameter.cpp | 2 +- ngraph/test/CMakeLists.txt | 3 +- ngraph/test/visitors/op/parameter.cpp | 33 +++++++++++++++++++++ 4 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 ngraph/test/visitors/op/parameter.cpp diff --git a/ngraph/core/include/ngraph/op/parameter.hpp b/ngraph/core/include/ngraph/op/parameter.hpp index 50de93b529b..01b57f5b80f 100644 --- a/ngraph/core/include/ngraph/op/parameter.hpp +++ b/ngraph/core/include/ngraph/op/parameter.hpp @@ -21,8 +21,7 @@ namespace ngraph class NGRAPH_API Parameter : public op::Op { public: - static constexpr NodeTypeInfo type_info{"Parameter", 0}; - const NodeTypeInfo& get_type_info() const override { return type_info; } + NGRAPH_RTTI_DECLARATION; /// \brief Constructions a tensor-typed parameter node. Parameter() = default; /// \brief Constructions a tensor-typed parameter node. 
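To make the switch from a hand-written type_info to the macro pair more concrete, here is a hedged sketch of how an arbitrary user-defined operation would typically use the same two macros. MyRelu, its files, and its opset version are invented for illustration and do not appear in this patch; only the macro names and the general op boilerplate are taken from the surrounding code.

// Sketch only: a made-up op using the NGRAPH_RTTI_DECLARATION / NGRAPH_RTTI_DEFINITION pair.
// my_relu.hpp
#include <memory>
#include "ngraph/op/op.hpp"

class MyRelu : public ngraph::op::Op {
public:
    NGRAPH_RTTI_DECLARATION;                      // declares the static type_info and get_type_info()
    MyRelu() = default;
    explicit MyRelu(const ngraph::Output<ngraph::Node>& arg) : Op({arg}) {
        constructor_validate_and_infer_types();
    }
    void validate_and_infer_types() override {
        // element-wise op: output type and shape follow the input
        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
    }
    std::shared_ptr<ngraph::Node> clone_with_new_inputs(const ngraph::OutputVector& new_args) const override {
        return std::make_shared<MyRelu>(new_args.at(0));
    }
};

// my_relu.cpp
NGRAPH_RTTI_DEFINITION(MyRelu, "MyRelu", 0);      // defines type_info as {"MyRelu", 0}

The definition macro replaces the manually written constexpr NodeTypeInfo member and get_type_info() override, which is exactly the substitution this commit makes for Parameter.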
@@ -56,7 +55,7 @@ namespace ngraph protected: PartialShape m_partial_shape; element::Type m_element_type; - bool m_is_relevant_to_shapes; + bool m_is_relevant_to_shapes{false}; }; } // namespace v0 using v0::Parameter; diff --git a/ngraph/core/src/op/parameter.cpp b/ngraph/core/src/op/parameter.cpp index c8d33c49db9..ef979410a86 100644 --- a/ngraph/core/src/op/parameter.cpp +++ b/ngraph/core/src/op/parameter.cpp @@ -11,7 +11,7 @@ using namespace std; using namespace ngraph; -constexpr NodeTypeInfo op::Parameter::type_info; +NGRAPH_RTTI_DEFINITION(op::v0::Parameter, "Parameter", 0); op::Parameter::Parameter(const element::Type& element_type, const PartialShape& pshape) : m_partial_shape(pshape) diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index b9e65209bb8..2850bd9d2e6 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -252,6 +252,7 @@ set(SRC visitors/op/normalize_l2.cpp visitors/op/one_hot.cpp visitors/op/pad.cpp + visitors/op/parameter.cpp visitors/op/prior_box.cpp visitors/op/proposal.cpp visitors/op/psroi_pooling.cpp @@ -595,4 +596,4 @@ target_include_directories(unit-test PRIVATE ${FRONTEND_INCLUDE_PATH}) target_link_libraries(unit-test PRIVATE frontend_manager) add_subdirectory(frontend) -### END FRONTEND ### \ No newline at end of file +### END FRONTEND ### diff --git a/ngraph/test/visitors/op/parameter.cpp b/ngraph/test/visitors/op/parameter.cpp new file mode 100644 index 00000000000..991bf6a3e9e --- /dev/null +++ b/ngraph/test/visitors/op/parameter.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" + +#include "ngraph/ngraph.hpp" +#include "ngraph/op/util/attr_types.hpp" +#include "ngraph/opsets/opset1.hpp" +#include "ngraph/opsets/opset3.hpp" +#include "ngraph/opsets/opset4.hpp" +#include "ngraph/opsets/opset5.hpp" + +#include "util/visitor.hpp" + +using namespace ngraph; +using ngraph::test::NodeBuilder; + +TEST(attributes, parameter_op) +{ + NodeBuilder::get_ops().register_factory(); + auto parameter = + std::make_shared(element::f32, PartialShape{Dimension{1}, Dimension{4}}); + + NodeBuilder builder(parameter); + auto g_parameter = as_type_ptr(builder.create()); + + const auto expected_attr_count = 2; + EXPECT_EQ(builder.get_value_map_size(), expected_attr_count); + + EXPECT_EQ(g_parameter->get_partial_shape(), parameter->get_partial_shape()); + EXPECT_EQ(g_parameter->get_element_type(), parameter->get_element_type()); +} From c4274d4c32def52189ee8fd5f92d663c9fa00321 Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Wed, 16 Jun 2021 06:36:06 +0200 Subject: [PATCH 24/43] Change version of pybind (#6145) --- ngraph/python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ngraph/python/CMakeLists.txt b/ngraph/python/CMakeLists.txt index 501bfffb4b2..58d30c300fd 100644 --- a/ngraph/python/CMakeLists.txt +++ b/ngraph/python/CMakeLists.txt @@ -20,7 +20,7 @@ include(FetchContent) FetchContent_Declare( pybind11 GIT_REPOSITORY "https://github.com/pybind/pybind11.git" - GIT_TAG "v2.5.0" + GIT_TAG "v2.6.2" ) FetchContent_GetProperties(pybind11) From 59fd456ba2b7dc9e2d5bc8bdf72e891ec811dc3f Mon Sep 17 00:00:00 2001 From: Patryk Elszkowski Date: Wed, 16 Jun 2021 06:37:37 +0200 Subject: [PATCH 25/43] minor fixes in Parameter spec (#6146) * minor fixes in Parameter spec * expand supported type list and add output shape info --- docs/ops/infrastructure/Parameter_1.md | 23 ++++++++++++++++------- 1 file changed, 16 
insertions(+), 7 deletions(-) diff --git a/docs/ops/infrastructure/Parameter_1.md b/docs/ops/infrastructure/Parameter_1.md index 807a606a375..879880002e6 100644 --- a/docs/ops/infrastructure/Parameter_1.md +++ b/docs/ops/infrastructure/Parameter_1.md @@ -11,18 +11,27 @@ * *element_type* * **Description**: the type of element of output tensor - * **Range of values**: u8, u16, u32, u64, i8, i16, i32, i64, f16, f32, boolean, bf16 - * **Type**: string + * **Range of values**: u1, u4, u8, u16, u32, u64, i4, i8, i16, i32, i64, f16, f32, boolean, bf16 + * **Type**: `string` * **Default value**: None - * **Required**: *Yes* + * **Required**: *yes* * *shape* * **Description**: the shape of the output tensor - * **Range of values**: list of non-negative integers, empty list is allowed that means 0D or scalar tensor - * **Type**: int[] + * **Range of values**: list of non-negative integers, empty list is allowed, which means 0D or scalar tensor + * **Type**: `int[]` * **Default value**: None - * **Required**: *Yes* + * **Required**: *yes* + + +**Outputs** + +* **1**: Output tensor of type *T* and shape equal to *shape* attribute. + +**Types** + +* *T*: any type from *element type* values. **Example** @@ -38,4 +47,4 @@ -``` \ No newline at end of file +``` From 8c68f8b43c1cc0d56f767446550d032033edf79a Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Wed, 16 Jun 2021 08:01:57 +0200 Subject: [PATCH 26/43] Revise reference implementation for ReduceMin operation (#5797) * Revise reference implementation for ReduceMin operation * Refactor backend unit tests * Move tests with zero dims to op_eval * Fix code style * Added minor changes * Replace CoordinateTransform for CoordinateTransformBasic * Added constant expression to set keep_dims as false * Add const qualifier to local variables * Use host tensor to retrieve and normalize axes * Re-arrange unit tests in manifest --- .../include/ngraph/runtime/reference/min.hpp | 35 +-- ngraph/core/src/op/min.cpp | 9 +- ngraph/test/CMakeLists.txt | 1 + ngraph/test/backend/reduce_min.in.cpp | 241 ---------------- ngraph/test/op_eval/reduce_min.cpp | 258 ++++++++++++++++++ ngraph/test/runtime/ie/unit_test.manifest | 28 +- 6 files changed, 286 insertions(+), 286 deletions(-) create mode 100644 ngraph/test/op_eval/reduce_min.cpp diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/min.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/min.hpp index 9cfa7c16c4d..ada31988345 100644 --- a/ngraph/core/reference/include/ngraph/runtime/reference/min.hpp +++ b/ngraph/core/reference/include/ngraph/runtime/reference/min.hpp @@ -6,6 +6,7 @@ #include #include +#include #include "ngraph/coordinate_transform.hpp" #include "ngraph/shape_util.hpp" @@ -21,34 +22,34 @@ namespace ngraph namespace reference { template - void min(const T* arg, - T* out, - const Shape& in_shape, - const AxisSet& reduction_axes, - const bool keep_dims) + void min(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) { T minval = std::numeric_limits::has_infinity ? 
std::numeric_limits::infinity() : std::numeric_limits::max(); - const auto out_shape = reduce(in_shape, reduction_axes, keep_dims); - CoordinateTransform output_transform(out_shape); + constexpr bool dont_keep_dims_in_output = false; + const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); + std::fill(out, out + shape_size(out_shape), minval); - for (const Coordinate& output_coord : output_transform) - { - out[output_transform.index(output_coord)] = minval; - } - - CoordinateTransform input_transform(in_shape); + const auto in_strides = row_major_strides(in_shape); + const auto out_strides = row_major_strides(out_shape); + CoordinateTransformBasic input_transform(in_shape); for (const Coordinate& input_coord : input_transform) { - Coordinate output_coord = reduce(input_coord, reduction_axes, keep_dims); + const Coordinate output_coord = + reduce(input_coord, reduction_axes, dont_keep_dims_in_output); - T x = arg[input_transform.index(input_coord)]; - T min = out[output_transform.index(output_coord)]; + const size_t in_idx = std::inner_product( + input_coord.begin(), input_coord.end(), in_strides.begin(), 0); + const size_t out_idx = std::inner_product( + output_coord.begin(), output_coord.end(), out_strides.begin(), 0); + + const T x = arg[in_idx]; + const T min = out[out_idx]; if (x < min) { - out[output_transform.index(output_coord)] = x; + out[out_idx] = x; } } } diff --git a/ngraph/core/src/op/min.cpp b/ngraph/core/src/op/min.cpp index 37f3512413b..36b248ed69b 100644 --- a/ngraph/core/src/op/min.cpp +++ b/ngraph/core/src/op/min.cpp @@ -5,6 +5,7 @@ #include "ngraph/op/min.hpp" #include #include "itt.hpp" +#include "ngraph/op/util/evaluate_helpers.hpp" #include "ngraph/runtime/host_tensor.hpp" #include "ngraph/runtime/reference/min.hpp" #include "ngraph/shape_util.hpp" @@ -22,7 +23,7 @@ namespace minop { out->set_shape(reduce(arg->get_shape(), axes, keep_dims)); runtime::reference::min( - arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes, keep_dims); + arg->get_data_ptr(), out->get_data_ptr(), arg->get_shape(), axes); return true; } @@ -69,7 +70,11 @@ bool op::v1::ReduceMin::evaluate(const HostTensorVector& outputs, NGRAPH_OP_SCOPE(v1_ReduceMin_evaluate); NGRAPH_CHECK(validate_host_tensor_vector(inputs, 2)); NGRAPH_CHECK(validate_host_tensor_vector(outputs, 1)); - return minop::evaluate_min(inputs[0], outputs[0], get_reduction_axes(), get_keep_dims()); + + const auto reduction_axes = get_normalized_axes_from_tensor( + inputs[1], inputs[0]->get_partial_shape().rank(), get_friendly_name()); + + return minop::evaluate_min(inputs[0], outputs[0], reduction_axes, get_keep_dims()); } bool op::v1::ReduceMin::has_evaluate() const diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 2850bd9d2e6..ca45c965c9f 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -72,6 +72,7 @@ set(SRC op_eval/mish.cpp op_eval/non_zero.cpp op_eval/reduce_max.cpp + op_eval/reduce_min.cpp op_eval/reduce_prod.cpp op_eval/reduce_sum.cpp op_eval/roi_align.cpp diff --git a/ngraph/test/backend/reduce_min.in.cpp b/ngraph/test/backend/reduce_min.in.cpp index 21565248d08..d43c96b17f7 100644 --- a/ngraph/test/backend/reduce_min.in.cpp +++ b/ngraph/test/backend/reduce_min.in.cpp @@ -124,100 +124,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_min_matrix_rows_int32) EXPECT_EQ((vector{1, 3, 5}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - 
Shape shape_rt{3}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). - Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{2}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{}; - auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); -} - NGRAPH_TEST(${BACKEND_NAME}, reduce_min_3d_to_matrix_most_sig) { Shape shape_a{3, 3, 3}; @@ -334,33 +240,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_min_3d_to_scalar_int32) EXPECT_EQ((vector{1}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 2}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, false), ParameterVector{A}); - - auto backend = 
runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the - // right value. - copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float inf = std::numeric_limits::infinity(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); -} - // ----------------------- keep dims = true ----------------------- // NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_to_scalar) @@ -469,99 +348,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_matrix_rows_int32) EXPECT_EQ((vector{1, 3, 5}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_matrix_rows_zero) -{ - Shape shape_a{3, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 1}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_matrix_cols_zero) -{ - // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
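The revised `min()` reference shown above no longer indexes through a pair of `CoordinateTransform` objects: it fills the output with the identity value for `min`, then maps every input coordinate to its reduced output coordinate and converts both into flat offsets with row-major strides. A standalone sketch of that indexing scheme, in plain C++ without the ngraph helpers and with illustrative 2x3 data:

```cpp
#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
#include <numeric>
#include <vector>

// Row-major strides for a shape, e.g. {2, 3} -> {3, 1}.
static std::vector<size_t> row_major_strides(const std::vector<size_t>& shape) {
    std::vector<size_t> strides(shape.size(), 1);
    for (size_t i = shape.size(); i-- > 1;)
        strides[i - 1] = strides[i] * shape[i];
    return strides;
}

int main() {
    // Illustrative input: shape {2, 3}, reduce over axis 1.
    const std::vector<size_t> in_shape{2, 3};
    const std::vector<float> in{4.f, 1.f, 7.f, 3.f, 9.f, 2.f};
    const std::vector<size_t> out_shape{2};  // squeezed shape: axis 1 removed
    std::vector<float> out(2, std::numeric_limits<float>::infinity());

    const auto in_strides = row_major_strides(in_shape);
    const auto out_strides = row_major_strides(out_shape);

    // Walk every input coordinate, drop the reduced axis to get the output
    // coordinate, and turn both into flat offsets via the strides -- the same
    // scheme the revised reference implementation uses.
    for (size_t r = 0; r < in_shape[0]; ++r) {
        for (size_t c = 0; c < in_shape[1]; ++c) {
            const std::vector<size_t> in_coord{r, c};
            const std::vector<size_t> out_coord{r};
            const size_t in_idx = std::inner_product(
                in_coord.begin(), in_coord.end(), in_strides.begin(), size_t{0});
            const size_t out_idx = std::inner_product(
                out_coord.begin(), out_coord.end(), out_strides.begin(), size_t{0});
            out[out_idx] = std::min(out[out_idx], in[in_idx]);
        }
    }

    assert((out == std::vector<float>{1.f, 2.f}));
    std::cout << out[0] << " " << out[1] << "\n";  // prints: 1 2
}
```

Note that the reference itself now always iterates over the squeezed output shape; `keep_dims` only affects the output shape set by the `evaluate_min` helper, and `ReduceMin::evaluate` reads the reduction axes from the second input's host tensor and normalizes them via `get_normalized_axes_from_tensor` before calling down.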
- Shape shape_a{0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1, 2}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3, 3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity(), - std::numeric_limits::infinity()}), - read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_vector_zero) -{ - Shape shape_a{0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1}; - auto axes = make_shared(element::i32, Shape{}, 0); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); -} - -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_matrix_to_scalar_zero_by_zero) -{ - Shape shape_a{0, 0}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{1, 1}; - auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - copy_data(result, vector({3})); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); -} NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_3d_to_matrix_most_sig) { @@ -679,33 +465,6 @@ NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_3d_to_scalar_int32) EXPECT_EQ((vector{1}), read_vector(result)); } -NGRAPH_TEST(${BACKEND_NAME}, reduce_min_keep_3d_eliminate_zero_dim) -{ - Shape shape_a{3, 0, 2}; - auto A = make_shared(element::f32, shape_a); - Shape shape_rt{3, 1, 2}; - auto axes = make_shared(element::i32, Shape{}, 1); - auto f = - make_shared(make_shared(A, axes, true), ParameterVector{A}); - - auto backend = runtime::Backend::create("${BACKEND_NAME}"); - - // Create some tensors for input/output - auto a = backend->create_tensor(element::f32, shape_a); - copy_data(a, vector{}); - auto result = backend->create_tensor(element::f32, shape_rt); - - // Overwrite the initial result vector to make sure we're not just coincidentally getting the - // right value. 
- copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); - - float inf = std::numeric_limits::infinity(); - - auto handle = backend->compile(f); - handle->call_with_validate({result}, {a}); - EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); -} - // Dynamic NGRAPH_TEST(${BACKEND_NAME}, reduce_min_matrix_columns_dynamic) diff --git a/ngraph/test/op_eval/reduce_min.cpp b/ngraph/test/op_eval/reduce_min.cpp new file mode 100644 index 00000000000..cc74b0b6f59 --- /dev/null +++ b/ngraph/test/op_eval/reduce_min.cpp @@ -0,0 +1,258 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" +#include "ngraph/ngraph.hpp" +#include "util/test_control.hpp" +#include "util/all_close.hpp" +#include "util/all_close_f.hpp" +#include "util/ndarray.hpp" + + +using namespace std; +using namespace ngraph; + +static string s_manifest = "${MANIFEST}"; + +TEST(op_eval, reduce_min_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_min_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). 
+ Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{2}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_min_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_min_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{}; + auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_min_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 2}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, false), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the + // right value. 
+ copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float inf = std::numeric_limits::infinity(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); +} + +TEST(op_eval, reduce_min_keep_matrix_rows_zero) +{ + Shape shape_a{3, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 1}; + auto axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_min_keep_matrix_cols_zero) +{ + // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})). + Shape shape_a{0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1, 2}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3, 3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity(), + std::numeric_limits::infinity()}), + read_vector(result)); +} + +TEST(op_eval, reduce_min_keep_vector_zero) +{ + Shape shape_a{0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1}; + auto axes = make_shared(element::i32, Shape{}, 0); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_min_keep_matrix_to_scalar_zero_by_zero) +{ + Shape shape_a{0, 0}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{1, 1}; + auto axes = make_shared(element::i32, Shape{2}, vector{0, 1}); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + copy_data(result, vector({3})); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{std::numeric_limits::infinity()}), read_vector(result)); +} + +TEST(op_eval, reduce_min_keep_3d_eliminate_zero_dim) +{ + Shape shape_a{3, 0, 2}; + auto A = make_shared(element::f32, shape_a); + Shape shape_rt{3, 1, 2}; + auto 
axes = make_shared(element::i32, Shape{}, 1); + auto f = + make_shared(make_shared(A, axes, true), ParameterVector{A}); + + auto backend = runtime::Backend::create("INTERPRETER"); + + // Create some tensors for input/output + auto a = backend->create_tensor(element::f32, shape_a); + copy_data(a, vector{}); + auto result = backend->create_tensor(element::f32, shape_rt); + + // Overwrite the initial result vector to make sure we're not just coincidentally getting the + // right value. + copy_data(result, vector{2112, 2112, 2112, 2112, 2112, 2112}); + + float inf = std::numeric_limits::infinity(); + + auto handle = backend->compile(f); + handle->call_with_validate({result}, {a}); + EXPECT_EQ((vector{inf, inf, inf, inf, inf, inf}), read_vector(result)); +} diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index a0a7c8735f5..adfebe2b57a 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -375,6 +375,8 @@ reduce_sum_keep_2d_to_scalar_int8 reduce_sum_2d_to_scalar_int8 reduce_product_to_scalar_int8 reduce_product_keep_to_scalar_int8 +reduce_min_to_scalar_int8 +reduce_min_keep_to_scalar_int8 # accuracy reduce_sum_keep_stable_acc reduce_sum_keep_3d_to_scalar_int32 @@ -441,34 +443,8 @@ onnx_dyn_shapes_model_tile_static gather_4d_indices_axis_0_uint8 tensor_constant_with_op constant_equality_bool -reduce_min_matrix_columns -reduce_min_matrix_rows -reduce_min_matrix_rows_int32 -reduce_min_3d_to_matrix_most_sig -reduce_min_3d_to_matrix_least_sig -reduce_min_keep_matrix_columns -reduce_min_keep_matrix_rows -reduce_min_keep_matrix_rows_int32 -reduce_min_keep_3d_to_matrix_most_sig -reduce_min_keep_3d_to_matrix_least_sig -reduce_min_matrix_columns_dynamic -reduce_min_matrix_rows_dynamic -reduce_min_keep_matrix_columns_dynamic -reduce_min_keep_matrix_rows_dynamic # zero dimension / result mismatch -reduce_min_to_scalar_int8 -reduce_min_matrix_rows_zero -reduce_min_matrix_cols_zero -reduce_min_vector_zero -reduce_min_matrix_to_scalar_zero_by_zero -reduce_min_3d_eliminate_zero_dim -reduce_min_keep_to_scalar_int8 -reduce_min_keep_matrix_rows_zero -reduce_min_keep_matrix_cols_zero -reduce_min_keep_vector_zero -reduce_min_keep_matrix_to_scalar_zero_by_zero -reduce_min_keep_3d_eliminate_zero_dim reduce_mean_to_scalar_int8 reduce_mean_matrix_rows_int32 reduce_mean_keep_to_scalar_int8 From b36c7736da8fd4f08cdc3f4d6d81af653622cf31 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Wed, 16 Jun 2021 09:06:19 +0300 Subject: [PATCH 27/43] Deprecated legacy maximum_value method (#6178) * Deprecated legacy maximum_value method since it is less reliable than the existing one * Style and doxygen \deprecated comment --- .../common/src/ngraph/operations/static_shape_broadcast.cpp | 1 - inference-engine/src/vpu/common/src/ngraph/utilities.cpp | 1 - ngraph/core/include/ngraph/validation_util.hpp | 3 +++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_broadcast.cpp b/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_broadcast.cpp index a52bf97e707..1490bd408e1 100644 --- a/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_broadcast.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/operations/static_shape_broadcast.cpp @@ -8,7 +8,6 @@ #include "vpu/utils/error.hpp" #include "ngraph/opsets/opset3.hpp" -#include "ngraph/evaluator.hpp" #include namespace ngraph { namespace vpu { namespace op { diff --git 
a/inference-engine/src/vpu/common/src/ngraph/utilities.cpp b/inference-engine/src/vpu/common/src/ngraph/utilities.cpp index 47afb6a83a6..23576617f67 100644 --- a/inference-engine/src/vpu/common/src/ngraph/utilities.cpp +++ b/inference-engine/src/vpu/common/src/ngraph/utilities.cpp @@ -6,7 +6,6 @@ #include "ngraph/opsets/opset3.hpp" #include "ngraph/opsets/opset5.hpp" -#include "ngraph/evaluator.hpp" #include diff --git a/ngraph/core/include/ngraph/validation_util.hpp b/ngraph/core/include/ngraph/validation_util.hpp index d8922f132a9..de81ebfd171 100644 --- a/ngraph/core/include/ngraph/validation_util.hpp +++ b/ngraph/core/include/ngraph/validation_util.hpp @@ -234,6 +234,9 @@ namespace ngraph /// \brief Try to compute the maximum value of value /// \return (true, max_value) if can be determined, or (false, numeric_limits::max()) /// if not. + /// \deprecated Use evaluate_upper_bound instead + NGRAPH_DEPRECATED( + "Use evaluate_upper_bound: it would return HostTensorPtr to the value instead of a pair") NGRAPH_API std::pair maximum_value(const Output& value); /// \brief Evaluates outputs, treating values in value_map as already computed. value_map is From 2f81968a3106dbd8f4adaa7bcf8bec778c84ce1c Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 16 Jun 2021 09:27:16 +0300 Subject: [PATCH 28/43] [IE CLDNN] Introduced new runtime API (#5417) --- .../src/cldnn_engine/CMakeLists.txt | 2 +- .../src/cldnn_engine/cldnn_common_utils.h | 2 +- .../src/cldnn_engine/cldnn_config.h | 2 +- .../src/cldnn_engine/cldnn_custom_layer.h | 2 +- .../src/cldnn_engine/cldnn_engine.cpp | 19 +- .../src/cldnn_engine/cldnn_engine.h | 4 +- .../cldnn_engine/cldnn_executable_network.cpp | 11 +- .../src/cldnn_engine/cldnn_graph.cpp | 34 +- .../src/cldnn_engine/cldnn_graph.h | 6 +- .../src/cldnn_engine/cldnn_infer_request.cpp | 223 +- .../src/cldnn_engine/cldnn_infer_request.h | 9 +- .../src/cldnn_engine/cldnn_program.cpp | 4 +- .../src/cldnn_engine/cldnn_program.h | 14 +- .../src/cldnn_engine/cldnn_remote_context.cpp | 109 +- .../src/cldnn_engine/cldnn_remote_context.h | 39 +- .../src/cldnn_engine/ops/batch_to_space.cpp | 2 +- .../src/cldnn_engine/ops/broadcast.cpp | 6 +- .../src/cldnn_engine/ops/concat.cpp | 2 +- .../src/cldnn_engine/ops/constant.cpp | 9 +- .../src/cldnn_engine/ops/convert.cpp | 2 +- .../src/cldnn_engine/ops/convolution.cpp | 10 +- .../cldnn_engine/ops/ctc_greedy_decoder.cpp | 10 +- .../src/cldnn_engine/ops/cum_sum.cpp | 2 +- .../src/cldnn_engine/ops/custom.cpp | 4 +- .../src/cldnn_engine/ops/depth_to_space.cpp | 2 +- .../src/cldnn_engine/ops/detection_output.cpp | 2 +- .../src/cldnn_engine/ops/eltwise.cpp | 8 +- .../src/cldnn_engine/ops/embedding_bag.cpp | 4 +- .../ops/extract_image_patches.cpp | 2 +- .../src/cldnn_engine/ops/fake_quantize.cpp | 2 +- .../src/cldnn_engine/ops/gather tree.cpp | 4 +- .../src/cldnn_engine/ops/gather.cpp | 4 +- .../src/cldnn_engine/ops/gather_nd.cpp | 2 +- inference-engine/src/cldnn_engine/ops/grn.cpp | 2 +- .../src/cldnn_engine/ops/interpolate.cpp | 2 +- inference-engine/src/cldnn_engine/ops/lrn.cpp | 2 +- .../src/cldnn_engine/ops/matmul.cpp | 10 +- inference-engine/src/cldnn_engine/ops/mvn.cpp | 3 +- .../cldnn_engine/ops/non_max_suppression.cpp | 12 +- .../src/cldnn_engine/ops/normalize_l2.cpp | 8 +- .../src/cldnn_engine/ops/one_hot.cpp | 2 +- inference-engine/src/cldnn_engine/ops/pad.cpp | 2 +- .../src/cldnn_engine/ops/parameter.cpp | 12 +- .../src/cldnn_engine/ops/pooling.cpp | 2 +- .../src/cldnn_engine/ops/prior_box.cpp | 2 +- 
.../src/cldnn_engine/ops/proposal.cpp | 6 +- .../src/cldnn_engine/ops/reduce.cpp | 6 +- .../src/cldnn_engine/ops/region_yolo.cpp | 2 +- .../src/cldnn_engine/ops/reorg_yolo.cpp | 2 +- .../src/cldnn_engine/ops/reshape.cpp | 4 +- .../src/cldnn_engine/ops/result.cpp | 2 +- .../src/cldnn_engine/ops/reverse_sequence.cpp | 2 +- inference-engine/src/cldnn_engine/ops/rnn.cpp | 12 +- .../src/cldnn_engine/ops/roi_pooling.cpp | 2 +- .../ops/scatter_elements_update.cpp | 2 +- .../cldnn_engine/ops/scatter_nd_update.cpp | 2 +- .../src/cldnn_engine/ops/scatter_update.cpp | 2 +- .../src/cldnn_engine/ops/select.cpp | 6 +- .../src/cldnn_engine/ops/shuffle_channels.cpp | 2 +- .../src/cldnn_engine/ops/softmax.cpp | 4 +- .../src/cldnn_engine/ops/space_to_batch.cpp | 2 +- .../src/cldnn_engine/ops/space_to_depth.cpp | 2 +- .../src/cldnn_engine/ops/split.cpp | 2 +- .../src/cldnn_engine/ops/strided_slice.cpp | 6 +- .../src/cldnn_engine/ops/tensor_iterator.cpp | 17 +- .../src/cldnn_engine/ops/tile.cpp | 2 +- .../src/cldnn_engine/ops/topk.cpp | 6 +- .../src/cldnn_engine/ops/transpose.cpp | 2 +- .../src/cldnn_engine/ops/unary.cpp | 2 +- .../functional/plugin/gpu/CMakeLists.txt | 2 +- .../thirdparty/clDNN/CMakeLists.txt | 58 +- .../thirdparty/clDNN/api/cldnn.hpp | 244 -- .../clDNN/api/{ => cldnn/graph}/network.hpp | 67 +- .../clDNN/api/{ => cldnn/graph}/program.hpp | 25 +- .../clDNN/api/{ => cldnn/graph}/topology.hpp | 25 +- .../api/{ => cldnn/primitives}/activation.hpp | 0 .../{ => cldnn/primitives}/arg_max_min.hpp | 0 .../primitives}/average_unpooling.hpp | 0 .../{ => cldnn/primitives}/batch_to_space.hpp | 0 .../primitives}/binary_convolution.hpp | 0 .../api/{ => cldnn/primitives}/border.hpp | 0 .../api/{ => cldnn/primitives}/broadcast.hpp | 0 .../{ => cldnn/primitives}/concatenation.hpp | 0 .../api/{ => cldnn/primitives}/condition.hpp | 2 +- .../{ => cldnn/primitives}/convolution.hpp | 0 .../clDNN/api/{ => cldnn/primitives}/crop.hpp | 0 .../primitives}/ctc_greedy_decoder.hpp | 0 .../api/{ => cldnn/primitives}/cum_sum.hpp | 0 .../primitives}/custom_gpu_primitive.hpp | 2 +- .../clDNN/api/{ => cldnn/primitives}/data.hpp | 6 +- .../{ => cldnn/primitives}/deconvolution.hpp | 0 .../{ => cldnn/primitives}/depth_to_space.hpp | 0 .../primitives}/detection_output.hpp | 0 .../api/{ => cldnn/primitives}/eltwise.hpp | 0 .../{ => cldnn/primitives}/embedding_bag.hpp | 0 .../primitives}/extract_image_patches.hpp | 0 .../primitives}/fully_connected.hpp | 0 .../cldnn/primitives}/fused_conv_eltwise.hpp | 4 +- .../api/{ => cldnn/primitives}/gather.hpp | 0 .../api/{ => cldnn/primitives}/gather_nd.hpp | 17 +- .../{ => cldnn/primitives}/gather_tree.hpp | 0 .../clDNN/api/{ => cldnn/primitives}/gemm.hpp | 0 .../clDNN/api/{ => cldnn/primitives}/grn.hpp | 0 .../{ => cldnn/primitives}/input_layout.hpp | 2 +- .../clDNN/api/{ => cldnn/primitives}/loop.hpp | 2 +- .../clDNN/api/{ => cldnn/primitives}/lrn.hpp | 0 .../clDNN/api/{ => cldnn/primitives}/lstm.hpp | 0 .../{ => cldnn/primitives}/lstm_dynamic.hpp | 0 .../cldnn/primitives}/lstm_dynamic_input.hpp | 2 +- .../primitives}/lstm_dynamic_timeloop.hpp | 2 +- .../{ => cldnn/primitives}/max_unpooling.hpp | 0 .../{ => cldnn/primitives}/mutable_data.hpp | 8 +- .../clDNN/api/{ => cldnn/primitives}/mvn.hpp | 0 .../primitives}/non_max_suppression.hpp | 0 .../api/{ => cldnn/primitives}/normalize.hpp | 0 .../api/{ => cldnn/primitives}/one_hot.hpp | 0 .../api/{ => cldnn/primitives}/permute.hpp | 0 .../api/{ => cldnn/primitives}/pooling.hpp | 0 .../api/{ => cldnn/primitives}/primitive.hpp | 6 +- .../api/{ => 
cldnn/primitives}/prior_box.hpp | 0 .../api/{ => cldnn/primitives}/proposal.hpp | 0 .../primitives}/pyramid_roi_align.hpp | 0 .../api/{ => cldnn/primitives}/quantize.hpp | 0 .../api/{ => cldnn/primitives}/reduce.hpp | 0 .../{ => cldnn/primitives}/region_yolo.hpp | 0 .../api/{ => cldnn/primitives}/reorder.hpp | 2 +- .../api/{ => cldnn/primitives}/reorg_yolo.hpp | 0 .../api/{ => cldnn/primitives}/resample.hpp | 0 .../api/{ => cldnn/primitives}/reshape.hpp | 0 .../primitives}/reverse_sequence.hpp | 0 .../{ => cldnn/primitives}/roi_pooling.hpp | 0 .../api/{ => cldnn/primitives}/scale.hpp | 0 .../primitives}/scatter_elements_update.hpp | 0 .../primitives}/scatter_nd_update.hpp | 0 .../{ => cldnn/primitives}/scatter_update.hpp | 0 .../api/{ => cldnn/primitives}/select.hpp | 0 .../primitives}/shuffle_channels.hpp | 0 .../api/{ => cldnn/primitives}/softmax.hpp | 0 .../{ => cldnn/primitives}/space_to_batch.hpp | 0 .../{ => cldnn/primitives}/space_to_depth.hpp | 0 .../api/{ => cldnn/primitives}/split.hpp | 0 .../{ => cldnn/primitives}/strided_slice.hpp | 0 .../clDNN/api/{ => cldnn/primitives}/tile.hpp | 0 .../runtime/compounds.hpp} | 3 +- .../clDNN/api/cldnn/runtime/device.hpp | 24 + .../clDNN/api/cldnn/runtime/device_info.hpp | 76 + .../clDNN/api/cldnn/runtime/device_query.hpp | 29 + .../clDNN/api/cldnn/runtime/engine.hpp | 150 ++ .../cldnn/runtime/engine_configuration.hpp | 110 + .../cldnn/runtime/error_handler.hpp} | 5 +- .../cldnn/runtime/event.hpp} | 20 +- .../clDNN/api/cldnn/runtime/half.hpp | 55 + .../clDNN/api/cldnn/runtime/kernel.hpp | 24 + .../clDNN/api/cldnn/runtime/kernel_args.hpp | 133 ++ .../clDNN/api/{ => cldnn/runtime}/layout.hpp | 4 +- .../clDNN/api/cldnn/runtime/memory.hpp | 150 ++ .../clDNN/api/cldnn/runtime/memory_caps.hpp | 84 + .../cldnn/runtime/memory_pool.hpp} | 81 +- .../api/{ => cldnn/runtime}/profiling.hpp | 1 + .../clDNN/api/cldnn/runtime/stream.hpp | 46 + .../clDNN/api/{ => cldnn/runtime}/tensor.hpp | 7 +- .../clDNN/api/cldnn/runtime/utils.hpp | 144 ++ .../thirdparty/clDNN/api/device.hpp | 161 -- .../thirdparty/clDNN/api/engine.hpp | 189 -- .../thirdparty/clDNN/api/event.hpp | 83 - .../thirdparty/clDNN/api/memory.hpp | 278 --- .../thirdparty/clDNN/api/meta_utils.hpp | 43 - .../clDNN/api_test_builds/CMakeLists.txt | 38 +- .../clDNN/kernel_selector/CMakeLists.txt | 1 + .../kernel_selector/common/tensor_type.h | 2 + .../activation/activation_kernel_base.cpp | 2 +- .../arg_max_min/arg_max_min_kernel_axis.cpp | 2 +- .../binary_convolution_kernel_base.cpp | 4 +- .../concatenation_kernel_base.cpp | 14 +- .../concatenation_kernel_fs_b_yx_fsv32.cpp | 14 +- .../concatenation_kernel_ref.cpp | 6 +- .../convolution/convolution_kernel_base.cpp | 12 +- .../ctc_greedy_decoder_kernel_base.cpp | 2 +- .../cum_sum/cum_sum_kernel_partial_sum.cpp | 12 +- .../deconvolution_kernel_base.cpp | 2 +- .../eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp | 8 +- .../eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp | 8 +- .../eltwise/eltwise_kernel_base.cpp | 8 +- .../eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp | 9 +- ...se_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp | 8 +- .../eltwise/eltwise_kernel_vload8.cpp | 8 +- .../fused_conv_eltwise_kernel_base.cpp | 6 +- .../gather/gather_nd_kernel_ref.cpp | 16 +- .../gather/gather_nd_kernel_ref.h | 16 +- .../gather/gather_nd_kernel_selector.cpp | 16 +- .../gather/gather_nd_kernel_selector.h | 16 +- .../lstm/lstm_elt_kernel_base.cpp | 10 +- .../lstm/lstm_gemm_kernel_base.cpp | 16 +- ...m_gemv_gpu_subgroup1x64_bfyx_ff_simd16.cpp | 2 +- ...m_gemv_gpu_subgroup1x64_bfyx_hh_simd16.cpp | 2 
+- .../lstm_dynamic_input_bfyx_opt.cpp | 6 +- .../lstm_dynamic_input_kernel_base.cpp | 14 +- .../lstm_dynamic_timeloop_kernel_base.cpp | 22 +- .../max_unpooling_kernel_base.cpp | 2 +- .../mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp | 30 +- .../normalize/normalize_kernel_base.cpp | 2 +- .../pooling/pooling_kernel_base.cpp | 2 +- .../quantize/quantize_kernel_base.cpp | 8 +- .../reorder/reorder_biplanar_nv12.cpp | 2 +- .../reorder/reorder_kernel_base.cpp | 6 +- .../reshape/reshape_kernel_ref.cpp | 8 +- .../roi_pooling/roi_pooling_kernel_base.cpp | 4 +- .../select/select_kernel_base.cpp | 8 +- .../clDNN/kernel_selector/core/auto_tuner.cpp | 4 +- .../clDNN/kernel_selector/core/auto_tuner.h | 2 +- .../core/cl_kernels/gather_nd_ref.cl | 14 +- .../core/cl_kernels/max_unpooling_gpu_ref.cl | 4 +- .../core/common/kernel_base_opencl.cpp | 8 +- .../core}/device_cache_reader.cpp | 27 +- .../core/device_cache_reader.h | 14 + .../kernel_selector/core/kernel_selector.cpp | 9 +- .../core/kernel_selector_common.h | 119 +- .../thirdparty/clDNN/runtime/CMakeLists.txt | 79 + .../cldnn_itt.h => runtime/cldnn_itt.hpp} | 2 +- .../thirdparty/clDNN/runtime/device_query.cpp | 30 + .../thirdparty/clDNN/runtime/engine.cpp | 160 ++ .../thirdparty/clDNN/runtime/event.cpp | 69 + .../{src/gpu => runtime}/kernels_cache.cpp | 109 +- .../kernels_cache.hpp} | 55 +- .../clDNN/runtime/kernels_factory.cpp | 22 + .../clDNN/runtime/kernels_factory.hpp | 22 + .../thirdparty/clDNN/runtime/memory.cpp | 36 + .../clDNN/{src => runtime}/memory_pool.cpp | 221 +- .../gpu => runtime/ocl}/ocl_base_event.cpp | 14 +- .../ocl/ocl_base_event.hpp} | 33 +- .../ocl/ocl_command_queues_builder.cpp} | 33 +- .../ocl/ocl_command_queues_builder.hpp} | 21 +- .../clDNN/runtime/ocl/ocl_common.hpp | 30 + .../ocl/ocl_device.cpp} | 144 +- .../clDNN/runtime/ocl/ocl_device.hpp | 42 + .../ocl/ocl_device_detector.cpp} | 106 +- .../clDNN/runtime/ocl/ocl_device_detector.hpp | 30 + .../clDNN/runtime/ocl/ocl_engine.cpp | 204 ++ .../clDNN/runtime/ocl/ocl_engine.hpp | 51 + .../clDNN/runtime/ocl/ocl_engine_factory.hpp | 19 + .../ocl/ocl_events_pool.hpp} | 39 +- .../cl2_ext.hpp => runtime/ocl/ocl_ext.hpp} | 41 +- .../clDNN/runtime/ocl/ocl_kernel.hpp | 33 + .../clDNN/runtime/ocl/ocl_kernels_factory.cpp | 21 + .../clDNN/runtime/ocl/ocl_memory.cpp | 373 +++ .../clDNN/runtime/ocl/ocl_memory.hpp | 128 ++ .../clDNN/runtime/ocl/ocl_stream.cpp | 428 ++++ .../clDNN/runtime/ocl/ocl_stream.hpp | 99 + .../gpu => runtime/ocl}/ocl_user_event.cpp | 4 +- .../ocl/ocl_user_event.hpp} | 14 +- .../ocl/ocl_wrapper.hpp} | 7 +- .../thirdparty/clDNN/src/CMakeLists.txt | 13 +- .../thirdparty/clDNN/src/activation.cpp | 2 +- .../thirdparty/clDNN/src/arg_max_min.cpp | 3 +- .../clDNN/src/average_unpooling.cpp | 2 +- .../thirdparty/clDNN/src/batch_to_space.cpp | 2 +- .../clDNN/src/binary_convolution.cpp | 2 +- .../thirdparty/clDNN/src/border.cpp | 2 +- .../thirdparty/clDNN/src/broadcast.cpp | 2 +- .../thirdparty/clDNN/src/concatenation.cpp | 2 +- .../thirdparty/clDNN/src/condition.cpp | 8 +- .../thirdparty/clDNN/src/convolution.cpp | 2 +- .../thirdparty/clDNN/src/crop.cpp | 4 +- .../thirdparty/clDNN/src/cum_sum.cpp | 2 +- .../thirdparty/clDNN/src/data.cpp | 22 +- .../thirdparty/clDNN/src/deconvolution.cpp | 2 +- .../clDNN/src/deformable_convolution.cpp | 2 +- .../thirdparty/clDNN/src/depth_to_space.cpp | 2 +- .../thirdparty/clDNN/src/detection_output.cpp | 2 +- .../thirdparty/clDNN/src/device.cpp | 58 - .../thirdparty/clDNN/src/eltwise.cpp | 2 +- .../thirdparty/clDNN/src/embedding_bag.cpp | 2 +- 
.../thirdparty/clDNN/src/engine.cpp | 298 --- .../thirdparty/clDNN/src/error_handler.cpp | 2 +- .../thirdparty/clDNN/src/event.cpp | 102 - .../clDNN/src/extract_image_patches.cpp | 2 +- .../thirdparty/clDNN/src/fully_connected.cpp | 2 +- .../clDNN/src/fused_conv_eltwise.cpp | 2 +- .../thirdparty/clDNN/src/gather.cpp | 2 +- .../thirdparty/clDNN/src/gather_nd.cpp | 18 +- .../thirdparty/clDNN/src/gather_tree.cpp | 2 +- .../thirdparty/clDNN/src/gemm.cpp | 2 +- .../clDNN/src/gpu/activation_gpu.cpp | 14 +- .../clDNN/src/gpu/arg_max_min_gpu.cpp | 11 +- .../clDNN/src/gpu/average_unpooling_gpu.cpp | 11 +- .../clDNN/src/gpu/batch_to_space_gpu.cpp | 6 +- .../clDNN/src/gpu/binary_convolution_gpu.cpp | 19 +- .../thirdparty/clDNN/src/gpu/border_gpu.cpp | 6 +- .../clDNN/src/gpu/broadcast_gpu.cpp | 6 +- .../clDNN/src/gpu/concatenation_gpu.cpp | 7 +- .../clDNN/src/gpu/condition_gpu.cpp | 32 +- .../clDNN/src/gpu/configuration.cpp | 29 - .../thirdparty/clDNN/src/gpu/configuration.h | 37 - .../clDNN/src/gpu/convolution_gpu.cpp | 28 +- .../thirdparty/clDNN/src/gpu/crop_gpu.cpp | 6 +- .../clDNN/src/gpu/ctc_greedy_decoder_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/cum_sum_gpu.cpp | 6 +- .../src/gpu/custom_gpu_primitive_gpu.cpp | 78 +- .../clDNN/src/gpu/deconvolution_gpu.cpp | 15 +- .../src/gpu/deformable_convolution_gpu.cpp | 21 +- .../clDNN/src/gpu/depth_to_space_gpu.cpp | 6 +- .../clDNN/src/gpu/detection_output_cpu.cpp | 92 +- .../clDNN/src/gpu/device_cache_reader.h | 27 - .../thirdparty/clDNN/src/gpu/device_info.h | 58 - .../thirdparty/clDNN/src/gpu/eltwise_gpu.cpp | 11 +- .../clDNN/src/gpu/embedding_bag_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/events_waiter.h | 26 - .../src/gpu/extract_image_patches_gpu.cpp | 6 +- .../clDNN/src/gpu/fully_connected_gpu.cpp | 21 +- .../clDNN/src/gpu/fused_conv_eltwise_gpu.cpp | 15 +- .../thirdparty/clDNN/src/gpu/gather_gpu.cpp | 6 +- .../clDNN/src/gpu/gather_nd_gpu.cpp | 22 +- .../clDNN/src/gpu/gather_tree_gpu.cpp | 7 +- .../thirdparty/clDNN/src/gpu/gemm_gpu.cpp | 6 +- .../clDNN/src/gpu/generic_layer_gpu.cpp | 88 +- .../thirdparty/clDNN/src/gpu/grn_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/kernel.cpp | 275 --- .../thirdparty/clDNN/src/gpu/kernel.h | 96 - .../clDNN/src/gpu/kernel_runner.cpp | 90 +- .../thirdparty/clDNN/src/gpu/kernel_runner.h | 26 +- .../thirdparty/clDNN/src/gpu/loop_gpu.cpp | 77 +- .../thirdparty/clDNN/src/gpu/lrn_gpu.cpp | 6 +- .../clDNN/src/gpu/lstm_dynamic_input_gpu.cpp | 19 +- .../src/gpu/lstm_dynamic_timeloop_gpu.cpp | 25 +- .../thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp | 17 +- .../clDNN/src/gpu/lstm_gemm_gpu.cpp | 23 +- .../clDNN/src/gpu/max_unpooling_gpu.cpp | 21 +- .../thirdparty/clDNN/src/gpu/memory_gpu.cpp | 310 --- .../thirdparty/clDNN/src/gpu/memory_gpu.h | 176 -- .../clDNN/src/gpu/mutable_data_gpu.cpp | 12 +- .../thirdparty/clDNN/src/gpu/mvn_gpu.cpp | 6 +- .../clDNN/src/gpu/non_max_suppression_cpu.cpp | 126 +- .../clDNN/src/gpu/normalize_gpu.cpp | 13 +- .../thirdparty/clDNN/src/gpu/ocl_builder.h | 38 - .../clDNN/src/gpu/ocl_queue_wrapper.cpp | 158 -- .../clDNN/src/gpu/ocl_queue_wrapper.h | 96 - .../thirdparty/clDNN/src/gpu/ocl_toolkit.cpp | 236 -- .../thirdparty/clDNN/src/gpu/ocl_toolkit.h | 147 -- .../thirdparty/clDNN/src/gpu/one_hot_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/permute_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/pooling_gpu.cpp | 13 +- .../clDNN/src/gpu/primitive_gpu_base.h | 133 +- .../thirdparty/clDNN/src/gpu/proposal_gpu.cpp | 59 +- .../thirdparty/clDNN/src/gpu/push_pop_map.h | 47 - 
.../clDNN/src/gpu/pyramid_roi_align_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/quantize_gpu.cpp | 23 +- .../thirdparty/clDNN/src/gpu/reduce_gpu.cpp | 6 +- .../clDNN/src/gpu/region_yolo_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/register_gpu.hpp | 124 +- .../thirdparty/clDNN/src/gpu/reorder_gpu.cpp | 14 +- .../clDNN/src/gpu/reorg_yolo_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/resample_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/reshape_gpu.cpp | 6 +- .../clDNN/src/gpu/reverse_sequence_gpu.cpp | 6 +- .../clDNN/src/gpu/roi_pooling_gpu.cpp | 21 +- .../thirdparty/clDNN/src/gpu/scale_gpu.cpp | 16 +- .../src/gpu/scatter_elements_update_gpu.cpp | 6 +- .../clDNN/src/gpu/scatter_nd_update_gpu.cpp | 6 +- .../clDNN/src/gpu/scatter_update_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/select_gpu.cpp | 6 +- .../clDNN/src/gpu/shuffle_channels_gpu.cpp | 6 +- .../thirdparty/clDNN/src/gpu/softmax_gpu.cpp | 6 +- .../clDNN/src/gpu/space_to_batch_gpu.cpp | 6 +- .../clDNN/src/gpu/space_to_depth_gpu.cpp | 6 +- .../clDNN/src/gpu/strided_slice_gpu.cpp | 15 +- .../thirdparty/clDNN/src/gpu/tile_gpu.cpp | 6 +- .../clDNN/src/gpu/wait_for_events_gpu.cpp | 16 +- .../graph_optimizer/add_required_reorders.cpp | 2 +- .../basic_memory_dependencies.cpp | 2 +- .../graph_optimizer/calculate_prior_boxes.cpp | 3 +- .../src/graph_optimizer/compile_graph.cpp | 7 +- .../graph_optimizer/concat_input_order.cpp | 35 +- .../graph_optimizer/eltwise_remove_stride.cpp | 2 +- .../graph_optimizer/handle_input_padding.cpp | 4 +- .../oooq_memory_dependencies.cpp | 2 +- .../graph_optimizer/post_input_reorder.cpp | 5 +- .../graph_optimizer/post_optimize_weights.cpp | 4 +- .../graph_optimizer/pre_replace_deconv.cpp | 22 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 3 +- .../prepare_primitive_fusing.cpp | 23 +- .../graph_optimizer/prepare_quantization.cpp | 243 +- .../graph_optimizer/propagate_constants.cpp | 19 +- .../src/graph_optimizer/reorder_inputs.cpp | 3 +- .../skipped_branch_memory_dependencies.cpp | 2 +- .../strided_slice_optimize.cpp | 2 +- .../clDNN/src/include/activation_inst.h | 5 +- .../clDNN/src/include/arg_max_min_inst.h | 2 +- .../src/include/average_unpooling_inst.h | 3 +- .../clDNN/src/include/batch_to_space_inst.h | 3 +- .../src/include/binary_convolution_inst.h | 6 +- .../clDNN/src/include/border_inst.h | 5 +- .../clDNN/src/include/broadcast_inst.h | 2 +- .../clDNN/src/include/concatenation_inst.h | 3 +- .../clDNN/src/include/condition_inst.h | 17 +- .../clDNN/src/include/convolution_inst.h | 30 +- .../thirdparty/clDNN/src/include/crop_inst.h | 3 +- .../src/include/ctc_greedy_decoder_inst.h | 3 +- .../clDNN/src/include/cum_sum_inst.h | 3 +- .../src/include/custom_gpu_primitive_inst.h | 3 +- .../thirdparty/clDNN/src/include/data_inst.h | 10 +- .../clDNN/src/include/deconvolution_inst.h | 16 +- .../src/include/deformable_convolution_inst.h | 16 +- .../clDNN/src/include/depth_to_space_inst.h | 3 +- .../clDNN/src/include/detection_output_inst.h | 9 +- .../clDNN/src/include/device_impl.h | 105 - .../clDNN/src/include/eltwise_inst.h | 5 +- .../clDNN/src/include/embedding_bag_inst.h | 4 +- .../clDNN/src/include/engine_impl.h | 131 -- .../src/include/extract_image_patches_inst.h | 3 +- .../clDNN/src/include/fully_connected_inst.h | 7 +- .../src/include/fused_conv_eltwise_inst.h | 10 +- .../clDNN/src/include/gather_inst.h | 3 +- .../clDNN/src/include/gather_nd_inst.h | 19 +- .../clDNN/src/include/gather_tree_inst.h | 5 +- .../thirdparty/clDNN/src/include/gemm_inst.h | 3 +- .../clDNN/src/include/generic_layer.hpp | 5 +- 
.../clDNN/src/include/generic_layer_inst.h | 1 + .../thirdparty/clDNN/src/include/grn_inst.h | 3 +- .../clDNN/src/include/implementation_map.h | 2 + .../clDNN/src/include/input_layout_inst.h | 7 +- .../clDNN/src/include/internal_primitive.h | 36 - .../include/internal_primitive_type_base.h | 69 - .../src/include/kernel_selector_helper.h | 14 +- .../clDNN/src/include/layout_optimizer.h | 6 +- .../thirdparty/clDNN/src/include/loop_inst.h | 89 +- .../thirdparty/clDNN/src/include/lrn_inst.h | 3 +- .../src/include/lstm_dynamic_input_inst.h | 12 +- .../clDNN/src/include/lstm_dynamic_inst.h | 4 +- .../src/include/lstm_dynamic_timeloop_inst.h | 18 +- .../clDNN/src/include/lstm_elt_inst.h | 5 +- .../clDNN/src/include/lstm_gemm_inst.h | 11 +- .../thirdparty/clDNN/src/include/lstm_inst.h | 15 +- .../clDNN/src/include/max_unpooling_inst.h | 3 +- .../clDNN/src/include/memory_impl.h | 115 - .../thirdparty/clDNN/src/include/meta_utils.h | 32 +- .../clDNN/src/include/mutable_data_inst.h | 16 +- .../thirdparty/clDNN/src/include/mvn_inst.h | 3 +- .../clDNN/src/include/network_impl.h | 68 +- .../src/include/non_max_suppression_inst.h | 34 +- .../clDNN/src/include/normalize_inst.h | 5 +- .../clDNN/src/include/one_hot_inst.h | 5 +- .../clDNN/src/include/pass_manager.h | 2 +- .../clDNN/src/include/permute_inst.h | 3 +- .../clDNN/src/include/pooling_inst.h | 3 +- .../clDNN/src/include/primitive_inst.h | 140 +- .../clDNN/src/include/primitive_type.h | 17 +- .../clDNN/src/include/primitive_type_base.h | 24 +- .../clDNN/src/include/prior_box_inst.h | 9 +- .../clDNN/src/include/program_dump_graph.h | 3 +- .../clDNN/src/include/program_helpers.h | 6 +- .../clDNN/src/include/program_impl.h | 55 +- .../clDNN/src/include/program_node.h | 90 +- .../clDNN/src/include/proposal_inst.h | 3 +- .../src/include/pyramid_roi_align_inst.h | 12 +- .../clDNN/src/include/quantize_inst.h | 2 +- .../clDNN/src/include/reduce_inst.h | 3 +- .../clDNN/src/include/refcounted_obj.h | 135 -- .../clDNN/src/include/region_yolo_inst.h | 3 +- .../clDNN/src/include/reorder_inst.h | 8 +- .../clDNN/src/include/reorg_yolo_inst.h | 3 +- .../clDNN/src/include/resample_inst.h | 5 +- .../clDNN/src/include/reshape_inst.h | 5 +- .../clDNN/src/include/reverse_sequence_inst.h | 3 +- .../clDNN/src/include/roi_pooling_inst.h | 7 +- .../thirdparty/clDNN/src/include/scale_inst.h | 9 +- .../include/scatter_elements_update_inst.h | 2 +- .../src/include/scatter_nd_update_inst.h | 2 +- .../clDNN/src/include/scatter_update_inst.h | 3 +- .../clDNN/src/include/select_inst.h | 4 +- .../clDNN/src/include/shuffle_channels_inst.h | 3 +- .../clDNN/src/include/sliding_window_utils.h | 8 +- .../clDNN/src/include/softmax_inst.h | 3 +- .../clDNN/src/include/space_to_batch_inst.h | 4 +- .../clDNN/src/include/space_to_depth_inst.h | 4 +- .../thirdparty/clDNN/src/include/split_inst.h | 4 +- .../clDNN/src/include/strided_slice_inst.h | 6 +- .../thirdparty/clDNN/src/include/tile_inst.h | 4 +- .../clDNN/src/include/to_string_utils.h | 11 +- .../clDNN/src/include/topology_impl.h | 9 +- .../thirdparty/clDNN/src/input_layout.cpp | 16 +- .../clDNN/src/kernel_selector_helper.cpp | 24 +- .../thirdparty/clDNN/src/layout_optimizer.cpp | 2 +- .../thirdparty/clDNN/src/loop.cpp | 57 +- inference-engine/thirdparty/clDNN/src/lrn.cpp | 2 +- .../thirdparty/clDNN/src/lstm.cpp | 2 +- .../thirdparty/clDNN/src/lstm_dynamic.cpp | 2 +- .../clDNN/src/lstm_dynamic_input.cpp | 2 +- .../clDNN/src/lstm_dynamic_timeloop.cpp | 2 +- .../thirdparty/clDNN/src/lstm_elt.cpp | 2 +- 
.../thirdparty/clDNN/src/lstm_gemm.cpp | 2 +- .../thirdparty/clDNN/src/max_unpooling.cpp | 2 +- .../thirdparty/clDNN/src/memory.cpp | 149 -- .../thirdparty/clDNN/src/mutable_data.cpp | 77 +- .../thirdparty/clDNN/src/network.cpp | 300 +-- .../thirdparty/clDNN/src/nodes_ordering.cpp | 2 +- .../thirdparty/clDNN/src/normalize.cpp | 2 +- .../thirdparty/clDNN/src/one_hot.cpp | 2 +- .../thirdparty/clDNN/src/permute.cpp | 2 +- .../thirdparty/clDNN/src/pooling.cpp | 2 +- .../thirdparty/clDNN/src/primitive_inst.cpp | 64 +- .../thirdparty/clDNN/src/prior_box.cpp | 20 +- .../thirdparty/clDNN/src/program.cpp | 171 +- .../clDNN/src/program_dump_graph.cpp | 7 +- .../thirdparty/clDNN/src/program_helpers.cpp | 11 +- .../thirdparty/clDNN/src/program_node.cpp | 18 +- .../clDNN/src/pyramid_roi_align.cpp | 2 +- .../thirdparty/clDNN/src/quantize.cpp | 4 +- .../thirdparty/clDNN/src/reduce.cpp | 2 +- .../thirdparty/clDNN/src/reorder.cpp | 4 +- .../thirdparty/clDNN/src/resample.cpp | 2 +- .../thirdparty/clDNN/src/reshape.cpp | 4 +- .../thirdparty/clDNN/src/reverse_sequence.cpp | 2 +- .../thirdparty/clDNN/src/roi_pooling.cpp | 2 +- .../thirdparty/clDNN/src/scale.cpp | 2 +- .../clDNN/src/scatter_elements_update.cpp | 2 +- .../clDNN/src/scatter_nd_update.cpp | 2 +- .../thirdparty/clDNN/src/scatter_update.cpp | 2 +- .../thirdparty/clDNN/src/select.cpp | 2 +- .../thirdparty/clDNN/src/shuffle_channels.cpp | 2 +- .../thirdparty/clDNN/src/space_to_batch.cpp | 2 +- .../thirdparty/clDNN/src/space_to_depth.cpp | 2 +- .../thirdparty/clDNN/src/split.cpp | 4 +- .../thirdparty/clDNN/src/strided_slice.cpp | 2 +- .../thirdparty/clDNN/src/tile.cpp | 4 +- .../thirdparty/clDNN/src/topology.cpp | 11 +- .../thirdparty/clDNN/tests/CMakeLists.txt | 21 +- .../tests/module_tests/events_pool_test.cpp | 17 +- .../tests/module_tests/gpu_toolkit_test.cpp | 225 -- .../graph_manipulation_gpu_test.cpp | 77 +- .../prepare_conv_eltw_fusing.cpp | 33 +- .../module_tests}/program_impl_wrapper.h | 16 + .../module_tests}/reorder_inputs_test.cpp | 56 +- .../module_tests/test_uqr_distribution.cpp | 8 +- .../module_tests}/usm_memory_test.cpp | 104 +- .../test_cases/activation_simple_gpu_test.cpp | 304 ++- .../test_cases/add_reorders_gpu_test.cpp | 83 +- .../tests/test_cases/arg_max_gpu_test.cpp | 282 +-- .../test_cases/average_unpooling_gpu_test.cpp | 68 +- .../clDNN/tests/test_cases/barriers_test.cpp | 51 +- .../test_cases/batch_to_space_gpu_test.cpp | 125 +- .../binary_convolution_gpu_test.cpp | 128 +- .../tests/test_cases/border_gpu_test.cpp | 150 +- .../tests/test_cases/broadcast_gpu_test.cpp | 75 +- .../clDNN/tests/test_cases/cache_test.cpp | 33 +- .../tests/test_cases/cl_mem_input_test.cpp | 92 +- .../tests/test_cases/command_queue_test.cpp | 62 +- .../test_cases/concatenation_gpu_test.cpp | 180 +- .../tests/test_cases/condition_gpu_test.cpp | 193 +- .../tests/test_cases/convolution_gpu_test.cpp | 1454 ++++++------ .../clDNN/tests/test_cases/crop_gpu_test.cpp | 204 +- .../tests/test_cases/cum_sum_gpu_test.cpp | 91 +- .../test_cases/custom_gpu_primitive_test.cpp | 197 +- .../test_cases/deconvolution_gpu_test.cpp | 478 ++-- .../test_cases/depth_concatenate_gpu_test.cpp | 300 ++- .../test_cases/depth_to_space_gpu_test.cpp | 87 +- .../test_cases/detection_output_test.cpp | 311 ++- .../tests/test_cases/eltwise_gpu_test.cpp | 765 +++---- .../test_cases/embedding_bag_gpu_test.cpp | 287 ++- .../extract_image_patches_gpu_test.cpp | 71 +- .../test_cases/fully_connected_gpu_test.cpp | 246 +- .../fused_conv_eltwise_gpu_test.cpp | 89 +- 
.../tests/test_cases/fusings_gpu_test.cpp | 107 +- .../tests/test_cases/gather_gpu_test.cpp | 365 ++- .../tests/test_cases/gather_nd_gpu_test.cpp | 141 +- .../clDNN/tests/test_cases/gemm_gpu_test.cpp | 216 +- .../clDNN/tests/test_cases/loop_gpu_test.cpp | 117 +- .../clDNN/tests/test_cases/lrn_gpu_test.cpp | 44 +- .../test_cases/lstm_dynamic_gpu_test.cpp | 181 +- .../clDNN/tests/test_cases/lstm_gpu_test.cpp | 301 ++- .../test_cases/max_unpooling_gpu_test.cpp | 88 +- .../clDNN/tests/test_cases/memory_test.cpp | 211 +- .../clDNN/tests/test_cases/mvn_gpu_test.cpp | 176 +- .../test_cases/non_max_suppression_test.cpp | 59 +- .../tests/test_cases/normalizel2_gpu_test.cpp | 53 +- .../tests/test_cases/one_hot_gpu_test.cpp | 154 +- .../tests/test_cases/permute_gpu_test.cpp | 303 ++- .../tests/test_cases/pooling_gpu_test.cpp | 587 +++-- .../propagate_constants_gpu_test.cpp | 37 +- .../tests/test_cases/proposal_cpu_test.cpp | 72 +- .../test_cases/pyramid_roi_align_gpu_test.cpp | 30 +- .../tests/test_cases/quantize_gpu_test.cpp | 164 +- .../tests/test_cases/reduce_gpu_test.cpp | 278 ++- .../tests/test_cases/region_yolo_gpu_test.cpp | 26 +- .../test_cases/removing_output_node_test.cpp | 52 +- .../tests/test_cases/reorder_gpu_test.cpp | 434 ++-- .../tests/test_cases/resample_gpu_test.cpp | 311 ++- .../tests/test_cases/reshape_gpu_test.cpp | 106 +- .../test_cases/reverse_sequence_gpu_test.cpp | 189 +- .../clDNN/tests/test_cases/scale_gpu_test.cpp | 1994 ----------------- .../scatter_elements_update_gpu_test.cpp | 30 +- .../test_cases/scatter_nd_update_gpu_test.cpp | 544 +++-- .../test_cases/scatter_update_gpu_test.cpp | 507 +++-- .../tests/test_cases/select_gpu_test.cpp | 641 +++--- .../test_cases/shuffle_channels_test.cpp | 83 +- .../tests/test_cases/softmax_gpu_test.cpp | 187 +- .../test_cases/space_to_batch_gpu_test.cpp | 125 +- .../test_cases/space_to_depth_gpu_test.cpp | 155 +- .../spatial_concatenate_gpu_test.cpp | 322 ++- .../clDNN/tests/test_cases/split_gpu_test.cpp | 125 +- .../clDNN/tests/test_cases/streams_test.cpp | 145 +- .../test_cases/strided_slice_gpu_test.cpp | 352 ++- .../clDNN/tests/test_cases/tensor_test.cpp | 3 +- .../clDNN/tests/test_cases/tile_gpu_test.cpp | 107 +- .../clDNN/tests/test_cases/topology_test.cpp | 43 +- .../test_cases/trim_to_outputs_gpu_test.cpp | 61 +- .../tests/test_utils/instrumentation.cpp | 386 ---- .../clDNN/tests/test_utils/instrumentation.h | 39 - .../clDNN/tests/test_utils/network_test.h | 67 +- .../clDNN/tests/test_utils/test_utils.cpp | 711 +++--- .../clDNN/tests/test_utils/test_utils.h | 413 ++-- .../uniform_quantized_real_distribution.hpp | 7 +- .../clDNN/tests_core_internal/CMakeLists.txt | 137 -- .../clDNN/tests_core_internal/main.cpp | 11 - 613 files changed, 13742 insertions(+), 18513 deletions(-) delete mode 100644 inference-engine/thirdparty/clDNN/api/cldnn.hpp rename inference-engine/thirdparty/clDNN/api/{ => cldnn/graph}/network.hpp (79%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/graph}/program.hpp (97%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/graph}/topology.hpp (84%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/activation.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/arg_max_min.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/average_unpooling.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/batch_to_space.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => 
cldnn/primitives}/binary_convolution.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/border.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/broadcast.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/concatenation.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/condition.hpp (98%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/convolution.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/crop.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/ctc_greedy_decoder.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/cum_sum.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/custom_gpu_primitive.hpp (98%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/data.hpp (92%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/deconvolution.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/depth_to_space.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/detection_output.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/eltwise.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/embedding_bag.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/extract_image_patches.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/fully_connected.hpp (100%) rename inference-engine/thirdparty/clDNN/{api_extension => api/cldnn/primitives}/fused_conv_eltwise.hpp (98%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/gather.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/gather_nd.hpp (63%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/gather_tree.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/gemm.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/grn.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/input_layout.hpp (97%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/loop.hpp (99%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/lrn.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/lstm.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/lstm_dynamic.hpp (100%) rename inference-engine/thirdparty/clDNN/{api_extension => api/cldnn/primitives}/lstm_dynamic_input.hpp (98%) rename inference-engine/thirdparty/clDNN/{api_extension => api/cldnn/primitives}/lstm_dynamic_timeloop.hpp (99%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/max_unpooling.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/mutable_data.hpp (92%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/mvn.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/non_max_suppression.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/normalize.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/one_hot.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/permute.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/pooling.hpp (100%) rename 
inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/primitive.hpp (98%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/prior_box.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/proposal.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/pyramid_roi_align.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/quantize.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/reduce.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/region_yolo.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/reorder.hpp (99%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/reorg_yolo.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/resample.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/reshape.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/reverse_sequence.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/roi_pooling.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/scale.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/scatter_elements_update.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/scatter_nd_update.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/scatter_update.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/select.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/shuffle_channels.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/softmax.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/space_to_batch.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/space_to_depth.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/split.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/strided_slice.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/primitives}/tile.hpp (100%) rename inference-engine/thirdparty/clDNN/api/{compounds.h => cldnn/runtime/compounds.hpp} (98%) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/device.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_query.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine_configuration.hpp rename inference-engine/thirdparty/clDNN/{src/include/error_handler.h => api/cldnn/runtime/error_handler.hpp} (99%) rename inference-engine/thirdparty/clDNN/{src/include/event_impl.h => api/cldnn/runtime/event.hpp} (82%) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/half.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel_args.hpp rename inference-engine/thirdparty/clDNN/api/{ => cldnn/runtime}/layout.hpp (99%) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory.hpp create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_caps.hpp rename 
inference-engine/thirdparty/clDNN/{src/include/memory_pool.h => api/cldnn/runtime/memory_pool.hpp} (52%) rename inference-engine/thirdparty/clDNN/api/{ => cldnn/runtime}/profiling.hpp (99%) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/stream.hpp rename inference-engine/thirdparty/clDNN/api/{ => cldnn/runtime}/tensor.hpp (99%) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/runtime/utils.hpp delete mode 100644 inference-engine/thirdparty/clDNN/api/device.hpp delete mode 100644 inference-engine/thirdparty/clDNN/api/engine.hpp delete mode 100644 inference-engine/thirdparty/clDNN/api/event.hpp delete mode 100644 inference-engine/thirdparty/clDNN/api/memory.hpp delete mode 100644 inference-engine/thirdparty/clDNN/api/meta_utils.hpp rename inference-engine/thirdparty/clDNN/{src/gpu => kernel_selector/core}/device_cache_reader.cpp (65%) create mode 100644 inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.h create mode 100644 inference-engine/thirdparty/clDNN/runtime/CMakeLists.txt rename inference-engine/thirdparty/clDNN/{src/include/cldnn_itt.h => runtime/cldnn_itt.hpp} (92%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/device_query.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/engine.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/event.cpp rename inference-engine/thirdparty/clDNN/{src/gpu => runtime}/kernels_cache.cpp (82%) rename inference-engine/thirdparty/clDNN/{src/gpu/kernels_cache.h => runtime/kernels_cache.hpp} (70%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/kernels_factory.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/kernels_factory.hpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/memory.cpp rename inference-engine/thirdparty/clDNN/{src => runtime}/memory_pool.cpp (55%) rename inference-engine/thirdparty/clDNN/{src/gpu => runtime/ocl}/ocl_base_event.cpp (93%) rename inference-engine/thirdparty/clDNN/{src/gpu/ocl_base_event.h => runtime/ocl/ocl_base_event.hpp} (76%) rename inference-engine/thirdparty/clDNN/{src/gpu/command_queues_builder.cpp => runtime/ocl/ocl_command_queues_builder.cpp} (80%) rename inference-engine/thirdparty/clDNN/{src/gpu/command_queues_builder.h => runtime/ocl/ocl_command_queues_builder.hpp} (57%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_common.hpp rename inference-engine/thirdparty/clDNN/{src/gpu/device_info.cpp => runtime/ocl/ocl_device.cpp} (54%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.hpp rename inference-engine/thirdparty/clDNN/{src/gpu/ocl_builder.cpp => runtime/ocl/ocl_device_detector.cpp} (68%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.hpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.hpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine_factory.hpp rename inference-engine/thirdparty/clDNN/{src/gpu/events_pool.h => runtime/ocl/ocl_events_pool.hpp} (57%) rename inference-engine/thirdparty/clDNN/{common/include/cl2_ext.hpp => runtime/ocl/ocl_ext.hpp} (97%) create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernel.hpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernels_factory.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.cpp create mode 100644 
inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.hpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.cpp create mode 100644 inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.hpp rename inference-engine/thirdparty/clDNN/{src/gpu => runtime/ocl}/ocl_user_event.cpp (94%) rename inference-engine/thirdparty/clDNN/{src/gpu/ocl_user_event.h => runtime/ocl/ocl_user_event.hpp} (71%) rename inference-engine/thirdparty/clDNN/{common/include/cl2_wrapper.h => runtime/ocl/ocl_wrapper.hpp} (95%) delete mode 100644 inference-engine/thirdparty/clDNN/src/device.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/engine.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/event.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/configuration.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/device_info.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/kernel.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/push_pop_map.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/device_impl.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/engine_impl.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/internal_primitive.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/internal_primitive_type_base.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/memory_impl.h delete mode 100644 inference-engine/thirdparty/clDNN/src/include/refcounted_obj.h delete mode 100644 inference-engine/thirdparty/clDNN/src/memory.cpp delete mode 100644 inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp rename inference-engine/thirdparty/clDNN/{tests_core_internal/test_cases => tests/module_tests}/graph_manipulation_gpu_test.cpp (67%) rename inference-engine/thirdparty/clDNN/{tests_core_internal/test_cases => tests/module_tests}/prepare_conv_eltw_fusing.cpp (78%) rename inference-engine/thirdparty/clDNN/{tests_core_internal => tests/module_tests}/program_impl_wrapper.h (70%) rename inference-engine/thirdparty/clDNN/{tests_core_internal/test_cases => tests/module_tests}/reorder_inputs_test.cpp (79%) rename inference-engine/thirdparty/clDNN/{tests_core_internal/test_cases => tests/module_tests}/usm_memory_test.cpp (68%) delete mode 100644 inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp delete mode 100644 inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp delete mode 100644 inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.h delete mode 100644 inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt delete mode 100644 
inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt index ff138843dc4..161d6f16a8d 100644 --- a/inference-engine/src/cldnn_engine/CMakeLists.txt +++ b/inference-engine/src/cldnn_engine/CMakeLists.txt @@ -33,7 +33,7 @@ target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} $ ${CLDNN__OCL_ICD_INCDIRS} - ${CLDNN_TOP_FOLDER}) + ${CLDNN_TOP_FOLDER}/api) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/inference-engine/src/cldnn_engine/cldnn_common_utils.h b/inference-engine/src/cldnn_engine/cldnn_common_utils.h index c374a71a465..f41f2d8e134 100644 --- a/inference-engine/src/cldnn_engine/cldnn_common_utils.h +++ b/inference-engine/src/cldnn_engine/cldnn_common_utils.h @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include "ngraph/type/element_type.hpp" diff --git a/inference-engine/src/cldnn_engine/cldnn_config.h b/inference-engine/src/cldnn_engine/cldnn_config.h index 8c6d5d6c921..873c01e2188 100644 --- a/inference-engine/src/cldnn_engine/cldnn_config.h +++ b/inference-engine/src/cldnn_engine/cldnn_config.h @@ -9,7 +9,7 @@ #include "cldnn_custom_layer.h" -#include +#include namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/cldnn_custom_layer.h b/inference-engine/src/cldnn_engine/cldnn_custom_layer.h index 95a6ff4c5c9..cbe41f8831b 100644 --- a/inference-engine/src/cldnn_engine/cldnn_custom_layer.h +++ b/inference-engine/src/cldnn_engine/cldnn_custom_layer.h @@ -10,7 +10,7 @@ #include #include #include "pugixml.hpp" -#include "api/tensor.hpp" +#include "cldnn/runtime/tensor.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 171919a8077..e3e67d95f15 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -81,6 +81,8 @@ #include "cldnn_itt.h" #include "gpu/gpu_config.hpp" +#include "cldnn/runtime/device_query.hpp" + #ifdef __linux__ # include #endif @@ -117,13 +119,13 @@ struct clDNNEngine::impl { }; cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map &config) const { - auto device_info = device_map.begin()->second.get_info(); + auto device_info = device_map.begin()->second->get_info(); if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { auto val = config.at(PluginConfigParams::KEY_DEVICE_ID); if (device_map.find(val) == device_map.end()) { IE_THROW() << "Invalid device ID: " << val; } - device_info = device_map.at(val).get_info(); + device_info = device_map.at(val)->get_info(); } return device_info; @@ -445,7 +447,8 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) { RegisterPrimitives(); // try loading clDNN engine and get info from it { - cldnn::device_query device_query; + // Set OCL runtime which should be always available + cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl); device_map = device_query.get_available_devices(); } // locate global custom kernel config @@ -851,8 +854,8 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s }; static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) { - auto freqGHz = info.core_frequency / 1000.f; - auto numEUs = info.cores_count; + auto freqGHz = info.gpu_frequency / 1000.f; + auto numEUs = info.execution_units_count; auto 
opsPerComputeBlock = 0; auto computeBlockIPC = 1.0f; switch (dt) { @@ -894,8 +897,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::mapsecond.get_info() : - device_map.begin()->second.get_info(); + iter->second->get_info() : + device_map.begin()->second->get_info(); if (name == METRIC_KEY(SUPPORTED_METRICS)) { std::vector metrics; @@ -931,7 +934,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map #include #include -#include +#include #include #include #include "cldnn_remote_context.h" @@ -22,7 +22,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin, std::shared_ptr _impl; // key: device_id, value: cldnn device - std::map device_map; + std::map device_map; std::mutex engine_mutex; mutable CLDNNRemoteCLContext::Ptr m_defaultContext; diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp index 5191da35c2e..94245c1d3b6 100644 --- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include #include "ie_metric_helpers.hpp" -#include -#include +#include +#include +#include + +#include "ie_metric_helpers.hpp" #include #include #include @@ -27,7 +27,6 @@ #include "threading/ie_cpu_streams_executor.hpp" #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" - using namespace InferenceEngine; using namespace InferenceEngine::details; diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index 1f835d8ac2c..2b333a38ee9 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -2,22 +2,28 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include + +#include "cldnn_graph.h" +#include "simple_math.h" +#include +#include "cldnn_infer_request.h" + +#include +#include +#include + +#include +#include + #include #include #include #include -#include -#include -#include -#include #include #include #include -#include "cldnn_graph.h" -#include "simple_math.h" -#include -#include "cldnn_infer_request.h" -#include #include #include #include @@ -71,12 +77,10 @@ void CLDNNGraph::Build() { for (int b = m_bv_sz - 1; b >= 0; b--) { auto network = BuildNetwork(m_program->GetCompiledProgram(b)); m_networks.insert(m_networks.begin(), network); - GetEngine()->release_pending_memory(network->get_id()); } } else { auto network = BuildNetwork(m_program->GetCompiledProgram()); m_networks.emplace_back(network); - GetEngine()->release_pending_memory(network->get_id()); } UpdateImplementationsMap(); @@ -499,7 +503,7 @@ void CLDNNGraph::UpdatePerfStatistics() { } }; - std::map executedPrimitives = GetNetwork()->get_executed_primitives(); + std::map executedPrimitives = GetNetwork()->get_executed_primitives(); auto allPrimitives = GetNetwork()->get_all_primitives(); // Get profiling info for all layers @@ -521,7 +525,7 @@ void CLDNNGraph::UpdatePerfStatistics() { auto event = execIter->second; executedPrimitives.erase(execIter); - cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()}; + cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event->get_profiling_info()}; collectTimings(cldnnInfo, perfCount); perfCount.num++; @@ -534,7 +538,7 @@ void CLDNNGraph::UpdatePerfStatistics() { pcIter = perfMap.find(executedID.first); auto& perfCount = 
pcIter->second.second; - cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second.get_profiling_info()}; + cldnn::instrumentation::profiling_info cldnnInfo{executedID.first, executedID.second->get_profiling_info()}; collectTimings(cldnnInfo, perfCount); perfCount.num++; @@ -675,7 +679,7 @@ std::map CLDNNGraph::G executedPrimitives.find(primId) != executedPrimitives.end()) { auto event = executedPrimitives.at(primId); - cldnn::instrumentation::profiling_info cldnnInfo{primId, event.get_profiling_info()}; + cldnn::instrumentation::profiling_info cldnnInfo{primId, event->get_profiling_info()}; // Collect timings long long cpuTime = 0; diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index 774b159a16c..5ce64712fef 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -17,8 +17,8 @@ #include "ie_blob.h" #include "cpp/ie_cnn_network.h" -#include -#include +#include +#include #include #include "cldnn_custom_layer.h" @@ -43,7 +43,7 @@ public: const Config& getConfig() const { return m_config; } InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; } - std::shared_ptr GetEngine() const { return getContextImpl(m_context)->GetEngine(); } + std::shared_ptr GetEngine() const { return getContextImpl(m_context)->GetEngine(); } int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; } const std::map& GetInputLayouts() const { return m_program->GetInputLayouts(); } size_t GetNetworksCount() const { return m_networks.size(); } diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index 23f9895970d..bb923f373b9 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -19,7 +19,7 @@ using namespace InferenceEngine; namespace CLDNNPlugin { -const char CLDNNInferRequest::fp32_suffix[] = "_fp32"; +const char fp32_suffix[] = "_fp32"; const char str_not_allocated[] = "Input data was not allocated."; const char cannot_set_compound[] = "cannot set compound blob: supported only for input pre-processing"; const char wrong_nv12_blob[] = "NV12 input blob is expected for input with NV12 color format"; @@ -110,7 +110,7 @@ Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* m } } -void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& inputMem) { +void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_attach"); auto impl = getContextImpl(m_graph->GetContext()); impl->acquire_lock(); @@ -127,159 +127,66 @@ void CLDNNInferRequest::input_attach(cldnn::primitive_id name, cldnn::memory& in void CLDNNInferRequest::input_alloc(cldnn::primitive_id name, const cldnn::layout& layout) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::input_alloc"); - cldnn::memory input_mem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout); + cldnn::memory::ptr input_mem = m_graph->GetEngine()->allocate_memory(layout); input_attach(name, input_mem); } -void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory, - Blob::Ptr bptr, - buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData"); - size_t n = (bi == nullptr) ? 
bptr->size() : bi->buf_size; +template +void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi, cldnn::stream& stream) { + size_t n = (bi == nullptr) ? dst->size() : bi->buf_size; size_t offset = (bi == nullptr) ? 0 : bi->buf_offset; - auto layout = outputMemory.get_layout(); + auto layout = src->get_layout(); auto size = layout.size; - auto l_padd = layout.data_padding.lower_size(); - auto u_padd = layout.data_padding.upper_size(); - auto h_padding = u_padd.spatial[0] + l_padd.spatial[0]; - auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1]; - auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1]; + auto locked_dst = dst->buffer(); + auto dst_ptr = locked_dst.as(); + if (dst_ptr == nullptr) { + IE_THROW() << "Invalid output blob"; + } + cldnn::mem_lock src_lock{ src, stream }; + T* src_ptr = src_lock.data(); + dst_ptr += offset; - auto locked = bptr->buffer(); - switch (bptr->getTensorDesc().getPrecision()) { - case Precision::FP32: { - auto out_f = locked.as(); - if (out_f == nullptr) { - IE_THROW() << "Invalid output blob"; - } - auto resPtr = outputMemory.pointer(); - float *resVec = out_f + offset; - - if (h_padding || v_padding_l || v_padding_u) { - size_t i = 0; - for (size_t b = 0; b < size.batch[0]; b++) { - for (size_t f = 0; f < size.feature[0]; f++) { - i += v_padding_l; - for (size_t y = 0; y < size.spatial[1]; y++) { - i += l_padd.spatial[0]; - for (size_t x = 0; x < size.spatial[0]; x++, i++) { - *resVec++ = resPtr[i]; + if (layout.data_padding) { + for (size_t b = 0; b < size.batch[0]; b++) { + for (size_t f = 0; f < size.feature[0]; f++) { + for (size_t w = 0; w < size.spatial[3]; w++) { + for (size_t z = 0; z < size.spatial[2]; z++) { + for (size_t y = 0; y < size.spatial[1]; y++) { + for (size_t x = 0; x < size.spatial[0]; x++) { + *dst_ptr++ = src_ptr[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]; + } } - i += u_padd.spatial[0]; } - i += v_padding_u; } } - } else { - for (size_t i = 0; i < n; i++) { - resVec[i] = resPtr[i]; - } + } + } else { + for (size_t i = 0; i < n; i++) { + dst_ptr[i] = src_ptr[i]; } } - break; - case Precision::FP16: { - auto out_f = locked.as(); - if (out_f == nullptr) { - IE_THROW() << "Invalid output blob"; - } - auto resPtr = outputMemory.pointer(); - uint16_t* resVec = out_f + offset; +} - if (h_padding || v_padding_l || v_padding_u) { - size_t i = 0; - for (size_t b = 0; b < size.batch[0]; b++) { - for (size_t f = 0; f < size.feature[0]; f++) { - i += v_padding_l; - for (size_t y = 0; y < size.spatial[1]; y++) { - i += l_padd.spatial[0]; - for (size_t x = 0; x < size.spatial[0]; x++, i++) { - *resVec++ = resPtr[i]; - } - i += u_padd.spatial[0]; - } - i += v_padding_u; - } - } - } else { - for (size_t i = 0; i < n; i++) { - resVec[i] = resPtr[i]; - } - } - } - break; - case Precision::I32: { - auto out_f = locked.as(); - if (out_f == nullptr) { - IE_THROW() << "Invalid output blob"; - } - auto resPtr = outputMemory.pointer(); - int32_t* resVec = out_f + offset; - - if (h_padding || v_padding_l || v_padding_u) { - size_t i = 0; - for (size_t b = 0; b < size.batch[0]; b++) { - for (size_t f = 0; f < size.feature[0]; f++) { - i += v_padding_l; - for (size_t y = 0; y < size.spatial[1]; y++) { - i += l_padd.spatial[0]; - for (size_t x = 0; x < size.spatial[0]; x++, i++) { - *resVec++ = resPtr[i]; - } - i += u_padd.spatial[0]; - } - i += v_padding_u; - } - } - } else { - for (size_t i = 0; i < n; i++) { - resVec[i] = resPtr[i]; - } - } - } - break; - case Precision::I64: { - 
auto out_f = locked.as(); - if (out_f == nullptr) { - IE_THROW() << "Invalid output blob"; - } - auto resPtr = outputMemory.pointer(); - int64_t* resVec = out_f + offset; - - if (h_padding || v_padding_l || v_padding_u) { - size_t i = 0; - for (size_t b = 0; b < size.batch[0]; b++) { - for (size_t f = 0; f < size.feature[0]; f++) { - i += v_padding_l; - for (size_t y = 0; y < size.spatial[1]; y++) { - i += l_padd.spatial[0]; - for (size_t x = 0; x < size.spatial[0]; x++, i++) { - *resVec++ = resPtr[i]; - } - i += u_padd.spatial[0]; - } - i += v_padding_u; - } - } - } else { - for (size_t i = 0; i < n; i++) { - resVec[i] = resPtr[i]; - } - } - } - break; - default: - IE_THROW() << "The plugin does not support output " << bptr->getTensorDesc().getPrecision() << " precision"; +void CLDNNInferRequest::copyOutputData(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyOutputData"); + auto& stream = m_graph->GetNetwork()->get_stream(); + switch (dst->getTensorDesc().getPrecision()) { + case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::FP16: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I32: copyResultToOutputBlob(src, dst, bi, stream); break; + case Precision::I64: copyResultToOutputBlob(src, dst, bi, stream); break; + default: IE_THROW(NotImplemented) << "The plugin does not support output " << dst->getTensorDesc().getPrecision() << " precision"; } } void CLDNNInferRequest::copyInputData(std::shared_ptr network, - const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, - const Blob &inputBlob, buf_info* bi) { + const cldnn::primitive_id &inputName, + const cldnn::layout& inputLayout, + const Blob &inputBlob, buf_info* bi) { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copyInputData"); - size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size; + size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; cldnn::primitive_id internalName = "parameter:" + inputName; @@ -287,37 +194,37 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr network, switch (inputBlob.getTensorDesc().getPrecision()) { case Precision::FP32: { float* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::I32: { int32_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::I64: { int64_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::FP16: { uint16_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::I8: { int8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::U8: { uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } case Precision::BOOL: { uint8_t* blob_ptr = const_cast(locked.as()) + offset; - network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + network->set_input_data(internalName, network->get_engine().attach_memory(inputLayout, blob_ptr)); break; } default: @@ -601,6 +508,7 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr &data) void CLDNNInferRequest::AllocateInputs() { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::AllocateInputs"); auto inputLayouts = m_graph->GetInputLayouts(); + auto& stream = m_graph->GetNetwork()->get_stream(); // allocate inputs for (auto& ni : _networkInputs) { std::string name = ni.first; @@ -623,25 +531,24 @@ void CLDNNInferRequest::AllocateInputs() { input_alloc(UVName, inputLayouts.at(UVName)); size_t height = desc.getDims()[2], width = desc.getDims()[3]; - cldnn::pointer input_mem_ptr_Y = inputsMemory.at(YName).pointer(); + cldnn::mem_lock input_mem_ptr_Y{inputsMemory.at(YName), stream}; TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC); auto blobY = createInputBlob(ydesc, input_mem_ptr_Y.data()); - cldnn::pointer input_mem_ptr_UV = inputsMemory.at(UVName).pointer(); + cldnn::mem_lock input_mem_ptr_UV{ inputsMemory.at(UVName), stream }; TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC); auto blobUV = createInputBlob(uvdesc, input_mem_ptr_UV.data()); blobs.push_back(make_shared_blob(blobY, blobUV)); } _inputs[name] = desc.getDims()[0] == 1 ? 
blobs[0] : make_shared_blob(blobs); - } else { if (inputLayouts.find(name) == inputLayouts.end()) { IE_THROW() << "Input layout for " << name << " is not found"; } cldnn::layout layout = inputLayouts.at(name); input_alloc(name, layout); - cldnn::pointer mem_ptr = inputsMemory.at(name).pointer(); + cldnn::mem_lock mem_ptr{inputsMemory.at(name), stream}; _inputs[name] = createInputBlob(desc, mem_ptr.data()); if (desc.getPrecision() == Precision::I16 || desc.getPrecision() == Precision::U16) { @@ -685,8 +592,8 @@ void CLDNNInferRequest::AllocateOutputs() { bool can_reuse_internal_mem = !m_useStreams; for (auto& no : _networkOutputs) { std::string outputID = m_graph->MapOutputName(no.first); - cldnn::memory output_mem = m_graph->GetNetwork()->get_output_memory(outputID); - cldnn::pointer output_mem_ptr = output_mem.pointer(); + cldnn::memory::ptr output_mem = m_graph->GetNetwork()->get_output_memory(outputID); + cldnn::mem_lock output_mem_ptr{output_mem, m_graph->GetNetwork()->get_stream()}; if (output_mem_ptr.data() == nullptr) { IE_THROW() << "Empty output memory for primitive " << outputID; } @@ -824,6 +731,7 @@ CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap void CLDNNInferRequest::execAndParse() { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::execAndParse"); auto networkOutputs = m_graph->GetNetwork()->execute(); + auto& stream = m_graph->GetNetwork()->get_stream(); // Collect outputs as requested by the model for (auto& no : _networkOutputs) { @@ -835,12 +743,12 @@ void CLDNNInferRequest::execAndParse() { // mapping remote blobs not needed - // let the user take care of them explicitly if (!bptr->is()) { - auto out_ptr = outputMemory.pointer(); + cldnn::mem_lock out_ptr{outputMemory, stream}; auto blob_ptr = bptr->buffer().as(); // If Async API is used, copy of output blobs is not needed, unless SetBlob function was called. // But in the case when old API is used we have to copy data to memory provided by user. 
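Reviewer note: the cldnn_infer_request.cpp hunks above and below all apply one recurring change — raw cldnn::memory objects and memory.pointer<T>() host mappings are replaced by shared cldnn::memory::ptr handles whose host access goes through a stream-bound cldnn::mem_lock. A minimal sketch of that pattern follows; the function name, variable names and header paths are illustrative assumptions based on the cldnn/runtime headers introduced elsewhere in this patch, not code taken from the diff.

    // Sketch only: the memory-handling pattern this patch migrates the plugin to.
    #include <cstddef>
    #include "cldnn/runtime/engine.hpp"
    #include "cldnn/runtime/memory.hpp"
    #include "cldnn/runtime/stream.hpp"

    void copy_to_host(cldnn::engine& engine, cldnn::stream& stream,
                      const cldnn::layout& layout, float* dst, std::size_t count) {
        // Allocation now goes through the engine and returns a shared handle
        // (previously: cldnn::memory::allocate(engine, layout)).
        cldnn::memory::ptr mem = engine.allocate_memory(layout);

        // Host access is an RAII lock bound to a stream
        // (previously: mem.pointer<float>() on a raw cldnn::memory object).
        cldnn::mem_lock<float> src{mem, stream};
        for (std::size_t i = 0; i < count; ++i) {
            dst[i] = src.data()[i];
        }
    }

The same pair of changes explains most of the mechanical edits in this file: memory& parameters become memory::ptr, and every pointer<T>() call becomes a mem_lock<T> constructed with the network's stream.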
- if (blob_ptr != &out_ptr[0]) { + if (blob_ptr != out_ptr.data()) { copyOutputData(outputMemory, bptr); } } @@ -965,19 +873,20 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const IE_THROW() << "Input name mismatch."; } auto inputLayout = m_graph->GetInputLayouts().at(inputName); - auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool { + auto is_same_buffer = [&](const Blob& blob, cldnn::memory::ptr memory) -> bool { const std::string str_not_allocated("Input data was not allocated."); - cldnn::pointer ptr = memory.pointer(); + cldnn::mem_lock ptr{memory, m_graph->GetNetwork()->get_stream()}; const uint8_t* blob_ptr = blob.cbuffer().as(); const uint8_t* mem_ptr = ptr.data(); if (blob_ptr == nullptr || mem_ptr == nullptr) { IE_THROW() << str_not_allocated; } - return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size()); + return (blob_ptr == mem_ptr) && (blob.byteSize() == memory->size()); }; cldnn::primitive_id internalName = "parameter:" + inputName; - const cldnn::memory& memory = inputsMemory.at(inputName); + cldnn::memory::ptr memory = inputsMemory.at(inputName); + auto& stream = m_graph->GetNetwork()->get_stream(); auto _nw_ptr = m_graph->GetNetwork(); auto prec = inputBlob.getTensorDesc().getPrecision(); @@ -986,8 +895,8 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const _nw_ptr->set_input_data(internalName, memory); } else if (prec == Precision::I16 || prec == Precision::U16) { // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision - const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix); - cldnn::pointer ptr = fp32_mem.pointer(); + cldnn::memory::ptr fp32_mem = inputsMemory.at(inputName+fp32_suffix); + cldnn::mem_lock ptr {fp32_mem, stream}; if (prec == Precision::I16) { copyToFloat(ptr.data(), &inputBlob); } else { @@ -1031,4 +940,4 @@ void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, co } } -}; // namespace CLDNNPlugin +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h index f9ec4d94db5..a988438e8d6 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h @@ -46,7 +46,7 @@ public: void EnableStreams() { m_useStreams = true; } protected: - std::map inputsMemory; + std::map inputsMemory; std::map outputsMap; bool m_useProfiling; @@ -60,12 +60,12 @@ protected: InferenceEngine::Blob::Ptr createInputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr); - void copyOutputData(const cldnn::memory& outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); + void copyOutputData(cldnn::memory::ptr outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr); void copyInputData(std::shared_ptr network, const cldnn::primitive_id &inputName, const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, buf_info* bi = nullptr); - void input_attach(cldnn::primitive_id name, cldnn::memory& inputMem); + void input_attach(cldnn::primitive_id name, cldnn::memory::ptr inputMem); void input_alloc(cldnn::primitive_id name, const cldnn::layout& layout); void AllocateInputs(); void AllocateOutputs(); @@ -76,9 +76,6 @@ protected: void PrepareInput(const cldnn::primitive_id 
&inputName, const InferenceEngine::Blob &inputBlob); void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob); - -private: - static const char fp32_suffix[]; }; }; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp index d30434397fd..6ff0d4ecef3 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -92,7 +92,7 @@ bool Program::CanProcessDynBatch(std::vector> ops, return true; } -Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, bool createTopologyOnly) +Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, bool createTopologyOnly) : m_config(config) , m_engine(engine) , m_curBatch(-1) @@ -128,11 +128,9 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptr(b)); m_programs.insert(m_programs.begin(), BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly)); - m_engine->release_pending_memory(0); } } else { m_programs.emplace_back(BuildProgram(ops, networkInputs, networkOutputs, createTopologyOnly)); - m_engine->release_pending_memory(0); } } diff --git a/inference-engine/src/cldnn_engine/cldnn_program.h b/inference-engine/src/cldnn_engine/cldnn_program.h index a5299d810f9..8f90b4fabb7 100644 --- a/inference-engine/src/cldnn_engine/cldnn_program.h +++ b/inference-engine/src/cldnn_engine/cldnn_program.h @@ -15,8 +15,8 @@ #include "cldnn_config.h" -#include -#include +#include +#include // Forward declarations for cldnn part namespace cldnn { @@ -69,8 +69,8 @@ public: class Program { public: - Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, bool createTopologyOnly = false); - Program(std::shared_ptr engine, const Config& config) : m_config(config), m_engine(engine), + Program(InferenceEngine::CNNNetwork& network, std::shared_ptr engine, const Config& config, bool createTopologyOnly = false); + Program(std::shared_ptr engine, const Config& config) : m_config(config), m_engine(engine), m_curBatch(-1), queryMode(false), m_max_batch(1) {} Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false), m_max_batch(1) {} @@ -100,8 +100,8 @@ public: const std::map& GetInputLayouts() const { return inputLayouts; } InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; } InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; } - const cldnn::engine& GetEngine() const { return *m_engine; } - std::shared_ptr GetEnginePtr() const { return m_engine; } + cldnn::engine& GetEngine() const { return *m_engine; } + std::shared_ptr GetEnginePtr() const { return m_engine; } const Config& GetConfig() const { return m_config; } int GetMaxBatchSizeForSingleProgram(); @@ -150,7 +150,7 @@ public: private: static factories_map_t factories_map; std::vector> m_programs; - std::shared_ptr m_engine; + std::shared_ptr m_engine; Config m_config; std::shared_ptr m_topology; diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp index f03db1c4834..ce52a5eea07 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp @@ -6,21 +6,23 @@ #include "cldnn_remote_context.h" #include "cldnn_itt.h" +#include 
"cldnn/runtime/device_query.hpp" + using namespace InferenceEngine; using namespace InferenceEngine::gpu; using namespace InferenceEngine::details; namespace CLDNNPlugin { -static const char unsupported_str[] = "Unsupported shared object type "; CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator; CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context, + cldnn::stream& stream, const cldnn::layout& layout, cldnn::shared_handle mem, cldnn::shared_surface surf, uint32_t plane, BlobType mem_type) : - m_context(context), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane), + m_context(context), m_stream(stream), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane), _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) { } @@ -67,8 +69,7 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const { } bool CLDNNRemoteBlobImpl::deallocate() noexcept { - if (m_memObject != nullptr) - m_memObject.reset(); + m_memObject.reset(); return m_memObject == nullptr; } @@ -86,32 +87,7 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() { _impl->acquire_lock(); if (m_memObject == nullptr) { - auto eng = _impl->GetEngine(); - switch (m_mem_type) { - case BlobType::BT_BUF_INTERNAL: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout))); - break; - case BlobType::BT_BUF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem))); - break; -#ifdef _WIN32 - case BlobType::BT_SURF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane))); - break; - case BlobType::BT_DX_BUF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem))); - break; -#else - case BlobType::BT_SURF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane))); - break; -#endif - case BlobType::BT_IMG_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem))); - break; - default: - IE_THROW() << unsupported_str << m_mem_type; - } + allocate(); } _impl->release_lock(); @@ -120,32 +96,38 @@ void CLDNNRemoteBlobImpl::allocate_if_needed() { void CLDNNRemoteBlobImpl::allocate() noexcept { assert(m_memObject == nullptr); - std::shared_ptr eng = getContextImpl(m_context.lock())->GetEngine(); + std::shared_ptr eng = getContextImpl(m_context.lock())->GetEngine(); switch (m_mem_type) { - case BlobType::BT_BUF_INTERNAL: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::allocate(*eng, m_layout))); + case BlobType::BT_BUF_INTERNAL: { + m_memObject = eng->allocate_memory(m_layout); break; - case BlobType::BT_BUF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_buffer(*eng, m_layout, m_mem))); + } + case BlobType::BT_BUF_SHARED: { + m_memObject = eng->share_buffer(m_layout, m_mem); break; + } #ifdef _WIN32 - case BlobType::BT_SURF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_mem, m_plane))); + case BlobType::BT_SURF_SHARED: { + m_memObject = eng->share_surface(m_layout, m_mem, m_plane); break; - case BlobType::BT_DX_BUF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_dx_buffer(*eng, m_layout, m_mem))); + } + case BlobType::BT_DX_BUF_SHARED: { + m_memObject = eng->share_dx_buffer(m_layout, m_mem); break; + } #else - case 
BlobType::BT_SURF_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_surface(*eng, m_layout, m_surf, m_plane))); + case BlobType::BT_SURF_SHARED: { + m_memObject = eng->share_surface(m_layout, m_surf, m_plane); break; + } #endif - case BlobType::BT_IMG_SHARED: - m_memObject = std::unique_ptr(new cldnn::memory(cldnn::memory::share_image(*eng, m_layout, m_mem))); + case BlobType::BT_IMG_SHARED: { + m_memObject = eng->share_image(m_layout, m_mem); break; + } default: - m_memObject = nullptr; + m_memObject.reset(); } } @@ -165,7 +147,7 @@ std::shared_ptr CLDNNRemoteBlobImpl::getContext() const noexcept } void CLDNNRemoteBlobImpl::lock() const { - lockedHolder = std::unique_ptr>(new cldnn::pointer(m_memObject->pointer())); + lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memObject, m_stream)); auto ptr = lockedHolder->data(); _handle = reinterpret_cast(ptr); m_allocator.regLockedBlob(_handle, this); @@ -244,7 +226,11 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr(dev, - cldnn::engine_configuration((m_config.useProfiling || + bool enable_profiling = (m_config.useProfiling || (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) || - (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)), - false, - m_config.dumpCustomKernels, - std::string(), - std::string(), - true, - std::string(), - m_config.sources_dumps_dir, - m_config.queuePriority, - m_config.queueThrottle, - m_config.memory_pool_on, - m_config.throughput_streams, - m_config.kernels_cache_dir, - m_config.n_threads)); + (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache)); + cldnn::queue_types queue_type = cldnn::queue_types::out_of_order; + bool use_unified_shared_memory = true; + m_engine = cldnn::engine::create(engine_type, runtime_type, dev, cldnn::engine_configuration(enable_profiling, + queue_type, + m_config.sources_dumps_dir, + m_config.queuePriority, + m_config.queueThrottle, + m_config.memory_pool_on, + use_unified_shared_memory, + m_config.kernels_cache_dir, + m_config.n_threads)); } } ParamMap CLDNNExecutionContextImpl::getParams() const { - ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_context() } }; + ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } }; switch (m_type) { case OCL: diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.h b/inference-engine/src/cldnn_engine/cldnn_remote_context.h index a2ce1729bd1..f6a92e82c48 100644 --- a/inference-engine/src/cldnn_engine/cldnn_remote_context.h +++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.h @@ -4,15 +4,11 @@ #pragma once -#include -#include -#include -#include +#include +#include #include #include #include "cldnn_config.h" -#include -#include #include "cldnn_common_utils.h" #ifndef NOMINMAX @@ -25,6 +21,11 @@ # include #endif +#include +#include +#include +#include + namespace CLDNNPlugin { class CLDNNRemoteAllocator; @@ -41,6 +42,7 @@ public: }; explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context, + cldnn::stream& stream, const cldnn::layout& layout, cldnn::shared_handle mem, cldnn::shared_surface surf, @@ -63,11 +65,12 @@ public: bool is_allocated() const noexcept; bool is_locked() const noexcept; void allocate_if_needed(); - cldnn::memory& getMemory() { return *m_memObject; } + cldnn::memory::ptr getMemory() { return m_memObject; } protected: static CLDNNRemoteAllocator m_allocator; std::weak_ptr m_context; + cldnn::stream& m_stream; // constructor 
stuff cldnn::shared_handle m_mem; @@ -77,9 +80,9 @@ protected: cldnn::layout m_layout; BlobType m_mem_type; - std::unique_ptr m_memObject; + cldnn::memory::ptr m_memObject; - mutable std::unique_ptr> lockedHolder; + mutable std::unique_ptr> lockedHolder; mutable void* _handle; mutable std::shared_ptr _allocator; @@ -93,13 +96,14 @@ public: using Ptr = std::shared_ptr; explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context, + cldnn::stream& stream, const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout, cldnn::shared_handle mem, cldnn::shared_surface surf, uint32_t plane, CLDNNRemoteBlobImpl::BlobType mem_type) - : _impl(context, layout, mem, surf, plane, mem_type) + : _impl(context, stream, layout, mem, surf, plane, mem_type) , TpublicAPI(desc) {} void allocate() noexcept override { _impl.allocate(); } @@ -231,6 +235,7 @@ public: } protected: + // TODO: refactor to unique_ptr std::shared_ptr m_engine; InferenceEngine::gpu_handle_param m_va_display; Config m_config; @@ -267,6 +272,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, using namespace InferenceEngine; using InferenceEngine::gpu::details::param_map_obj_getter; InferenceEngine::RemoteBlob::Ptr ret = nullptr; + auto& stream = _impl.GetEngine()->get_program_stream(); uint32_t plane = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(VA_PLANE)); #ifdef _WIN32 cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE)); @@ -290,11 +296,11 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, std::dynamic_pointer_cast (std::enable_shared_from_this>::shared_from_this()); #ifdef _WIN32 - ret = std::make_shared(smart_this, + ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, plane, CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED); #else - ret = std::make_shared(smart_this, + ret = std::make_shared(smart_this, stream, tensorDesc, layout, nullptr, surf, plane, CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED); #endif @@ -311,6 +317,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, InferenceEngine::RemoteBlob::Ptr ret = nullptr; _impl.acquire_lock(); + auto& stream = _impl.GetEngine()->get_program_stream(); // try to locate previously shared object auto itr = shared_obj_reg.find(mem); @@ -327,15 +334,15 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, switch (blob_type) { case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED: - ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); + ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); break; case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED: layout.format = ImageFormatFromLayout(tensorDesc.getLayout()); - ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); + ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); break; #ifdef _WIN32 case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED: - ret = std::make_shared(smart_this, tensorDesc, layout, mem, 0, 0, blob_type); + ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type); break; #endif default: @@ -354,7 +361,9 @@ class typedCLDNNExecutionContext : public TpublicContextAPI, CldnnTensorFromIEDims(tensorDesc.getDims())); auto smart_this = std::dynamic_pointer_cast (std::enable_shared_from_this>::shared_from_this()); + auto& stream = _impl.GetEngine()->get_program_stream(); return std::make_shared(smart_this, + stream, tensorDesc, layout, 
nullptr, 0, 0, diff --git a/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp b/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp index 51d70e05f1a..e46643f0617 100644 --- a/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp +++ b/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/batch_to_space.hpp" #include "ngraph/op/constant.hpp" -#include "api/batch_to_space.hpp" +#include "cldnn/primitives/batch_to_space.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/broadcast.cpp b/inference-engine/src/cldnn_engine/ops/broadcast.cpp index af0ed9b7b75..6d6e6303200 100644 --- a/inference-engine/src/cldnn_engine/ops/broadcast.cpp +++ b/inference-engine/src/cldnn_engine/ops/broadcast.cpp @@ -8,9 +8,9 @@ #include "ngraph/op/broadcast.hpp" #include "ngraph/op/constant.hpp" -#include "api/broadcast.hpp" -#include "api/reorder.hpp" -#include "api/reshape.hpp" +#include "cldnn/primitives/broadcast.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/reshape.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/concat.cpp b/inference-engine/src/cldnn_engine/ops/concat.cpp index 5a300c3dc8f..9d37f959f03 100644 --- a/inference-engine/src/cldnn_engine/ops/concat.cpp +++ b/inference-engine/src/cldnn_engine/ops/concat.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/concat.hpp" -#include "api/concatenation.hpp" +#include "cldnn/primitives/concatenation.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/constant.cpp b/inference-engine/src/cldnn_engine/ops/constant.cpp index e8630f67a6b..fea42f31d98 100644 --- a/inference-engine/src/cldnn_engine/ops/constant.cpp +++ b/inference-engine/src/cldnn_engine/ops/constant.cpp @@ -17,7 +17,7 @@ #include "ngraph/op/variadic_split.hpp" #include "ngraph/op/util/op_types.hpp" -#include "api/data.hpp" +#include "cldnn/primitives/data.hpp" namespace CLDNNPlugin { @@ -169,9 +169,10 @@ void CreateConstantOp(Program& p, const std::shared_ptrsecond; } else { - auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false); - auto tmpPointer = mem.pointer(); // implicitly maps buffer - unmap in destructor - auto buf = tmpPointer.data(); + cldnn::memory::ptr mem = p.GetEngine().allocate_memory(constLayout, false); + auto& stream = p.GetEngine().get_program_stream(); + cldnn::mem_lock lock{mem, stream}; + auto buf = lock.data(); auto bufSize = constLayout.bytes_count(); // Do actual weights reorder and change O and I channels order diff --git a/inference-engine/src/cldnn_engine/ops/convert.cpp b/inference-engine/src/cldnn_engine/ops/convert.cpp index 29fb037258f..6af5bee759d 100644 --- a/inference-engine/src/cldnn_engine/ops/convert.cpp +++ b/inference-engine/src/cldnn_engine/ops/convert.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/convert.hpp" #include "ngraph/op/convert_like.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/convolution.cpp b/inference-engine/src/cldnn_engine/ops/convolution.cpp index 3207dab623a..83f536a68b7 100644 --- a/inference-engine/src/cldnn_engine/ops/convolution.cpp +++ b/inference-engine/src/cldnn_engine/ops/convolution.cpp @@ -13,11 +13,11 @@ #include "ngraph/op/fake_quantize.hpp" #include "ngraph/op/util/op_types.hpp" -#include "api/convolution.hpp" -#include "api/deconvolution.hpp" -#include "api/binary_convolution.hpp" -#include "api/permute.hpp" -#include "api/reorder.hpp" +#include 
"cldnn/primitives/convolution.hpp" +#include "cldnn/primitives/deconvolution.hpp" +#include "cldnn/primitives/binary_convolution.hpp" +#include "cldnn/primitives/permute.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp b/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp index 28873607245..c8bd8d54e07 100644 --- a/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp +++ b/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp @@ -8,9 +8,9 @@ #include "ngraph/op/ctc_greedy_decoder.hpp" #include "ngraph/op/ctc_greedy_decoder_seq_len.hpp" -#include "api/ctc_greedy_decoder.hpp" -#include "api/reorder.hpp" -#include "api/mutable_data.hpp" +#include "cldnn/primitives/ctc_greedy_decoder.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/mutable_data.hpp" #include "transformations/utils/utils.hpp" @@ -58,7 +58,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_size(); - std::vector shared_memory; + std::vector shared_memory; if (num_output == 2) { auto mutable_precision = op->get_output_element_type(1); if (mutable_precision == ngraph::element::i64) { @@ -70,7 +70,7 @@ void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(1).size()), CldnnTensorFromIEDims(op->get_output_shape(1))); - shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayout)); + shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayout)); cldnn::primitive_id ctc_gd_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto ctc_gd_mutable_prim = cldnn::mutable_data(ctc_gd_mutable_id_w, shared_memory[0]); diff --git a/inference-engine/src/cldnn_engine/ops/cum_sum.cpp b/inference-engine/src/cldnn_engine/ops/cum_sum.cpp index 6252a258269..1bdcec2957e 100644 --- a/inference-engine/src/cldnn_engine/ops/cum_sum.cpp +++ b/inference-engine/src/cldnn_engine/ops/cum_sum.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/cum_sum.hpp" #include "ngraph/op/constant.hpp" -#include "api/cum_sum.hpp" +#include "cldnn/primitives/cum_sum.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/custom.cpp b/inference-engine/src/cldnn_engine/ops/custom.cpp index 5211fc94efd..85945bfbdb9 100644 --- a/inference-engine/src/cldnn_engine/ops/custom.cpp +++ b/inference-engine/src/cldnn_engine/ops/custom.cpp @@ -9,8 +9,8 @@ #include "ngraph/attribute_visitor.hpp" #include "ngraph/node.hpp" -#include "api/custom_gpu_primitive.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/custom_gpu_primitive.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp b/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp index 4c407a126d5..b53262ab23d 100644 --- a/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp +++ b/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/depth_to_space.hpp" -#include "api/depth_to_space.hpp" +#include "cldnn/primitives/depth_to_space.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/detection_output.cpp b/inference-engine/src/cldnn_engine/ops/detection_output.cpp index 8bbe102bcfd..aa2b505f0e7 100644 --- a/inference-engine/src/cldnn_engine/ops/detection_output.cpp +++ b/inference-engine/src/cldnn_engine/ops/detection_output.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/detection_output.hpp" -#include "api/detection_output.hpp" 
+#include "cldnn/primitives/detection_output.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/eltwise.cpp b/inference-engine/src/cldnn_engine/ops/eltwise.cpp index 66dcca7b459..817512d5bf9 100644 --- a/inference-engine/src/cldnn_engine/ops/eltwise.cpp +++ b/inference-engine/src/cldnn_engine/ops/eltwise.cpp @@ -25,10 +25,10 @@ #include "ngraph/op/power.hpp" #include "ngraph/op/floor_mod.hpp" -#include "api/activation.hpp" -#include "api/eltwise.hpp" -#include "api/reorder.hpp" -#include "api/reshape.hpp" +#include "cldnn/primitives/activation.hpp" +#include "cldnn/primitives/eltwise.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/reshape.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp b/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp index 35b9f00096d..2e97a60aebf 100644 --- a/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp +++ b/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp @@ -9,8 +9,8 @@ #include "ngraph/op/embeddingbag_offsets_sum.hpp" #include "ngraph/op/embeddingbag_packedsum.hpp" -#include "api/embedding_bag.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/embedding_bag.hpp" +#include "cldnn/primitives/reorder.hpp" #include "transformations/utils/utils.hpp" diff --git a/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp b/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp index 42f7c4e8e99..23b5f014320 100644 --- a/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp +++ b/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/extractimagepatches.hpp" -#include "api/extract_image_patches.hpp" +#include "cldnn/primitives/extract_image_patches.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp b/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp index ca3d950694c..345a70f34bb 100644 --- a/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp +++ b/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/fake_quantize.hpp" -#include "api/quantize.hpp" +#include "cldnn/primitives/quantize.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/gather tree.cpp b/inference-engine/src/cldnn_engine/ops/gather tree.cpp index a6c806bcbe8..6b73131fd29 100644 --- a/inference-engine/src/cldnn_engine/ops/gather tree.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather tree.cpp @@ -7,8 +7,8 @@ #include "ngraph/op/gather_tree.hpp" -#include "api/gather_tree.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/gather_tree.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/gather.cpp b/inference-engine/src/cldnn_engine/ops/gather.cpp index b80e26661e5..362854cc32a 100644 --- a/inference-engine/src/cldnn_engine/ops/gather.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather.cpp @@ -7,8 +7,8 @@ #include "ngraph/op/gather.hpp" -#include "api/gather.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/gather.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/gather_nd.cpp b/inference-engine/src/cldnn_engine/ops/gather_nd.cpp index 6a1cd651329..cbdc5659bb3 100644 --- a/inference-engine/src/cldnn_engine/ops/gather_nd.cpp +++ b/inference-engine/src/cldnn_engine/ops/gather_nd.cpp @@ -8,7 +8,7 @@ #include 
"ngraph/op/gather_nd.hpp" #include "ngraph/op/constant.hpp" -#include "api/gather_nd.hpp" +#include "cldnn/primitives/gather_nd.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/grn.cpp b/inference-engine/src/cldnn_engine/ops/grn.cpp index 3eb750f85a9..960dd034947 100644 --- a/inference-engine/src/cldnn_engine/ops/grn.cpp +++ b/inference-engine/src/cldnn_engine/ops/grn.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/grn.hpp" -#include "api/grn.hpp" +#include "cldnn/primitives/grn.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/interpolate.cpp b/inference-engine/src/cldnn_engine/ops/interpolate.cpp index f9241b8ca0f..b77999289c6 100644 --- a/inference-engine/src/cldnn_engine/ops/interpolate.cpp +++ b/inference-engine/src/cldnn_engine/ops/interpolate.cpp @@ -9,7 +9,7 @@ #include "ngraph/op/interpolate.hpp" #include "ngraph/op/constant.hpp" -#include "api/resample.hpp" +#include "cldnn/primitives/resample.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/lrn.cpp b/inference-engine/src/cldnn_engine/ops/lrn.cpp index f8e7c601645..c13c17daaeb 100644 --- a/inference-engine/src/cldnn_engine/ops/lrn.cpp +++ b/inference-engine/src/cldnn_engine/ops/lrn.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/lrn.hpp" #include "ngraph/op/constant.hpp" -#include "api/lrn.hpp" +#include "cldnn/primitives/lrn.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/matmul.cpp b/inference-engine/src/cldnn_engine/ops/matmul.cpp index 5fe6f143258..a8818c9e6f6 100644 --- a/inference-engine/src/cldnn_engine/ops/matmul.cpp +++ b/inference-engine/src/cldnn_engine/ops/matmul.cpp @@ -9,11 +9,11 @@ #include "ngraph/op/constant.hpp" #include "ngraph/op/fake_quantize.hpp" -#include "api/gemm.hpp" -#include "api/fully_connected.hpp" -#include "api/reshape.hpp" -#include "api/reorder.hpp" -#include "api/permute.hpp" +#include "cldnn/primitives/gemm.hpp" +#include "cldnn/primitives/fully_connected.hpp" +#include "cldnn/primitives/reshape.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/permute.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/mvn.cpp b/inference-engine/src/cldnn_engine/ops/mvn.cpp index 79001b812e1..b9cb376a24e 100644 --- a/inference-engine/src/cldnn_engine/ops/mvn.cpp +++ b/inference-engine/src/cldnn_engine/ops/mvn.cpp @@ -8,7 +8,8 @@ #include "ngraph/op/mvn.hpp" #include "ngraph/op/constant.hpp" -#include "api/mvn.hpp" +#include "cldnn/primitives/mvn.hpp" + #include namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp b/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp index 8b124309467..a3d4834c51e 100644 --- a/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp +++ b/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp @@ -9,9 +9,9 @@ #include #include -#include "api/reorder.hpp" -#include "api/mutable_data.hpp" -#include "api/non_max_suppression.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/non_max_suppression.hpp" namespace CLDNNPlugin { @@ -62,7 +62,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptrget_output_size(); - std::vector shared_memory; + std::vector shared_memory; switch (num_output) { case 3: { auto mutable_precision_second = op->get_output_element_type(2); @@ -74,7 +74,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const 
std::shared_ptrget_output_shape(2).size()), CldnnTensorFromIEDims(op->get_output_shape(2))); - shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond)); + shared_memory.emplace_back(p.GetEngine().allocate_memory(mutableLayoutSecond)); cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second"; auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back()); @@ -91,7 +91,7 @@ void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr(op->get_output_element_type(0), ngraph::Shape{1}, std::vector{1.0}); cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1}); - auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false); - auto tmpPointer = mem.pointer(); // implicitly maps buffer - unmap in destructor + auto mem = p.GetEngine().allocate_memory(constLayout, false); + cldnn::mem_lock tmpPointer{mem, p.GetEngine().get_program_stream()}; auto buf = tmpPointer.data(); auto bufSize = scale->get_output_tensor(0).size(); diff --git a/inference-engine/src/cldnn_engine/ops/one_hot.cpp b/inference-engine/src/cldnn_engine/ops/one_hot.cpp index 1076bf595ef..3d792bda8ae 100644 --- a/inference-engine/src/cldnn_engine/ops/one_hot.cpp +++ b/inference-engine/src/cldnn_engine/ops/one_hot.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/one_hot.hpp" -#include "api/one_hot.hpp" +#include "cldnn/primitives/one_hot.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/pad.cpp b/inference-engine/src/cldnn_engine/ops/pad.cpp index a3503318a77..0d409414b58 100644 --- a/inference-engine/src/cldnn_engine/ops/pad.cpp +++ b/inference-engine/src/cldnn_engine/ops/pad.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/pad.hpp" -#include "api/border.hpp" +#include "cldnn/primitives/border.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/parameter.cpp b/inference-engine/src/cldnn_engine/ops/parameter.cpp index 7c61d8261d7..b68593dd0a5 100644 --- a/inference-engine/src/cldnn_engine/ops/parameter.cpp +++ b/inference-engine/src/cldnn_engine/ops/parameter.cpp @@ -7,10 +7,10 @@ #include "ngraph/op/parameter.hpp" -#include "api/input_layout.hpp" -#include "api/reorder.hpp" -#include "api/data.hpp" -#include "api/concatenation.hpp" +#include "cldnn/primitives/input_layout.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/concatenation.hpp" using namespace InferenceEngine; @@ -158,8 +158,8 @@ void CreateParameterOp(Program& p, const std::shared_ptrsecond; } else { - auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false); - auto tmpPointer = mem.pointer(); // implicitly maps buffer - unmap in destructor + auto mem = p.GetEngine().allocate_memory(meanBlobLayout, false); + cldnn::mem_lock tmpPointer{ mem, p.GetEngine().get_program_stream() }; auto buf = tmpPointer.data(); auto bufSize = meanBlobLayout.bytes_count(); diff --git a/inference-engine/src/cldnn_engine/ops/pooling.cpp b/inference-engine/src/cldnn_engine/ops/pooling.cpp index 16ca93a6879..f1bf6952292 100644 --- a/inference-engine/src/cldnn_engine/ops/pooling.cpp +++ b/inference-engine/src/cldnn_engine/ops/pooling.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/max_pool.hpp" #include "ngraph/op/avg_pool.hpp" -#include "api/pooling.hpp" +#include "cldnn/primitives/pooling.hpp" namespace CLDNNPlugin { diff --git 
a/inference-engine/src/cldnn_engine/ops/prior_box.cpp b/inference-engine/src/cldnn_engine/ops/prior_box.cpp index 07c6a4ca3ee..6cf0aaa6535 100644 --- a/inference-engine/src/cldnn_engine/ops/prior_box.cpp +++ b/inference-engine/src/cldnn_engine/ops/prior_box.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/prior_box.hpp" #include "ngraph/op/prior_box_clustered.hpp" -#include "api/prior_box.hpp" +#include "cldnn/primitives/prior_box.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/proposal.cpp b/inference-engine/src/cldnn_engine/ops/proposal.cpp index 70ff3d154f1..d5b906e5e6e 100644 --- a/inference-engine/src/cldnn_engine/ops/proposal.cpp +++ b/inference-engine/src/cldnn_engine/ops/proposal.cpp @@ -7,8 +7,8 @@ #include "ngraph/op/proposal.hpp" -#include "api/proposal.hpp" -#include "api/mutable_data.hpp" +#include "cldnn/primitives/proposal.hpp" +#include "cldnn/primitives/mutable_data.hpp" namespace CLDNNPlugin { @@ -62,7 +62,7 @@ void CreateProposalOp(Program& p, const std::shared_ptrget_output_shape(1).size()), CldnnTensorFromIEDims(op->get_output_shape(1))); - auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout); + auto shared_memory = p.GetEngine().allocate_memory(mutableLayout); cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory); diff --git a/inference-engine/src/cldnn_engine/ops/reduce.cpp b/inference-engine/src/cldnn_engine/ops/reduce.cpp index 26343ffb813..b336a2e78fa 100644 --- a/inference-engine/src/cldnn_engine/ops/reduce.cpp +++ b/inference-engine/src/cldnn_engine/ops/reduce.cpp @@ -16,9 +16,9 @@ #include "ngraph/op/max.hpp" #include "ngraph/op/constant.hpp" -#include "api/reduce.hpp" -#include "api/reorder.hpp" -#include "api/reshape.hpp" +#include "cldnn/primitives/reduce.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/reshape.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/region_yolo.cpp b/inference-engine/src/cldnn_engine/ops/region_yolo.cpp index 6e5fd660e3f..348dd0f7eeb 100644 --- a/inference-engine/src/cldnn_engine/ops/region_yolo.cpp +++ b/inference-engine/src/cldnn_engine/ops/region_yolo.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/region_yolo.hpp" -#include "api/region_yolo.hpp" +#include "cldnn/primitives/region_yolo.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp b/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp index 7ddc45221a6..4a7f54cf810 100644 --- a/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp +++ b/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/reorg_yolo.hpp" -#include "api/reorg_yolo.hpp" +#include "cldnn/primitives/reorg_yolo.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/reshape.cpp b/inference-engine/src/cldnn_engine/ops/reshape.cpp index b2111e77f64..f0084bb6a1c 100644 --- a/inference-engine/src/cldnn_engine/ops/reshape.cpp +++ b/inference-engine/src/cldnn_engine/ops/reshape.cpp @@ -9,8 +9,8 @@ #include "ngraph/op/squeeze.hpp" #include "ngraph/op/unsqueeze.hpp" -#include "api/reshape.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/reshape.hpp" +#include "cldnn/primitives/reorder.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/result.cpp b/inference-engine/src/cldnn_engine/ops/result.cpp index 4974f1ec56c..fe0d0f05658 100644 --- 
a/inference-engine/src/cldnn_engine/ops/result.cpp +++ b/inference-engine/src/cldnn_engine/ops/result.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/result.hpp" -#include "api/reorder.hpp" +#include "cldnn/primitives/reorder.hpp" using namespace InferenceEngine; diff --git a/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp b/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp index 4537d3d34b7..766bbc89a31 100644 --- a/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp +++ b/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/reverse_sequence.hpp" -#include "api/reverse_sequence.hpp" +#include "cldnn/primitives/reverse_sequence.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/rnn.cpp b/inference-engine/src/cldnn_engine/ops/rnn.cpp index a1bab946db4..2d4705f1a91 100644 --- a/inference-engine/src/cldnn_engine/ops/rnn.cpp +++ b/inference-engine/src/cldnn_engine/ops/rnn.cpp @@ -8,12 +8,12 @@ #include "ngraph/op/lstm_cell.hpp" #include "ngraph/op/lstm_sequence.hpp" -#include "api/reshape.hpp" -#include "api/reorder.hpp" -#include "api/fully_connected.hpp" -#include "api/lstm.hpp" -#include "api/crop.hpp" -#include "api/concatenation.hpp" +#include "cldnn/primitives/reshape.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/fully_connected.hpp" +#include "cldnn/primitives/lstm.hpp" +#include "cldnn/primitives/crop.hpp" +#include "cldnn/primitives/concatenation.hpp" namespace CLDNNPlugin { cldnn::activation_func GetActivationFunc(std::string name) { diff --git a/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp b/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp index 8dc57e3a5c6..f2087756405 100644 --- a/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp +++ b/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp @@ -9,7 +9,7 @@ #include "ngraph/op/psroi_pooling.hpp" #include "ngraph/op/deformable_psroi_pooling.hpp" -#include "api/roi_pooling.hpp" +#include "cldnn/primitives/roi_pooling.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp b/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp index 98c22e2a199..19f63dbf3a3 100644 --- a/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp +++ b/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/scatter_elements_update.hpp" #include "ngraph/op/constant.hpp" -#include "api/scatter_elements_update.hpp" +#include "cldnn/primitives/scatter_elements_update.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/scatter_nd_update.cpp b/inference-engine/src/cldnn_engine/ops/scatter_nd_update.cpp index 1b422736575..cf5d059772a 100644 --- a/inference-engine/src/cldnn_engine/ops/scatter_nd_update.cpp +++ b/inference-engine/src/cldnn_engine/ops/scatter_nd_update.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/scatter_nd_update.hpp" #include "ngraph/op/constant.hpp" -#include "api/scatter_nd_update.hpp" +#include "cldnn/primitives/scatter_nd_update.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/scatter_update.cpp b/inference-engine/src/cldnn_engine/ops/scatter_update.cpp index 85388032de8..c6fb4e9c0aa 100644 --- a/inference-engine/src/cldnn_engine/ops/scatter_update.cpp +++ b/inference-engine/src/cldnn_engine/ops/scatter_update.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/scatter_update.hpp" #include "ngraph/op/constant.hpp" -#include "api/scatter_update.hpp" 
+#include "cldnn/primitives/scatter_update.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/select.cpp b/inference-engine/src/cldnn_engine/ops/select.cpp index 0d3ae8e182c..6b3866ac5db 100644 --- a/inference-engine/src/cldnn_engine/ops/select.cpp +++ b/inference-engine/src/cldnn_engine/ops/select.cpp @@ -7,9 +7,9 @@ #include "ngraph/op/select.hpp" -#include "api/select.hpp" -#include "api/reorder.hpp" -#include "api/reshape.hpp" +#include "cldnn/primitives/select.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/reshape.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp b/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp index 0c0cafd03ef..f3066ace26a 100644 --- a/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp +++ b/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/shuffle_channels.hpp" -#include "api/shuffle_channels.hpp" +#include "cldnn/primitives/shuffle_channels.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/softmax.cpp b/inference-engine/src/cldnn_engine/ops/softmax.cpp index 566aca800ea..cbaffc04acc 100644 --- a/inference-engine/src/cldnn_engine/ops/softmax.cpp +++ b/inference-engine/src/cldnn_engine/ops/softmax.cpp @@ -8,8 +8,8 @@ #include "ngraph/op/softmax.hpp" #include "ngraph/op/log_softmax.hpp" -#include "api/softmax.hpp" -#include "api/activation.hpp" +#include "cldnn/primitives/softmax.hpp" +#include "cldnn/primitives/activation.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp b/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp index 7846375dff7..fa57d178038 100644 --- a/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp +++ b/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/space_to_batch.hpp" #include "ngraph/op/constant.hpp" -#include "api/space_to_batch.hpp" +#include "cldnn/primitives/space_to_batch.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp b/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp index a8d73eea79d..df4a25b4698 100644 --- a/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp +++ b/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/space_to_depth.hpp" -#include "api/space_to_depth.hpp" +#include "cldnn/primitives/space_to_depth.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/split.cpp b/inference-engine/src/cldnn_engine/ops/split.cpp index 6706ccebd8b..33b48c6d17f 100644 --- a/inference-engine/src/cldnn_engine/ops/split.cpp +++ b/inference-engine/src/cldnn_engine/ops/split.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/split.hpp" #include "ngraph/op/variadic_split.hpp" -#include "api/crop.hpp" +#include "cldnn/primitives/crop.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/strided_slice.cpp b/inference-engine/src/cldnn_engine/ops/strided_slice.cpp index 0bf5adb74e2..8c796290a74 100644 --- a/inference-engine/src/cldnn_engine/ops/strided_slice.cpp +++ b/inference-engine/src/cldnn_engine/ops/strided_slice.cpp @@ -8,9 +8,9 @@ #include "ngraph/op/strided_slice.hpp" #include "ngraph/op/constant.hpp" -#include "api/strided_slice.hpp" -#include "api/reshape.hpp" -#include "api/crop.hpp" +#include "cldnn/primitives/strided_slice.hpp" +#include "cldnn/primitives/reshape.hpp" +#include 
"cldnn/primitives/crop.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/tensor_iterator.cpp b/inference-engine/src/cldnn_engine/ops/tensor_iterator.cpp index e4e78208c02..d9843481979 100644 --- a/inference-engine/src/cldnn_engine/ops/tensor_iterator.cpp +++ b/inference-engine/src/cldnn_engine/ops/tensor_iterator.cpp @@ -13,11 +13,11 @@ #include "ngraph/op/constant.hpp" #include "ngraph/op/util/sub_graph_base.hpp" -#include "api/loop.hpp" -#include "api/mutable_data.hpp" -#include "api/data.hpp" -#include "api/reorder.hpp" -#include "api/topology.hpp" +#include "cldnn/primitives/loop.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/graph/topology.hpp" #include #include @@ -28,9 +28,8 @@ namespace CLDNNPlugin { template static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { - auto mem = cldnn::memory::allocate(p.GetEngine(), - { cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); - auto ptr = mem.pointer(); + auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); + cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; *ptr.begin() = num; return {id, mem}; } @@ -42,7 +41,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); - auto mem = cldnn::memory::allocate(p.GetEngine(), output_layout); + auto mem = p.GetEngine().allocate_memory(output_layout); auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data cannot set dependency return md; } diff --git a/inference-engine/src/cldnn_engine/ops/tile.cpp b/inference-engine/src/cldnn_engine/ops/tile.cpp index a4856ad80dc..aa91fbd3d5a 100644 --- a/inference-engine/src/cldnn_engine/ops/tile.cpp +++ b/inference-engine/src/cldnn_engine/ops/tile.cpp @@ -7,7 +7,7 @@ #include "ngraph/op/tile.hpp" -#include "api/tile.hpp" +#include "cldnn/primitives/tile.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/topk.cpp b/inference-engine/src/cldnn_engine/ops/topk.cpp index d527aee1d61..3d8f7e6521a 100644 --- a/inference-engine/src/cldnn_engine/ops/topk.cpp +++ b/inference-engine/src/cldnn_engine/ops/topk.cpp @@ -7,8 +7,8 @@ #include "ngraph/op/topk.hpp" -#include "api/arg_max_min.hpp" -#include "api/mutable_data.hpp" +#include "cldnn/primitives/arg_max_min.hpp" +#include "cldnn/primitives/mutable_data.hpp" namespace CLDNNPlugin { @@ -71,7 +71,7 @@ void CreateTopKOp(Program& p, const std::shared_ptr& op) { DefaultFormatForDims(op->get_output_shape(1).size()), CldnnTensorFromIEDims(op->get_output_shape(1))); - auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout); + auto shared_memory = p.GetEngine().allocate_memory(mutableLayout); cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write"; auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory); diff --git a/inference-engine/src/cldnn_engine/ops/transpose.cpp b/inference-engine/src/cldnn_engine/ops/transpose.cpp index a1c8ce63cae..f5de62923a2 100644 --- a/inference-engine/src/cldnn_engine/ops/transpose.cpp +++ b/inference-engine/src/cldnn_engine/ops/transpose.cpp @@ -8,7 +8,7 @@ #include "ngraph/op/transpose.hpp" 
#include "ngraph/op/constant.hpp" -#include "api/permute.hpp" +#include "cldnn/primitives/permute.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/src/cldnn_engine/ops/unary.cpp b/inference-engine/src/cldnn_engine/ops/unary.cpp index 334d68c5f23..9a277a8be2a 100644 --- a/inference-engine/src/cldnn_engine/ops/unary.cpp +++ b/inference-engine/src/cldnn_engine/ops/unary.cpp @@ -41,7 +41,7 @@ #include "ngraph/op/hsigmoid.hpp" #include "ngraph/op/round.hpp" -#include "api/activation.hpp" +#include "cldnn/primitives/activation.hpp" namespace CLDNNPlugin { diff --git a/inference-engine/tests/functional/plugin/gpu/CMakeLists.txt b/inference-engine/tests/functional/plugin/gpu/CMakeLists.txt index d96dd234c1f..f175b4c1a1f 100644 --- a/inference-engine/tests/functional/plugin/gpu/CMakeLists.txt +++ b/inference-engine/tests/functional/plugin/gpu/CMakeLists.txt @@ -17,7 +17,7 @@ addIeTargetTest( AutoPlugin LINK_LIBRARIES funcSharedTests - ${CLDNN__OCL_ICD_LIBPATH} + ${OpenCL_LIBRARY} ADD_CPPLINT LABELS GPU diff --git a/inference-engine/thirdparty/clDNN/CMakeLists.txt b/inference-engine/thirdparty/clDNN/CMakeLists.txt index d14da47860c..41833b58dc5 100644 --- a/inference-engine/thirdparty/clDNN/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/CMakeLists.txt @@ -83,6 +83,9 @@ set(CLDNN__API_EXTENSION_DIR "${CMAKE_CURRENT_SOURCE_DIR}/api_extension") # Path which points to directory with interface for framework. set(CLDNN__KERNEL_SELECTOR_DIR "${CMAKE_CURRENT_SOURCE_DIR}/kernel_selector") +# Path which points to directory with runtime +set(CLDNN__RUNTIME_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime") + # Path which points to directory with binaries for Khronos OpenCL ICD Loader (Installable Client Driver). set(CLDNN__OCL_ICD_DIR "${CLDNN__COMMON_DIR}/khronos_ocl_icd") @@ -270,12 +273,6 @@ mark_as_advanced(CLDNN__INCLUDE_TESTS) # ====================================================================================================== -# Include and build: Core Internal Tests (unit tests and small acceptance tests) for core internal clDNN framework mechanisms. -set(CLDNN__INCLUDE_CORE_INTERNAL_TESTS ON CACHE BOOL "Include and build: clDNN framework's core internal tests.") -mark_as_advanced(CLDNN__INCLUDE_CORE_INTERNAL_TESTS) - -# ====================================================================================================== - # Include and build: clDNN tutorial. set(CLDNN__INCLUDE_TUTORIAL ON CACHE BOOL "Include and build: clDNN Tutorial.") @@ -289,12 +286,6 @@ mark_as_advanced(CLDNN__RUN_TESTS) # ====================================================================================================== -# Run (requires CLDNN__INCLUDE_CORE_INTERNAL_TESTS to be true): Tests (unit tests and small acceptance core internal tests) for clDNN framework. -set(CLDNN__RUN_CORE_INTERNAL_TESTS OFF CACHE BOOL "Run: clDNN framework's core internal tests.") -mark_as_advanced(CLDNN__RUN_CORE_INTERNAL_TESTS) - -# ====================================================================================================== - # Compile / Link: Use static C++ Runtime library. set(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME OFF CACHE BOOL "Compile / Link: Use static version of C++ Runtime library instead of shared one.") mark_as_advanced(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME) @@ -330,14 +321,6 @@ endif() # ====================================================================================================== -# Checking whether tests can be run. 
-if((NOT CLDNN__INCLUDE_CORE_INTERNAL_TESTS) AND CLDNN__RUN_CORE_INTERNAL_TESTS) - message(WARNING "[clDNN] CLDNN__INCLUDE_CORE_INTERNAL_TESTS: Selected running of core internal tests, but test are not built. Option will be disabled.") - set(CLDNN__RUN_CORE_INTERNAL_TESTS OFF) -endif() - -# ====================================================================================================== - # Check for python 3 interpreter (required tool). find_package(PythonInterp 3 QUIET) if(NOT PYTHONINTERP_FOUND) @@ -375,19 +358,26 @@ set(CLDNN__OCL_ICD_ROOT "${CLDNN__OCL_ICD_DIR}" CACHE INTERNAL "Path to Khronos set(CLDNN__OCL_ICD_INCDIRS "${CLDNN__KHR_CLHPP_DIR}" CACHE INTERNAL "Paths to interface headers for OpenCL.") set(CLDNN__OCL_ICD_STLDIRS "${CLDNN__OCL_ICD_ROOT}/${__CLDNN_TargetOs}/${CLDNN__TARGET_CFG_VAR}/lib/${__CLDNN_TargetCpuDir}" CACHE INTERNAL "Paths to static libraries for OpenCL ICD Loader.") set(CLDNN__OCL_ICD_SHLDIRS "${CLDNN__OCL_ICD_ROOT}/${__CLDNN_TargetOs}/${CLDNN__TARGET_CFG_VAR}/bin/${__CLDNN_TargetCpuDir}" CACHE INTERNAL "Paths to shared libraries for OpenCL ICD Loader.") +include_directories(SYSTEM "${CLDNN__KHR_CLHPP_DIR}") # Select link directory based on targeted OS. # - on Windows: static libraries directory. # - on others: shared libraries directory. if(__CLDNN_TargetOs MATCHES "^windows$") - set(CLDNN__OCL_ICD_LIBDIRS ${CLDNN__OCL_ICD_STLDIRS} CACHE INTERNAL "Paths to libraries to link for OpenCL ICD Loader.") - set(CLDNN__OCL_ICD_LIBPATH ${CLDNN__OCL_ICD_LIBDIRS}/${CMAKE_STATIC_LIBRARY_PREFIX}OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX} CACHE INTERNAL "") +set(CLDNN__OCL_ICD_LIBDIRS ${CLDNN__OCL_ICD_STLDIRS} CACHE INTERNAL "Paths to libraries to link for OpenCL ICD Loader.") +set(CLDNN__OCL_ICD_LIBPATH ${CLDNN__OCL_ICD_LIBDIRS}/${CMAKE_STATIC_LIBRARY_PREFIX}OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX} CACHE INTERNAL "") else() - set(CLDNN__OCL_ICD_LIBDIRS ${CLDNN__OCL_ICD_SHLDIRS} CACHE INTERNAL "Paths to libraries to link for OpenCL ICD Loader.") - set(CLDNN__OCL_ICD_LIBPATH ${CLDNN__OCL_ICD_LIBDIRS}/${CMAKE_SHARED_LIBRARY_PREFIX}OpenCL${CMAKE_SHARED_LIBRARY_SUFFIX} CACHE INTERNAL "") +set(CLDNN__OCL_ICD_LIBDIRS ${CLDNN__OCL_ICD_SHLDIRS} CACHE INTERNAL "Paths to libraries to link for OpenCL ICD Loader.") +set(CLDNN__OCL_ICD_LIBPATH ${CLDNN__OCL_ICD_LIBDIRS}/${CMAKE_SHARED_LIBRARY_PREFIX}OpenCL${CMAKE_SHARED_LIBRARY_SUFFIX} CACHE INTERNAL "") endif() - +set(OpenCL_LIBRARY "${CLDNN__OCL_ICD_LIBPATH}" CACHE INTERNAL "Paths to shared libraries for OpenCL ICD Loader.") +set(OpenCL_INCLUDE_DIR "${CLDNN__OCL_ICD_INCDIRS}" CACHE INTERNAL "Paths to interface headers for OpenCL.") +add_library(clDNN_OpenCL UNKNOWN IMPORTED) +set_target_properties(clDNN_OpenCL + PROPERTIES + IMPORTED_LOCATION ${CLDNN__OCL_ICD_LIBPATH} + ) # ====================================================================================================== set(CLDNN_UTILS__RAPIDJSON_INCDIRS "utils/rapidjson" CACHE INTERNAL "Paths to interface headers for rapidjson.") # ====================================== Version Calculation =========================================== @@ -414,11 +404,9 @@ message(STATUS "[clDNN]") message(STATUS "[clDNN] - Include/Build cldnn core: ${CLDNN__INCLUDE_CORE}") message(STATUS "[clDNN] - Include/Build kernel selector: ${CLDNN__INCLUDE_KERNEL_SELECTOR}") message(STATUS "[clDNN] - Include/Build tests: ${CLDNN__INCLUDE_TESTS}") -message(STATUS "[clDNN] - Include/Build core internal tests: ${CLDNN__INCLUDE_CORE_INTERNAL_TESTS}") message(STATUS "[clDNN] - Include/Build tutorial: 
${CLDNN__INCLUDE_TUTORIAL}") message(STATUS "[clDNN]") message(STATUS "[clDNN] - Run tests: ${CLDNN__RUN_TESTS}") -message(STATUS "[clDNN] - Run core internal tests: ${CLDNN__RUN_CORE_INTERNAL_TESTS}") message(STATUS "[clDNN]") message(STATUS "[clDNN] - Use static C++ Runtime: ${CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME}") message(STATUS "[clDNN] - Allow unsafe size opts: ${CLDNN__COMPILE_LINK_ALLOW_UNSAFE_SIZE_OPT}") @@ -781,30 +769,18 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS include_directories( ${CLDNN_UTILS__RAPIDJSON_INCDIRS} "${CLDNN__CODEGEN_INCDIR}" + "${CLDNN__API_DIR}" ) -include_directories(SYSTEM - "${CLDNN__KHR_CLHPP_DIR}" - ) - -add_library(clDNN_OpenCL UNKNOWN IMPORTED) -set_target_properties(clDNN_OpenCL - PROPERTIES - IMPORTED_LOCATION ${CLDNN__OCL_ICD_LIBPATH} - ) - - # =================================== Link targets and dependencies ==================================== if(CLDNN__INCLUDE_CORE) add_subdirectory(src) + add_subdirectory(runtime) add_subdirectory(api_test_builds) endif() if(CLDNN__INCLUDE_TESTS) add_subdirectory(tests) endif() -if(CLDNN__INCLUDE_CORE_INTERNAL_TESTS) - add_subdirectory(tests_core_internal) -endif() if(CLDNN__INCLUDE_KERNEL_SELECTOR) add_subdirectory(kernel_selector) endif() diff --git a/inference-engine/thirdparty/clDNN/api/cldnn.hpp b/inference-engine/thirdparty/clDNN/api/cldnn.hpp deleted file mode 100644 index 4b531ab050b..00000000000 --- a/inference-engine/thirdparty/clDNN/api/cldnn.hpp +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/*! @mainpage clDNN Documentation -* @section intro Introduction -* Compute Library for Deep Neural Networks (clDNN) is a middle-ware software -* for accelerating DNN inference on Intel® HD and Iris™ Pro Graphics. -* This project includes CNN primitives implementations on Intel GPUs with C and C++ interfaces. -* -* clDNN Library implements set of primitives: -* - Convolution -* - Fully connected (inner product) -* - Pooling -* * average -* * maximum -* - Normalization -* * across channel -* * within channel -* * batch -* - Activation -* * logistic -* * tanh -* * rectified linear unit (ReLU) -* * softplus (softReLU) -* * abs -* * square -* * sqrt -* * linear -* - Softmax -* - Crop -* - Deconvolution -* - Depth concatenation -* - Eltwise -* - ROI pooling -* - Simpler NMS -* - Prior box -* - Detection output -* -* With this primitive set, user can build and execute most common image recognition, semantic segmentation and object detection networks topologies like: -* - Alexnet -* - Googlenet(v1-v3) -* - ResNet -* - VGG -* - faster-rCNN -* and other. -* -* -* @section model Programming Model -* Intel® clDNN is graph oriented library. To execute CNN you have to build, compile graph/topology and run to get results. -* -* Terminology: -* - Primitive - dnn base functionality i.e. convolution, pooling, softmax. -* - Data - special primitive type representing primitive parameters (weights and biases), inputs and outputs -* - Engine - type of accelerator that is executing network. Currently ocl engine is the only available. -* - Topology - container of primitives, data, and relations between them. Topology represents graph. -* - Program - optional step between Topology and Network. It is compiled Topology without memory allocation. -* - Network - compiled Topology with memory allocation. Ready to be executed. During compilation, buidling parameters trigger special optimizations like fusing, data reordering. 
-* -* Execution Steps: -* -* \image html workflow.jpg -* -# Create Engine -* -# Declare or define primitives parameters (weights and biases) if needed. -* -# Create primitives. It is required to provide name for each primitive. This is a name of primitive which output will be input to current one. Name can be used before primitive definition. -* -# Create topology -* -# Add primitives to topology -* -# Build Network from topology -* -# Set Inputs data -* -# Execute Network -* -* -* @section graph_compilation Graph compilation -* -* If user choose build option optimize_data when program is being created - explicit or implicit over network creation, clDNN perform some graph optimizations as follows: -* * Stage 0: Graph initiation: -* * build nodes from primitives -* * node replacement: -* * replace each split node with series of crop nodes. Name of crop primitive will be concatenation of split + port names. -* * replace upsampling node with deconvolution node if upsampling mode is bilinear. -* * set outputs - mark nodes that are defined by user as output (blocks fusing etc) or have no users (leafs). -* * calculate processing order - using dfs on graph to establish processing order -* * Stage 1: Priorboxes: -* * priorbox is primitive that is executed during network compilation. Node is removed from a network execution. -* * Stage 2: Graph analysis: -* * mark constatns -* * mark data flow -* * Stage 3: Trimming: -* * apply backward bfs on each output to find unnecessary nodes/branches, then remove those. -* * Stage 4: Inputs and biases: -* * reorder input - format of convolution's input/output is being selected. -* * reorder biases for conv,fc and deconv nodes -* * Stage 5: Redundant reorders: -* * previous stages can provide additional reorders due to format changes per primitive. This stage removes redundant and fuses series of reorders into one. -* * Stage 6: Constant propagation: -* * prepare padding - goes thrugh all primitves and checks if its user requires padding, if so, set output padding. -* * prepare depthwise separable opt - if split param is greater than 16 and number of IFM <= 8*split in conv or deconv, this stage changes execution from multi kernels into one. -* * constant propagation - replace constant nodes, that are not outputs with data type nodes. Constant primitive is the primitive that doesn't depend on any non-constant primitive and doesn't have to be executed: priorbox, data. -* * Stage 7: Fusing: -* * buffer fusing -* * concat - if concatenation is the only user of its dependencies then remove concat node and setting proper output paddings in every dependencies. -* * crop - if crop has only one dependecy, and its users doesn't require padding, remove crop and set proper output padding in its dependecy. -* * reorder - if primitive before reorder supports different input vs output type reorder can be fused with previous node. -* * primitive fusing - right now this stage fuses activation node with previous node only, only if previous node supports activation fusing. -* * Stage 8: Compile graph: -* * at this stage using kernel selector, graph chooses the best kernel implementation for each node. -* * Stage 9: reorder weights: -* * at this stage weights are converted into format suitable for selected kernel implementation. -* * Stage 10 & 11: Redundant reorders and constant propagation: -* * check again if whole graph compilation didn't provide any redundant reorders and constants. -* * Stage 12: Compile program: -* * at this stage engine compiles cl_kernels. 
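The execution-step walkthrough removed above still describes the intended flow; only the entry points and header locations change. A minimal sketch of the same build-and-run sequence against the relocated headers, assuming an already created cldnn::engine named engine (engine construction is not shown in this patch) and using an illustrative input name, shape and activation primitive:

    #include "cldnn/runtime/engine.hpp"
    #include "cldnn/runtime/memory.hpp"
    #include "cldnn/graph/topology.hpp"
    #include "cldnn/graph/network.hpp"
    #include "cldnn/primitives/input_layout.hpp"
    #include "cldnn/primitives/activation.hpp"

    cldnn::layout in_layout{cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor{1, 3, 224, 224}};

    // 1. Declare the topology
    cldnn::topology topology;
    topology.add(cldnn::input_layout("input", in_layout));
    topology.add(cldnn::activation("relu", "input", cldnn::activation_func::relu));

    // 2. Build the network on the engine
    cldnn::network network(engine, topology);

    // 3. Provide input data and execute
    cldnn::memory::ptr input_mem = engine.allocate_memory(in_layout);
    network.set_input_data("input", input_mem);
    auto outputs = network.execute();

    // 4. Read results while the output buffer is mapped
    cldnn::memory::ptr out_mem = outputs.at("relu").get_memory();
    cldnn::mem_lock<float> out_ptr{out_mem, network.get_stream()};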
-* -* @section example C++ API Example MNIST network -* @include example_cldnn.cpp -*/ - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace cldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -float half_to_float(uint16_t value); -uint16_t float_to_half(float value); - -// There is no portable half precision floating point support. -// Using wrapped integral type with the same size and alignment restrictions. -class half_impl { -public: - half_impl() = default; - - template ::value>::type> - explicit half_impl(T data, int /*direct_creation_tag*/) : _data(data) {} - - operator uint16_t() const { return _data; } - operator float() const { - return half_to_float(_data); - } - - explicit half_impl(float value) - : _data(float_to_half(value)) - {} - - template ::value>::type> - explicit half_impl(T value) - : half_impl(static_cast(value)) - {} - -private: - uint16_t _data; -}; - -// Use complete implementation if necessary. -#if defined HALF_HALF_HPP -using half_t = half; -#else -using half_t = half_impl; -#endif - -/// @cond CPP_HELPERS - -/// @defgroup cpp_helpers Helpers -/// @{ - -#define CLDNN_API_CLASS(the_class) static_assert(std::is_standard_layout::value, #the_class " has to be 'standard layout' class"); - -template -typename std::enable_if::value, T>::type align_to(T size, size_t align) { - return static_cast((size % align == 0) ? size : size - size % align + align); -} - -template -typename std::enable_if::value, T>::type pad_to(T size, size_t align) { - return static_cast((size % align == 0) ? 0 : align - size % align); -} - -template -typename std::enable_if::value, bool>::type is_aligned_to(T size, size_t align) { - return !(size % align); -} - -/// Computes ceil(@p val / @p divider) on unsigned integral numbers. -/// -/// Computes division of unsigned integral numbers and rounds result up to full number (ceiling). -/// The function works for unsigned integrals only. Signed integrals are converted to corresponding -/// unsigned ones. -/// -/// @tparam T1 Type of @p val. Type must be integral (SFINAE). -/// @tparam T2 Type of @p divider. Type must be integral (SFINAE). -/// -/// @param val Divided value. If value is signed, it will be converted to corresponding unsigned type. -/// @param divider Divider value. If value is signed, it will be converted to corresponding unsigned type. -/// -/// @return Result of ceil(@p val / @p divider). The type of result is determined as if in normal integral -/// division, except each operand is converted to unsigned type if necessary. -template -constexpr auto ceil_div(T1 val, T2 divider) --> typename std::enable_if::value && std::is_integral::value, - decltype(std::declval::type>() / std::declval::type>())>::type { - typedef typename std::make_unsigned::type UT1; - typedef typename std::make_unsigned::type UT2; - typedef decltype(std::declval() / std::declval()) RetT; - - return static_cast((static_cast(val) + static_cast(divider) - 1U) / static_cast(divider)); -} - -/// Rounds @p val to nearest multiply of @p rounding that is greater or equal to @p val. -/// -/// The function works for unsigned integrals only. Signed integrals are converted to corresponding -/// unsigned ones. -/// -/// @tparam T1 Type of @p val. Type must be integral (SFINAE). -/// @tparam T2 Type of @p rounding. Type must be integral (SFINAE). -/// -/// @param val Value to round up. 
If value is signed, it will be converted to corresponding unsigned type. -/// @param rounding Rounding value. If value is signed, it will be converted to corresponding unsigned type. -/// -/// @return @p val rounded up to nearest multiply of @p rounding. The type of result is determined as if in normal integral -/// division, except each operand is converted to unsigned type if necessary. -template -constexpr auto round_up_to(T1 val, T2 rounding) --> typename std::enable_if::value && std::is_integral::value, - decltype(std::declval::type>() / std::declval::type>())>::type { - typedef typename std::make_unsigned::type UT1; - typedef typename std::make_unsigned::type UT2; - typedef decltype(std::declval() / std::declval()) RetT; - - return static_cast(ceil_div(val, rounding) * static_cast(rounding)); -} - -/// @} -/// @endcond -/// @} -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/network.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/graph/network.hpp similarity index 79% rename from inference-engine/thirdparty/clDNN/api/network.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/graph/network.hpp index ee089e7e42f..a6829259d79 100644 --- a/inference-engine/thirdparty/clDNN/api/network.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/network.hpp @@ -3,12 +3,14 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// + #pragma once -#include "cldnn.hpp" -#include "compounds.h" -#include "memory.hpp" + +#include "cldnn/runtime/compounds.hpp" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/stream.hpp" #include "program.hpp" -#include "event.hpp" #include #include @@ -28,18 +30,25 @@ namespace cldnn { /// @brief Represents network output returned by @ref network::get_output(). struct network_output { /// @brief Returns @ref event associated with the output. - event get_event() const { return _event; } + event::ptr get_event() const { return _event; } /// @brief Returns @ref memory object of the output. Blocked until associated @ref event is not complete. - memory get_memory() const { - _event.wait(); + memory::ptr get_memory() const { + // TODO: in_order queue doesn't create proper output event in some cases which leads to syncronization issues with user app + // So call finish for associated stream to enusre that the output data is ready. + if (_stream.get_queue_type() == queue_types::in_order) { + _stream.finish(); + } else { + _event->wait(); + } return _result; } private: - event _event; - memory _result; - network_output(event evt, memory mem) : _event(evt), _result(mem) {} + event::ptr _event; + memory::ptr _result; + stream& _stream; + network_output(event::ptr evt, memory::ptr mem, stream& stream) : _event(evt), _result(mem), _stream(stream) {} friend struct network; }; @@ -58,52 +67,49 @@ struct network { /// @param topology /// @param options /// @param options - network(const engine& engine, + network(engine& engine, const topology& topology, const build_options& options = build_options(), uint16_t stream_id = 0) : network(program(engine, topology, options), stream_id) {} /// @brief Constructs network object from C API @ref cldnn_network. - explicit network(network_impl* impl) : _impl(impl) { + explicit network(std::shared_ptr impl) : _impl(impl) { if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null"); } /// @brief Copy construction. 
- network(const network& other) : _impl(other._impl) { retain(); } + network(const network& other) : _impl(other._impl) { } /// @brief Copy assignment. network& operator=(const network& other) { if (_impl == other._impl) return *this; - release(); _impl = other._impl; - retain(); return *this; } - /// @brief Releases wrapped C API @ref cldnn_network. - ~network() { release(); } - friend bool operator==(const network& lhs, const network& rhs) { return lhs._impl == rhs._impl; } friend bool operator!=(const network& lhs, const network& rhs) { return !(lhs == rhs); } /// @brief Returns @ref engine by which network was built. - engine get_engine() const; + engine& get_engine() const; /// @brief Returns network internal @ref program. program get_program() const; /// @brief Provides @ref memory for @ref input_layout primitives defined by user in source @ref topology. - void set_input_data(const primitive_id& id, const memory& mem) const; + void set_input_data(const primitive_id& id, memory::ptr mem) const; /// @brief Provides user-supplied @ref memory for output primitives defined by user in source @ref topology. - void set_output_memory(const primitive_id& id, const memory& mem) const; + void set_output_memory(const primitive_id& id, memory::ptr mem) const; /// @brief Return stream id. uint16_t get_stream_id(); + stream& get_stream() const; + /// @brief Return internal network id. uint32_t get_id(); @@ -131,18 +137,18 @@ struct network { std::vector get_output_ids() const; /// @brief Returns @ref memory object for particular @p output. Can be called before network execution - memory get_output_memory(const primitive_id& output_id) const; + memory::ptr get_output_memory(const primitive_id& output_id) const; /// @brief Returns @ref event object for particular @p primitive. Can't be called before network execution - event get_primitive_event(const primitive_id& output_id) const; + event::ptr get_primitive_event(const primitive_id& output_id) const; /// @brief Returns @ref network_output object for particular @p output. Can't be called before network execution network_output get_output(const primitive_id& output_id) const { - return network_output(get_primitive_event(output_id), get_output_memory(output_id)); + return network_output(get_primitive_event(output_id), get_output_memory(output_id), get_stream()); } /// @brief Returns the list of @ref event for the primitives that were executed in network. - std::map get_executed_primitives() const { + std::map get_executed_primitives() const { auto primitive_ids = get_executed_primitive_ids(); auto all_primitive_ids = get_all_primitive_ids(); auto all_primitive_org_ids = get_all_primitive_org_ids(); @@ -152,7 +158,7 @@ struct network { if (all_primitive_ids[i] == "_optimized_") optimized_primitives.push_back(all_primitive_org_ids[i]); } - std::map result; + std::map result; for (auto& id : primitive_ids) { if (std::find(optimized_primitives.begin(), optimized_primitives.end(), id) == optimized_primitives.end()) result.emplace(id, get_primitive_event(id)); @@ -177,16 +183,13 @@ struct network { /// @param dependencies List of @ref event objects to be waited before network execution. /// @note User should call set_input_data() for every @ref input_layout defined in source @ref topology /// before network execution. - std::map execute(const std::vector& dependencies = {}) const; + std::map execute(const std::vector& dependencies = {}) const; /// @brief Returns wrapped C API @ref cldnn_network handler. 
- network_impl* get() const { return _impl; } + network_impl* get() const { return _impl.get(); } private: - network_impl* _impl; - - void retain(); - void release(); + std::shared_ptr _impl; }; CLDNN_API_CLASS(network) /// @} diff --git a/inference-engine/thirdparty/clDNN/api/program.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp similarity index 97% rename from inference-engine/thirdparty/clDNN/api/program.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp index b50754e1fd0..95dfca9fc14 100644 --- a/inference-engine/thirdparty/clDNN/api/program.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp @@ -3,11 +3,12 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// + #pragma once -#include "cldnn.hpp" + +#include "cldnn/runtime/engine.hpp" + #include "topology.hpp" -#include "engine.hpp" -#include #include #include @@ -477,21 +478,19 @@ public: /// @param[in] engine The engine which will be used to build the program. /// @param[in] topology The user-defined topology on which the network will be based. /// @param[in] options Program build options. See @ref build_option and @ref build_options for details. - program(engine const& engine, topology const& topology, build_options const& options = build_options()); + program(engine& engine, const topology& topology, const build_options& options = build_options()); /// @brief Copy constructor. - program(program const& other) : _impl(other._impl) { retain(); } + program(const program& other) : _impl(other._impl) { } /// @brief Dereferences the counter of the underlying C API @ref cldnn_program handler. - ~program() { release(); } + ~program() { } /// @brief Assigns new value by releasing previously referenced C API @ref cldnn_program handler and retaining the one referenced by @p other. program& operator=(const program& other) { if (_impl == other._impl) return *this; - release(); _impl = other._impl; - retain(); return *this; } @@ -500,19 +499,15 @@ public: /// @brief Checks whether @p lhs and @p rhs reference different C API @ref cldnn_program handlers friend bool operator!=(const program& lhs, const program& rhs) { return !(lhs == rhs); } - /// @brief Returns wrapped C API @ref cldnn_program handler. 
- program_impl* get() const { return _impl; } + std::shared_ptr get() const { return _impl; } private: - program_impl* _impl; + std::shared_ptr _impl; - explicit program(program_impl* impl) : _impl(impl) { + explicit program(std::shared_ptr impl) : _impl(impl) { if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null"); } - - void retain(); - void release(); }; /// @} /// @} diff --git a/inference-engine/thirdparty/clDNN/api/topology.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/graph/topology.hpp similarity index 84% rename from inference-engine/thirdparty/clDNN/api/topology.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/graph/topology.hpp index 78e4fc875cf..4155c7ac529 100644 --- a/inference-engine/thirdparty/clDNN/api/topology.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/topology.hpp @@ -3,11 +3,13 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// + #pragma once + +#include "cldnn/runtime/compounds.hpp" +#include "cldnn/primitives/primitive.hpp" + #include -#include "cldnn.hpp" -#include "compounds.h" -#include "primitive.hpp" #include #include @@ -33,26 +35,24 @@ struct topology { } /// @brief Copy construction. - topology(const topology& other) : _impl(other._impl) { retain(); } + topology(const topology& other) : _impl(other._impl) { } /// @brief Copy assignment. topology& operator=(const topology& other) { if (_impl == other._impl) return *this; - release(); _impl = other._impl; - retain(); return *this; } /// Construct C++ topology based on C API @p cldnn_topology - explicit topology(topology_impl* other) : _impl(other) { + explicit topology(std::shared_ptr other) : _impl(other) { if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null"); } /// @brief Releases wrapped C API @ref cldnn_topology. - ~topology() { release(); } + ~topology() { } friend bool operator==(const topology& lhs, const topology& rhs) { return lhs._impl == rhs._impl; } friend bool operator!=(const topology& lhs, const topology& rhs) { return !(lhs == rhs); } @@ -73,7 +73,7 @@ struct topology { } /// @brief Returns wrapped implementation pointer. 
- topology_impl* get() const { return _impl; } + std::shared_ptr get() const { return _impl; } const std::vector get_primitive_ids() const; @@ -82,12 +82,9 @@ struct topology { const std::shared_ptr& at(const primitive_id& id) const; private: - friend struct engine; + friend class engine; friend struct network; - topology_impl* _impl; - - void retain(); - void release(); + std::shared_ptr _impl; }; CLDNN_API_CLASS(topology) diff --git a/inference-engine/thirdparty/clDNN/api/activation.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/activation.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/activation.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/activation.hpp diff --git a/inference-engine/thirdparty/clDNN/api/arg_max_min.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/arg_max_min.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/arg_max_min.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/arg_max_min.hpp diff --git a/inference-engine/thirdparty/clDNN/api/average_unpooling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/average_unpooling.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/average_unpooling.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/average_unpooling.hpp diff --git a/inference-engine/thirdparty/clDNN/api/batch_to_space.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/batch_to_space.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/batch_to_space.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/batch_to_space.hpp diff --git a/inference-engine/thirdparty/clDNN/api/binary_convolution.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/binary_convolution.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/binary_convolution.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/binary_convolution.hpp diff --git a/inference-engine/thirdparty/clDNN/api/border.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/border.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/border.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/border.hpp diff --git a/inference-engine/thirdparty/clDNN/api/broadcast.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/broadcast.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/broadcast.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/broadcast.hpp diff --git a/inference-engine/thirdparty/clDNN/api/concatenation.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/concatenation.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/concatenation.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/concatenation.hpp diff --git a/inference-engine/thirdparty/clDNN/api/condition.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/condition.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api/condition.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/condition.hpp index 1becbc1aff5..aa63586c4a3 100644 --- a/inference-engine/thirdparty/clDNN/api/condition.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/condition.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include 
"primitive.hpp" -#include "topology.hpp" +#include "cldnn/graph/topology.hpp" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/convolution.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/convolution.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/convolution.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/convolution.hpp diff --git a/inference-engine/thirdparty/clDNN/api/crop.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/crop.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/crop.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/crop.hpp diff --git a/inference-engine/thirdparty/clDNN/api/ctc_greedy_decoder.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/ctc_greedy_decoder.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/ctc_greedy_decoder.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/ctc_greedy_decoder.hpp diff --git a/inference-engine/thirdparty/clDNN/api/cum_sum.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/cum_sum.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/cum_sum.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/cum_sum.hpp diff --git a/inference-engine/thirdparty/clDNN/api/custom_gpu_primitive.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/custom_gpu_primitive.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api/custom_gpu_primitive.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/custom_gpu_primitive.hpp index 22b43fedd3c..25da079a5de 100644 --- a/inference-engine/thirdparty/clDNN/api/custom_gpu_primitive.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/custom_gpu_primitive.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" -#include "memory.hpp" +#include "cldnn/runtime/memory.hpp" #include #include diff --git a/inference-engine/thirdparty/clDNN/api/data.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/data.hpp similarity index 92% rename from inference-engine/thirdparty/clDNN/api/data.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/data.hpp index 68e43ee1abd..e7fe1f889fc 100644 --- a/inference-engine/thirdparty/clDNN/api/data.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/data.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" -#include "memory.hpp" +#include "cldnn/runtime/memory.hpp" namespace cldnn { /// @addtogroup cpp_api C++ API @@ -26,12 +26,12 @@ struct data : public primitive_base { /// @param id This primitive id. /// @param mem @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. - data(const primitive_id& id, const memory& mem) + data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}, padding()), mem(mem) {} /// @brief @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. 
- memory mem; + memory::ptr mem; }; /// @} /// @} diff --git a/inference-engine/thirdparty/clDNN/api/deconvolution.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/deconvolution.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/deconvolution.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/deconvolution.hpp diff --git a/inference-engine/thirdparty/clDNN/api/depth_to_space.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/depth_to_space.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/depth_to_space.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/depth_to_space.hpp diff --git a/inference-engine/thirdparty/clDNN/api/detection_output.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/detection_output.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/detection_output.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/detection_output.hpp diff --git a/inference-engine/thirdparty/clDNN/api/eltwise.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/eltwise.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/eltwise.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/eltwise.hpp diff --git a/inference-engine/thirdparty/clDNN/api/embedding_bag.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/embedding_bag.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/embedding_bag.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/embedding_bag.hpp diff --git a/inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/extract_image_patches.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/extract_image_patches.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/extract_image_patches.hpp diff --git a/inference-engine/thirdparty/clDNN/api/fully_connected.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/fully_connected.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/fully_connected.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/fully_connected.hpp diff --git a/inference-engine/thirdparty/clDNN/api_extension/fused_conv_eltwise.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/fused_conv_eltwise.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api_extension/fused_conv_eltwise.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/fused_conv_eltwise.hpp index ff1c274fb1c..6aed6cf6a75 100644 --- a/inference-engine/thirdparty/clDNN/api_extension/fused_conv_eltwise.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/fused_conv_eltwise.hpp @@ -4,8 +4,8 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/primitive.hpp" -#include "api/eltwise.hpp" +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/primitives/eltwise.hpp" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/gather.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/gather.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather.hpp diff --git a/inference-engine/thirdparty/clDNN/api/gather_nd.hpp 
b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather_nd.hpp similarity index 63% rename from inference-engine/thirdparty/clDNN/api/gather_nd.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather_nd.hpp index ebf3953cf7f..885f7297510 100644 --- a/inference-engine/thirdparty/clDNN/api/gather_nd.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather_nd.hpp @@ -1,20 +1,7 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" diff --git a/inference-engine/thirdparty/clDNN/api/gather_tree.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather_tree.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/gather_tree.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/gather_tree.hpp diff --git a/inference-engine/thirdparty/clDNN/api/gemm.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/gemm.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/gemm.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/gemm.hpp diff --git a/inference-engine/thirdparty/clDNN/api/grn.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/grn.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/grn.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/grn.hpp diff --git a/inference-engine/thirdparty/clDNN/api/input_layout.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/input_layout.hpp similarity index 97% rename from inference-engine/thirdparty/clDNN/api/input_layout.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/input_layout.hpp index 61cc5544e7a..c6be1bf5b39 100644 --- a/inference-engine/thirdparty/clDNN/api/input_layout.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/input_layout.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" -#include "memory.hpp" +#include "cldnn/runtime/memory.hpp" namespace cldnn { /// @addtogroup cpp_api C++ API diff --git a/inference-engine/thirdparty/clDNN/api/loop.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/loop.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/api/loop.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/loop.hpp index 8268b728e12..63215f3ad98 100644 --- a/inference-engine/thirdparty/clDNN/api/loop.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/loop.hpp @@ -7,7 +7,7 @@ #include #include #include "primitive.hpp" -#include "topology.hpp" +#include "cldnn/graph/topology.hpp" #define DEFAULT_MAX_NUM_ITERATION 256 namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/lrn.hpp 
b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lrn.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/lrn.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/lrn.hpp diff --git a/inference-engine/thirdparty/clDNN/api/lstm.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/lstm.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm.hpp diff --git a/inference-engine/thirdparty/clDNN/api/lstm_dynamic.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/lstm_dynamic.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic.hpp diff --git a/inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_input.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_input.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_input.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_input.hpp index 1a3d0fb7bff..3733a989fe6 100644 --- a/inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_input.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_input.hpp @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/primitive.hpp" +#include "cldnn/primitives/primitive.hpp" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_timeloop.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_timeloop.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_timeloop.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_timeloop.hpp index 5449c95b01b..c7b77fb1afe 100644 --- a/inference-engine/thirdparty/clDNN/api_extension/lstm_dynamic_timeloop.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/lstm_dynamic_timeloop.hpp @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/primitive.hpp" +#include "cldnn/primitives/primitive.hpp" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/max_unpooling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/max_unpooling.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/max_unpooling.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/max_unpooling.hpp diff --git a/inference-engine/thirdparty/clDNN/api/mutable_data.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/mutable_data.hpp similarity index 92% rename from inference-engine/thirdparty/clDNN/api/mutable_data.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/mutable_data.hpp index 6a8acc0770a..a5aca1f4392 100644 --- a/inference-engine/thirdparty/clDNN/api/mutable_data.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/mutable_data.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" -#include "memory.hpp" +#include "cldnn/runtime/memory.hpp" #include namespace cldnn { @@ -31,7 +31,7 @@ struct mutable_data : public primitive_base { /// @param mem @ref memory object which 
contains data. /// @param filler_type @ref data filling function, default is zero /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. - mutable_data(const primitive_id& id, const memory& mem, filler_type fill_type = filler_type::no_fill) + mutable_data(const primitive_id& id, memory::ptr mem, filler_type fill_type = filler_type::no_fill) : primitive_base(id, {}, padding()), mem(mem), fill_type(fill_type) {} /// @brief Constructs mutable_data primitive with inputs. @@ -42,13 +42,13 @@ struct mutable_data : public primitive_base { /// @param filler_type @ref data filling function, default is zero mutable_data(const primitive_id& id, const std::vector& input, - const memory& mem, + memory::ptr mem, filler_type fill_type = filler_type::no_fill) : primitive_base(id, {input}, padding()), mem(mem), fill_type(fill_type) {} /// @brief @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. - memory mem; + memory::ptr mem; /// @brief Specifies function which will be used to fill weights. filler_type fill_type; diff --git a/inference-engine/thirdparty/clDNN/api/mvn.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/mvn.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/mvn.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/mvn.hpp diff --git a/inference-engine/thirdparty/clDNN/api/non_max_suppression.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/non_max_suppression.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/non_max_suppression.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/non_max_suppression.hpp diff --git a/inference-engine/thirdparty/clDNN/api/normalize.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/normalize.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/normalize.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/normalize.hpp diff --git a/inference-engine/thirdparty/clDNN/api/one_hot.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/one_hot.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/one_hot.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/one_hot.hpp diff --git a/inference-engine/thirdparty/clDNN/api/permute.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/permute.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/permute.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/permute.hpp diff --git a/inference-engine/thirdparty/clDNN/api/pooling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/pooling.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/pooling.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/pooling.hpp diff --git a/inference-engine/thirdparty/clDNN/api/primitive.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/primitive.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api/primitive.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/primitive.hpp index 1c8ac408493..856df2e5649 100644 --- a/inference-engine/thirdparty/clDNN/api/primitive.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/primitive.hpp @@ -3,11 +3,11 @@ // 
/////////////////////////////////////////////////////////////////////////////////////////////////// + #pragma once -#include "cldnn.hpp" -#include "compounds.h" -#include "layout.hpp" +#include "cldnn/runtime/compounds.hpp" +#include "cldnn/runtime/layout.hpp" #include #include diff --git a/inference-engine/thirdparty/clDNN/api/prior_box.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/prior_box.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/prior_box.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/prior_box.hpp diff --git a/inference-engine/thirdparty/clDNN/api/proposal.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/proposal.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/proposal.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/proposal.hpp diff --git a/inference-engine/thirdparty/clDNN/api/pyramid_roi_align.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/pyramid_roi_align.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/pyramid_roi_align.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/pyramid_roi_align.hpp diff --git a/inference-engine/thirdparty/clDNN/api/quantize.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/quantize.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/quantize.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/quantize.hpp diff --git a/inference-engine/thirdparty/clDNN/api/reduce.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reduce.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/reduce.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/reduce.hpp diff --git a/inference-engine/thirdparty/clDNN/api/region_yolo.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/region_yolo.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/region_yolo.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/region_yolo.hpp diff --git a/inference-engine/thirdparty/clDNN/api/reorder.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reorder.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/api/reorder.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/reorder.hpp index ab597d7de18..c980c34f0d4 100644 --- a/inference-engine/thirdparty/clDNN/api/reorder.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reorder.hpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once #include "primitive.hpp" -#include "memory.hpp" +#include "cldnn/runtime/memory.hpp" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/reorg_yolo.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reorg_yolo.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/reorg_yolo.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/reorg_yolo.hpp diff --git a/inference-engine/thirdparty/clDNN/api/resample.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/resample.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/resample.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/resample.hpp diff --git a/inference-engine/thirdparty/clDNN/api/reshape.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reshape.hpp similarity 
index 100% rename from inference-engine/thirdparty/clDNN/api/reshape.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/reshape.hpp diff --git a/inference-engine/thirdparty/clDNN/api/reverse_sequence.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/reverse_sequence.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/reverse_sequence.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/reverse_sequence.hpp diff --git a/inference-engine/thirdparty/clDNN/api/roi_pooling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/roi_pooling.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/roi_pooling.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/roi_pooling.hpp diff --git a/inference-engine/thirdparty/clDNN/api/scale.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/scale.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/scale.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/scale.hpp diff --git a/inference-engine/thirdparty/clDNN/api/scatter_elements_update.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_elements_update.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/scatter_elements_update.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_elements_update.hpp diff --git a/inference-engine/thirdparty/clDNN/api/scatter_nd_update.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_nd_update.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/scatter_nd_update.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_nd_update.hpp diff --git a/inference-engine/thirdparty/clDNN/api/scatter_update.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_update.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/scatter_update.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/scatter_update.hpp diff --git a/inference-engine/thirdparty/clDNN/api/select.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/select.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/select.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/select.hpp diff --git a/inference-engine/thirdparty/clDNN/api/shuffle_channels.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/shuffle_channels.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/shuffle_channels.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/shuffle_channels.hpp diff --git a/inference-engine/thirdparty/clDNN/api/softmax.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/softmax.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/softmax.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/softmax.hpp diff --git a/inference-engine/thirdparty/clDNN/api/space_to_batch.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/space_to_batch.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/space_to_batch.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/space_to_batch.hpp diff --git a/inference-engine/thirdparty/clDNN/api/space_to_depth.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/space_to_depth.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/space_to_depth.hpp 
rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/space_to_depth.hpp diff --git a/inference-engine/thirdparty/clDNN/api/split.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/split.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/split.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/split.hpp diff --git a/inference-engine/thirdparty/clDNN/api/strided_slice.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/strided_slice.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/strided_slice.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/strided_slice.hpp diff --git a/inference-engine/thirdparty/clDNN/api/tile.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/tile.hpp similarity index 100% rename from inference-engine/thirdparty/clDNN/api/tile.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/primitives/tile.hpp diff --git a/inference-engine/thirdparty/clDNN/api/compounds.h b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/compounds.hpp similarity index 98% rename from inference-engine/thirdparty/clDNN/api/compounds.h rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/compounds.hpp index 45429e584f9..ade492705a5 100644 --- a/inference-engine/thirdparty/clDNN/api/compounds.h +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/compounds.hpp @@ -4,6 +4,8 @@ #pragma once +#include "utils.hpp" + #include #include #include @@ -11,7 +13,6 @@ #include #include -#include "meta_utils.hpp" namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device.hpp new file mode 100644 index 00000000000..7f5e1f13d7d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "device_info.hpp" +#include "memory_caps.hpp" + +#include + +namespace cldnn { + +/// @brief Represents detected GPU device object. Use device_query to get list of available objects. +struct device { +public: + using ptr = std::shared_ptr; + virtual device_info get_info() const = 0; + virtual memory_capabilities get_mem_caps() const = 0; + + virtual ~device() = default; +}; + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp new file mode 100644 index 00000000000..60ac318340d --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_info.hpp @@ -0,0 +1,76 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace cldnn { +/// @addtogroup cpp_api C++ API +/// @{ + +/// @defgroup cpp_device GPU Device +/// @{ + +/// @brief Enumeration of supported device types +enum class device_type { + integrated_gpu = 0, + discrete_gpu = 1 +}; + +/// @brief Defines version of GFX IP +struct gfx_version { + uint16_t major; + uint8_t minor; + uint8_t revision; +}; + +/// @brief Information about the device properties and capabilities. +struct device_info { + uint32_t execution_units_count; ///< Number of available execution units. + uint32_t gpu_frequency; ///< Clock frequency in MHz. + uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU. 
+ uint32_t max_threads_per_device; ///< Maximum number of HW threads on device. + + uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. + uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes. + uint64_t max_global_mem_size; ///< Maximum size of global device memory in bytes. + uint64_t max_alloc_mem_size; ///< Maximum size of memory object allocation in bytes. + + uint64_t max_image2d_width; ///< Maximum image 2d width supported by the device. + uint64_t max_image2d_height; ///< Maximum image 2d height supported by the device. + + bool supports_fp16; ///< Does engine support FP16. + bool supports_fp64; ///< Does engine support FP64. + bool supports_fp16_denorms; ///< Does engine support denormalized FP16. + bool supports_subgroups; ///< Does engine support cl_intel_subgroups extension. + bool supports_subgroups_short; ///< Does engine support cl_intel_subgroups_short extension. + bool supports_subgroups_char; ///< Does engine support cl_intel_subgroups_char extension. + bool supports_local_block_io; ///< Does engine support cl_intel_subgroup_local_block_io extension. + bool supports_image; ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap). + + bool supports_imad; ///< Does engine support int8 mad. + bool supports_immad; ///< Does engine support int8 multi mad. + + bool supports_usm; ///< Does engine support unified shared memory. + + uint32_t vendor_id; ///< Vendor ID + std::string dev_name; ///< Device ID string + std::string driver_version; ///< Version of OpenCL driver + + device_type dev_type; ///< Defines type of current GPU device (integrated or discrete) + + gfx_version gfx_ver; ///< Defines GFX IP version + uint32_t device_id; ///< ID of current GPU + uint32_t num_slices; ///< Number of slices + uint32_t num_sub_slices_per_slice; ///< Number of subslices in a slice + uint32_t num_eus_per_sub_slice; ///< Number of execution units per subslice + uint32_t num_threads_per_eu; ///< Number of hardware threads per execution unit +}; + +/// @} + +/// @} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_query.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_query.hpp new file mode 100644 index 00000000000..af458360288 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/device_query.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "device.hpp" +#include "engine_configuration.hpp" + +#include +#include +#include + +namespace cldnn { + +// Fetches all available gpu devices with specific runtime and engine types and (optionally) user context/device handles +struct device_query { +public: + explicit device_query(engine_types engine_type, runtime_types runtime_type, void* user_context = nullptr, void* user_device = nullptr); + + std::map get_available_devices() const { + return _available_devices; + } + + ~device_query() = default; +private: + std::map _available_devices; +}; +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp new file mode 100644 index 00000000000..b81f626c5e6 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp @@ -0,0 +1,150 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
"device.hpp" +#include "engine_configuration.hpp" +#include "event.hpp" +#include "memory_caps.hpp" +#include "memory_pool.hpp" +#include "layout.hpp" + +#include +#include +#include +#include + +namespace cldnn { + +class stream; + +using memory_ptr = std::shared_ptr; +using stream_ptr = std::shared_ptr; + +using primitive_id = std::string; + +class engine { +public: + /// Default destructor + virtual ~engine() = default; + + /// Returns type of the engine + virtual engine_types type() const = 0; + /// Returns runtime type used in the engine + virtual runtime_types runtime_type() const = 0; + + /// Create memory object with specified @p layout and allocation @p type for primitive with @p id + /// Underlying memory handle can be reused with other primitives from memory pool based on @p dependencies + memory_ptr get_memory_from_pool(const layout& layout, + primitive_id id, + uint32_t network_id, + std::set dependencies, + allocation_type type, + bool reusable = true); + + /// Create memory object attached to the buffer allocated by user. + /// @param ptr The pointer to user allocated buffer. + /// @note Size (in bytes) of the buffer should be equal to @p layout.bytes_count() + /// User is responsible for buffer deallocation. Buffer lifetime should be bigger than lifetime of the memory object. + memory_ptr attach_memory(const layout& layout, void* ptr); + + /// Allocate gpu memory using specified @p layout and alloation @p type + virtual memory_ptr allocate_memory(const layout& layout, allocation_type type, bool reset = true) = 0; + + /// Allocate gpu memory using specified @p layout. Allocation type is selected automatically based on engine/device configuration + memory_ptr allocate_memory(const layout& layout, bool reset = true); + + /// Created memory object from memory @p params and reinterpred the data using specified @p layout + virtual memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) = 0; + + /// Created memory object from the other @p memory and reinterpred the data using specified @p new_layout + virtual memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) = 0; + + /// Create shared memory object using user-supplied memory buffer @p buf using specified @p layout + memory_ptr share_buffer(const layout& layout, shared_handle buf); + + /// Create shared memory object using user-supplied 2D image @p img using specified @p layout + memory_ptr share_image(const layout& layout, shared_handle img); + + /// Create shared memory object over specified @p plane of video decoder surface @p surf using specified @p layout +#ifdef _WIN32 + memory_ptr share_surface(const layout& layout, shared_handle surf, uint32_t plane); + memory_ptr share_dx_buffer(const layout& layout, shared_handle res); +#else + memory_ptr share_surface(const layout& layout, shared_surface surf, uint32_t plane); +#endif + + /// Checks whether two memory objects represents the same physical memory + virtual bool is_the_same_buffer(const memory& mem1, const memory& mem2) = 0; + + /// Returns basic allocation type which will be used as a fallback when allocation type is not specified or device doesn't support some features. 
+ virtual allocation_type get_default_allocation_type() const = 0; + + /// Returns preferred allocation type which can be mapped to host ptr + allocation_type get_lockable_preffered_memory_allocation_type(bool is_image_layout = false) const; + + /// Checks if the current engine supports speicied allocation @p type + bool supports_allocation(allocation_type type) const; + + /// Returns configuration of current engine + const engine_configuration& configuration() const { return _configuration; } + + /// Returns device structure which represents stores device capabilities + device_info get_device_info() const; + + /// Returns device object associated with the engine + const device::ptr get_device() const; + + /// Returns memory pool for the engine + memory_pool& get_memory_pool(); + + /// Returns user context handle which was used to create the engine + virtual void* get_user_context() const = 0; + + /// Returns the maximum amount of GPU memory that engine allocated in current process + uint64_t get_max_used_device_memory() const; + + /// Returns the amount of GPU memory currently used by the engine + uint64_t get_used_device_memory() const; + + /// Returns true if USM is enabled in engine config and device/driver supports required features + bool use_unified_shared_memory() const; + + /// Create stream object for current engine + virtual stream_ptr create_stream() const = 0; + + /// Returns service stream which can be used during program build and optimizations + virtual stream& get_program_stream() const = 0; + + /// Factory method which creates engine object with impl configured by @p engine_type + /// @param engine_type requested engine type + /// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported + /// @param device specifies the device which the engine is created for + /// @param configuration options for the engine + static std::shared_ptr create(engine_types engine_type, + runtime_types runtime_type, + const device::ptr device, + const engine_configuration& configuration = engine_configuration()); + + /// Factory method which creates engine object with impl configured by @p engine_type + /// @param engine_type requested engine type + /// @param runtime_type requested execution runtime for the engine. 
@note some runtime/engine types configurations might be unsupported + /// @param configuration options for the engine + /// @note engine is created for the first device returned by devices query + static std::shared_ptr create(engine_types engine_type, + runtime_types runtime_type, + const engine_configuration& configuration = engine_configuration()); + +protected: + /// Create engine for given @p device and @p configuration + engine(const device::ptr device, const engine_configuration& configuration); + + // TODO: Consider moving memory pool to cldnn::network + std::unique_ptr _memory_pool; + const device::ptr _device; + engine_configuration _configuration; +}; + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine_configuration.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine_configuration.hpp new file mode 100644 index 00000000000..2cbaca414ab --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine_configuration.hpp @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "utils.hpp" + +#include +#include +#include + +namespace cldnn { + +/// @addtogroup cpp_api C++ API +/// @{ + +/// @defgroup cpp_engine Execution Engine +/// @{ + +/// @brief Defines available engine types +enum class engine_types : int32_t { + ocl, +}; + +/// @brief Defines available runtime types +enum class runtime_types : int32_t { + ocl, +}; + +/// @brief Defines available priority mode types +enum class priority_mode_types : int16_t { + disabled, + low, + med, + high +}; + +/// @brief Defines available throttle mode types +enum class throttle_mode_types : int16_t { + disabled, + low, + med, + high +}; + +/// @brief Defines supported queue types +enum class queue_types : int16_t { + in_order, + out_of_order +}; + +/// @brief Configuration parameters for created engine. +struct engine_configuration { + const bool enable_profiling; ///< Enable per-primitive profiling. + const queue_types queue_type; ///< Specifies type of queue used by the runtime + const std::string sources_dumps_dir; ///< Specifies a directory where sources of cldnn::program objects should be dumped. + ///< Empty by default (means no dumping). + const priority_mode_types priority_mode; ///< Priority mode (support of priority hints in command queue). If cl_khr_priority_hints extension + ///< is not supported by current OpenCL implementation, the value must be set to cldnn_priority_disabled. + + const throttle_mode_types throttle_mode; ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension + ///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled. + + bool use_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible + ///< (switched off for older drivers then NEO). + bool use_unified_shared_memory; ///< Enables USM usage + const std::string kernels_cache_path; ///< Path to compiled kernels cache + uint16_t n_threads; ///< Max number of host threads used in gpu plugin + const std::string tuning_cache_path; ///< Path to tuning kernel cache + + /// @brief Constructs engine configuration with specified options. + /// @param enable_profiling Enable per-primitive profiling. 
+ /// @param queue_type Specifies type of queue used by the runtime + /// @param sources_dumps_dir Specifies a directory where sources of cldnn::program objects should be dumped + /// @param priority_mode Priority mode for all streams created within the engine + /// @param throttle_mode Throttle mode for all streams created within the engine + /// @param use_memory_pool Controls whether engine is allowed to reuse intermediate memory buffers whithin a network + /// @param use_unified_shared_memory If this option it true and device supports USM, then engine will use USM for all memory allocations + /// @param kernels_cache_path Path to existing directory where plugin can cache compiled kernels + /// @param n_threads Max number of host threads used in gpu plugin + /// @param tuning_cache_path Path to tuning kernel cache + engine_configuration( + bool enable_profiling = false, + queue_types queue_type = queue_types::out_of_order, + const std::string& sources_dumps_dir = std::string(), + priority_mode_types priority_mode = priority_mode_types::disabled, + throttle_mode_types throttle_mode = throttle_mode_types::disabled, + bool use_memory_pool = true, + bool use_unified_shared_memory = true, + const std::string& kernels_cache_path = "", + uint16_t n_threads = std::max(static_cast(std::thread::hardware_concurrency()), static_cast(1)), + const std::string& tuning_cache_path = "cache.json") + : enable_profiling(enable_profiling) + , queue_type(queue_type) + , sources_dumps_dir(sources_dumps_dir) + , priority_mode(priority_mode) + , throttle_mode(throttle_mode) + , use_memory_pool(use_memory_pool) + , use_unified_shared_memory(use_unified_shared_memory) + , kernels_cache_path(kernels_cache_path) + , n_threads(n_threads) + , tuning_cache_path(tuning_cache_path) { } +}; + +/// @} + +/// @} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/error_handler.h b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/error_handler.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/src/include/error_handler.h rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/error_handler.hpp index 1ce66e39c9f..7f57d64861c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/error_handler.h +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/error_handler.hpp @@ -2,14 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once + +#include "layout.hpp" + #include #include #include #include #include -#include "api/layout.hpp" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/event_impl.h b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/event.hpp similarity index 82% rename from inference-engine/thirdparty/clDNN/src/include/event_impl.h rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/event.hpp index c87e34a3f84..db2eda1a501 100644 --- a/inference-engine/thirdparty/clDNN/src/include/event_impl.h +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/event.hpp @@ -2,21 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/event.hpp" -#include "refcounted_obj.h" + +#include "profiling.hpp" #include #include #include +#include +#include namespace cldnn { struct user_event; -struct event_impl : public refcounted_obj { +/// @brief user-defined event handler callback. 
+using event_handler = std::function; + +struct event { public: - event_impl() = default; + using ptr = std::shared_ptr; + event() = default; void wait(); bool is_set(); @@ -27,10 +32,11 @@ public: _profiling_captured = false; _profiling_info.clear(); } + // returns true if handler has been successfully added bool add_event_handler(event_handler handler, void* data); - const std::list& get_profiling_info(); + std::vector get_profiling_info(); private: std::mutex _handlers_mutex; @@ -54,7 +60,7 @@ protected: virtual bool get_profiling_info_impl(std::list&) { return true; } }; -struct user_event : virtual public event_impl { +struct user_event : virtual public event { public: explicit user_event(bool set = false) { _set = set; } diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/half.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/half.hpp new file mode 100644 index 00000000000..785cc711d8f --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/half.hpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace cldnn { + +/// @addtogroup cpp_api C++ API +/// @{ + +float half_to_float(uint16_t value); +uint16_t float_to_half(float value); + +// There is no portable half precision floating point support. +// Using wrapped integral type with the same size and alignment restrictions. +class half_impl { +public: + half_impl() = default; + + template ::value>::type> + explicit half_impl(T data, int /*direct_creation_tag*/) : _data(data) {} + + operator uint16_t() const { return _data; } + operator float() const { + return half_to_float(_data); + } + + explicit half_impl(float value) + : _data(float_to_half(value)) + {} + + template ::value>::type> + explicit half_impl(T value) + : half_impl(static_cast(value)) + {} + +private: + uint16_t _data; +}; + +// Use complete implementation if necessary. 
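A tiny round-trip example of this wrapper, assuming float_to_half()/half_to_float() behave as their names suggest (they are only declared, not defined, in this header):

    cldnn::half_impl h(1.5f);             // stores the binary16 encoding produced by float_to_half()
    float back = static_cast<float>(h);   // decodes via half_to_float(); 1.5f is exactly representable
    uint16_t bits = h;                    // raw binary16 payload (0x3E00 for 1.5)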
+#if defined HALF_HALF_HPP +using half_t = half; +#else +using half_t = half_impl; +#endif + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel.hpp new file mode 100644 index 00000000000..0d75173e772 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "kernel_args.hpp" +#include "event.hpp" + +#include +#include + +namespace cldnn { + +using kernel_id = std::string; + +class kernel { +public: + using ptr = std::shared_ptr; + virtual std::shared_ptr clone() const = 0; + virtual ~kernel() = default; +}; + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel_args.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel_args.hpp new file mode 100644 index 00000000000..f70d04ee33a --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/kernel_args.hpp @@ -0,0 +1,133 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "memory.hpp" + +#include +#include + +namespace cldnn { + +struct work_group_sizes { + std::vector global; + std::vector local; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Scalar +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct scalar_desc { + union ValueT { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + int8_t s8; + int16_t s16; + int32_t s32; + int64_t s64; + float f32; + double f64; + }; + + enum class Types { + UINT8, + UINT16, + UINT32, + UINT64, + INT8, + INT16, + INT32, + INT64, + FLOAT32, + FLOAT64, + }; + + Types t; + ValueT v; +}; + +using scalars_desc = std::vector; + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ArgumentDescpirtor +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct argument_desc { + enum class Types { + INPUT, + OUTPUT, + WEIGHTS, + BIAS, + SCALE_TABLE, + SLOPE, + SPLIT, + INTERNAL_BUFFER, + SCALAR, + RECURRENT, // RNN/LSTM/GRU recurrent weights + HIDDEN, // RNN/LSTM/GRU hidden input + CELL, // LSTM cell input + LSTM_PACK, // LSTM packed output + WEIGHTS_ZERO_POINTS, + ACTIVATIONS_ZERO_POINTS, + COMPENSATION, + INPUT_OF_FUSED_PRIMITIVE + }; + + Types t; + uint32_t index; +}; + +using arguments_desc = std::vector; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// KernelParams +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct kernel_arguments_desc { + work_group_sizes workGroups; + arguments_desc arguments; + scalars_desc scalars; + std::string layerID; +}; + +struct kernel_arguments_data { + std::vector inputs; + std::vector intermediates; + memory::cptr output; + memory::cptr weights; + memory::cptr recurrent; + memory::cptr hidden; + memory::cptr cell; + memory::cptr bias; + memory::cptr weights_zero_points; + memory::cptr activations_zero_points; + memory::cptr compensation; + memory::cptr lookup_table; + memory::cptr scale_table; + memory::cptr slope; + + 
std::vector fused_op_inputs; + int32_t split = 0; + const scalars_desc* scalars = nullptr; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// KernelString +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct kernel_string { + std::string str; + std::string jit; + std::string undefs; + std::string options; + std::string entry_point; + bool batch_compilation; + + kernel_string() : str(""), jit(""), undefs(""), options(""), entry_point(""), batch_compilation(false) {} + + std::string get_hash() { return str + jit + undefs + options + entry_point; } +}; + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/layout.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/layout.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/api/layout.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/layout.hpp index ab4b17f03c9..4962cd00963 100644 --- a/inference-engine/thirdparty/clDNN/api/layout.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/layout.hpp @@ -2,9 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once + #include "tensor.hpp" +#include "half.hpp" + #include #include #include diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory.hpp new file mode 100644 index 00000000000..e054e90a477 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory.hpp @@ -0,0 +1,150 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "layout.hpp" +#include "memory_caps.hpp" +#include "event.hpp" +#include "engine_configuration.hpp" + +namespace cldnn { + +class engine; +class stream; + +struct memory { + using ptr = std::shared_ptr; + using cptr = std::shared_ptr; + memory(engine* engine, const layout& layout, allocation_type type, bool reused = false); + + virtual ~memory(); + virtual void* lock(const stream& stream) = 0; + virtual void unlock(const stream& stream) = 0; + virtual event::ptr fill(stream& stream, unsigned char pattern) = 0; + virtual event::ptr fill(stream& stream) = 0; + + size_t size() const { return _bytes_count; } + size_t count() const { return _layout.count(); } + virtual shared_mem_params get_internal_params() const = 0; + virtual bool is_allocated_by(const engine& engine) const { return &engine == _engine; } + engine* get_engine() const { return _engine; } + const layout& get_layout() const { return _layout; } + allocation_type get_allocation_type() const { return _type; } + // TODO: must be moved outside memory class + virtual bool is_memory_reset_needed(layout l) { + // To avoid memory reset, output memory must meet the following requirements: + // - To be Weights format (Data memory can be reused by memory_pool, which can lead to errors) + // - To have zero paddings + // - To be completely filled with data + if ((!format::is_weights_format(l.format) && !format::is_simple_data_format(l.format)) || + format::is_winograd(l.format) || format::is_image_2d(l.format)) { + return true; + } + + if (l.data_padding.lower_size() != tensor(0) || l.data_padding.upper_size() != tensor(0)) { + return true; + } + + if (_bytes_count == (l.data_type == data_types::bin ? 
ceil_div(l.count(), 32) : l.count()) * data_type_traits::size_of(l.data_type)) { + return false; + } + + return true; + } + + virtual event::ptr copy_from(stream& /* stream */, const memory& /* other */) = 0; + virtual event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */) = 0; + +protected: + engine* _engine; + const layout _layout; + // layout bytes count, needed because of traits static map destruction + // before run of memory destructor, when engine is static + size_t _bytes_count; + +private: + allocation_type _type; + bool _reused; +}; + +struct simple_attached_memory : memory { + simple_attached_memory(const layout& layout, void* pointer) + : memory(nullptr, layout, allocation_type::unknown), _pointer(pointer) {} + + void* lock(const stream& /* stream */) override { return _pointer; } + void unlock(const stream& /* stream */) override {} + event::ptr fill(stream& /* stream */, unsigned char) override { return nullptr; } + event::ptr fill(stream& /* stream */) override { return nullptr; } + shared_mem_params get_internal_params() const override { return { shared_mem_type::shared_mem_empty, nullptr, nullptr, nullptr, +#ifdef _WIN32 + nullptr, +#else + 0, +#endif + 0}; }; + + event::ptr copy_from(stream& /* stream */, const memory& /* other */) override { return nullptr; }; + event::ptr copy_from(stream& /* stream */, const void* /* host_ptr */) override { return nullptr; } + +private: + void* _pointer; +}; + +template +struct mem_lock { + explicit mem_lock(memory::ptr mem, const stream& stream) : _mem(mem), _stream(stream), _ptr(reinterpret_cast(_mem->lock(_stream))) {} + + ~mem_lock() { + _ptr = nullptr; + _mem->unlock(_stream); + } + + size_t size() const { return _mem->size() / sizeof(T); } + + mem_lock(const mem_lock& other) = delete; + mem_lock& operator=(const mem_lock& other) = delete; + +#if defined(_SECURE_SCL) && (_SECURE_SCL > 0) + auto begin() & { return stdext::make_checked_array_iterator(_ptr, size()); } + auto end() & { return stdext::make_checked_array_iterator(_ptr, size(), size()); } +#else + T* begin() & { return _ptr; } + T* end() & { return _ptr + size(); } +#endif + + /// @brief Provides indexed access to pointed memory. 
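A hedged sketch of the intended locking pattern, reusing the engine and buffer from the earlier engine example and a stream obtained from engine->create_stream():

    cldnn::stream_ptr stream = engine->create_stream();
    {
        cldnn::mem_lock<float> lock(buf, *stream);   // maps the buffer for host access
        for (size_t i = 0; i < lock.size(); ++i)
            lock[i] = 0.0f;                          // indexed access through operator[]
    }                                                // the destructor unlocks the memory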
+    T& operator[](size_t idx) const& {
+        assert(idx < size());
+        return _ptr[idx];
+    }
+
+    T* data() const & { return _ptr; }
+
+    /// Prevents using mem_lock as a temporary object
+    T* data() && = delete;
+    /// Prevents using mem_lock as a temporary object
+    T* begin() && = delete;
+    /// Prevents using mem_lock as a temporary object
+    T* end() && = delete;
+    /// Prevents using mem_lock as a temporary object
+    T& operator[](size_t idx) && = delete;
+
+private:
+    memory::ptr _mem;
+    const stream& _stream;
+    T* _ptr;
+};
+
+struct surfaces_lock {
+    surfaces_lock() = default;
+    virtual ~surfaces_lock() = default;
+
+    surfaces_lock(const surfaces_lock& other) = delete;
+    surfaces_lock& operator=(const surfaces_lock& other) = delete;
+
+    static std::unique_ptr<surfaces_lock> create(engine_types engine_type, std::vector<memory::ptr> mem, const stream& stream);
+};
+
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_caps.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_caps.hpp
new file mode 100644
index 00000000000..98f9eee6581
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_caps.hpp
@@ -0,0 +1,84 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace cldnn {
+
+enum class allocation_type {
+    unknown,     // Not specified (i.e. simple_attached_memory class).
+    cl_mem,      // Use standard OpenCL cl_mem allocations.
+    usm_host,    // Accessible by host and device. Not migratable.
+    usm_shared,  // Accessible by host and device. Migratable.
+    usm_device,  // Accessible only by device. Not migratable.
+};
+
+class memory_capabilities {
+public:
+    memory_capabilities(std::vector<allocation_type> supported_allocation_types) : _caps(supported_allocation_types) {}
+    bool supports_usm() const {
+        return find_in_caps(allocation_type::usm_host) ||
+               find_in_caps(allocation_type::usm_shared) ||
+               find_in_caps(allocation_type::usm_device);
+    }
+    bool support_allocation_type(allocation_type type) const { return find_in_caps(type); }
+
+    static bool is_usm_type(allocation_type type) {
+        if (type == allocation_type::usm_host ||
+            type == allocation_type::usm_shared ||
+            type == allocation_type::usm_device)
+            return true;
+        return false;
+    }
+
+private:
+    std::vector<allocation_type> _caps;
+
+    bool find_in_caps(const allocation_type& type) const {
+        return std::find_if(_caps.begin(), _caps.end(), [&](const allocation_type& t) { return t == type; }) != _caps.end();
+    }
+};
+
+
+/// @brief Shared memory descriptor type.
+enum class shared_mem_type {
+    /// @brief Structure uninitialized or contains no information.
+    shared_mem_empty,
+
+    /// @brief Structure describes shared CL buffer.
+    shared_mem_buffer,
+
+    /// @brief Structure describes shared CL image.
+    shared_mem_image,
+
+    /// @brief Structure describes shared VA/DXVA surface
+    shared_mem_vasurface,
+
+    /// @brief Structure describes shared D3D11 buffer
+    shared_mem_dxbuffer
+};
+
+using shared_handle = void*;
+using shared_surface = uint32_t;
+
+/// @brief Low-level API handles required for using cldnn memory objects in external API calls.
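+/// @note An illustrative sketch of how the descriptor below is usually filled for a shared OpenCL buffer,
+/// assuming hypothetical handles `ctx` (an OpenCL context) and `buf` (a cl_mem object):
+///
+///     shared_mem_params params = { shared_mem_type::shared_mem_buffer,  // descriptor kind
+///                                  ctx,                                 // OpenCL context handle
+///                                  nullptr,                             // no DX/VA device involved
+///                                  buf,                                 // shared cl_mem handle
+///                                  0,                                   // no VA/DXVA surface
+///                                  0 };                                 // plane index (unused for buffers)
+///
+/// How the descriptor is consumed depends on the engine implementation and is outside the scope of this header.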
+struct shared_mem_params { + shared_mem_type mem_type; ///< shared buffer type + shared_handle context; ///< OpenCL context for external operations + shared_handle user_device; ///< DX/VA device for external operations + shared_handle mem; ///< memory object handle +#ifdef _WIN32 + shared_handle surface; ///< VA/DXVA surface handle +#else + shared_surface surface; +#endif + uint32_t plane; ///< shared surface plane +}; + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_pool.hpp similarity index 52% rename from inference-engine/thirdparty/clDNN/src/include/memory_pool.h rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_pool.hpp index 739cf23515a..6c3d5d4d766 100644 --- a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/memory_pool.hpp @@ -2,28 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/layout.hpp" -#include "api/primitive.hpp" -#include "device_impl.h" -#include "refcounted_obj.h" + +#include "layout.hpp" +#include "memory_caps.hpp" #include #include #include #include #include +#include namespace cldnn { -struct memory_impl; +struct memory; struct shared_mem_params; -struct engine_impl; -struct program_impl; +class engine; struct memory_user; struct memory_user_comparer; using memory_set = std::set; +using primitive_id = std::string; + +using memory_ptr = std::shared_ptr; struct memory_user { primitive_id _id; @@ -48,10 +49,10 @@ struct memory_user_comparer { struct memory_record { memory_set _users; // list of primitives that already use this memory object - refcounted_obj_ptr _memory; + memory_ptr _memory; uint32_t _network_id; allocation_type _type; - memory_record(memory_set users, refcounted_obj_ptr& memory, uint32_t net_id, allocation_type type); + memory_record(memory_set users, memory_ptr& memory, uint32_t net_id, allocation_type type); }; struct padded_pool_comparer { @@ -83,55 +84,49 @@ struct padded_pool_comparer { // - immutable - if user request for non reusable resource don't use pool, return // TODO list: -// - resolve engine <--> memory_pool circular dependency -// - add padded buffers pool -// - add decreasing memory limit in gpu_buffer/image dctor -// - add support for multi networks reuse +// - Move from runtime to graph part +// - Improve memory consumption class memory_pool { memory_pool(); - refcounted_obj_ptr alloc_memory(const layout& layout, allocation_type type, uint32_t network_id, bool reset = true); + memory_ptr alloc_memory(const layout& layout, allocation_type type); static bool has_conflict(const memory_set&, const std::set&, uint32_t network_id); std::multimap _non_padded_pool; std::map, padded_pool_comparer> _padded_pool; std::multimap _no_reusable_pool; - engine_impl* _engine; + engine* _engine; std::atomic _temp_memory_used; std::atomic _max_peak_memory_used; public: - explicit memory_pool(engine_impl& engine); + explicit memory_pool(engine& engine); ~memory_pool(); - refcounted_obj_ptr get_memory(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set& restrictions, - allocation_type type, - bool reusable = true); // get from pool or create memory allocation - refcounted_obj_ptr get_memory(const layout& layout, allocation_type type, uint32_t network_id, bool reset = true); - refcounted_obj_ptr 
get_memory(const layout& layout, const shared_mem_params* params, uint32_t network_id); - refcounted_obj_ptr get_from_non_padded_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set&, - allocation_type type); - refcounted_obj_ptr get_from_padded_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set& restrictions, - allocation_type type); - refcounted_obj_ptr get_from_across_networks_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - allocation_type type); + memory_ptr get_memory(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set& restrictions, + allocation_type type, + bool reusable = true); // get from pool or create memory allocation + memory_ptr get_memory(const layout& layout, allocation_type type); + memory_ptr get_from_non_padded_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set&, + allocation_type type); + memory_ptr get_from_padded_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set& restrictions, + allocation_type type); + memory_ptr get_from_across_networks_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + allocation_type type); void clear_pool(); void clear_pool_for_network(uint32_t network_id); - void release_memory(memory_impl* memory, - const primitive_id& id); - void color_graph(const program_impl&); - void dump_memory_pool(const program_impl&, std::string&, std::string&); + void release_memory(memory* memory, const primitive_id& id, uint32_t network_id); uint64_t get_temp_memory_used() const { return _temp_memory_used; } uint64_t get_max_peak_device_memory_used() const { return _max_peak_memory_used; } diff --git a/inference-engine/thirdparty/clDNN/api/profiling.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp similarity index 99% rename from inference-engine/thirdparty/clDNN/api/profiling.hpp rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp index 575fb06defa..98c18a483f9 100644 --- a/inference-engine/thirdparty/clDNN/api/profiling.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/profiling.hpp @@ -3,6 +3,7 @@ // #pragma once + #include #include #include diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/stream.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/stream.hpp new file mode 100644 index 00000000000..10575a9d6c4 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/stream.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "event.hpp" +#include "kernel.hpp" +#include "kernel_args.hpp" + +#include +#include + +namespace cldnn { + +class stream { +public: + using ptr = std::shared_ptr; + explicit stream(queue_types queue_type) : queue_type(queue_type) {} + virtual ~stream() = default; + + virtual void flush() const = 0; + virtual void finish() const = 0; + + virtual void set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) = 0; + virtual event::ptr enqueue_kernel(kernel& kernel, + const kernel_arguments_desc& args_desc, + const kernel_arguments_data& args, + std::vector const& deps, + bool is_output_event = false) = 0; + virtual event::ptr enqueue_marker(std::vector const& deps, bool is_output_event = false) = 0; + virtual void enqueue_barrier() = 0; + virtual event::ptr 
group_events(std::vector<event::ptr> const& deps) = 0;
+    virtual void wait_for_events(const std::vector<event::ptr>& events) = 0;
+    virtual void reset_events() = 0;
+    virtual event::ptr create_user_event(bool set) = 0;
+    virtual event::ptr create_base_event() = 0;
+    virtual void release_events_pool() = 0;
+
+    queue_types get_queue_type() const { return queue_type; }
+
+protected:
+    queue_types queue_type;
+};
+
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/api/tensor.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/tensor.hpp
similarity index 99%
rename from inference-engine/thirdparty/clDNN/api/tensor.hpp
rename to inference-engine/thirdparty/clDNN/api/cldnn/runtime/tensor.hpp
index 4bed1bcdb3a..9c6a02d9e2e 100644
--- a/inference-engine/thirdparty/clDNN/api/tensor.hpp
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/tensor.hpp
@@ -2,11 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //
-///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "cldnn.hpp"
-#include "compounds.h"
-#include "meta_utils.hpp"
+
+#include "compounds.hpp"
+#include "utils.hpp"
 #include
 #include
diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/utils.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/utils.hpp
new file mode 100644
index 00000000000..7b66ff43a7b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/utils.hpp
@@ -0,0 +1,144 @@
+// Copyright (C) 2017-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace cldnn {
+
+struct primitive;
+
+namespace meta {
+
+// helper struct to tell whether type T is any of given types U...
+// termination case when U... is empty -> return std::false_type
+template <class T, class... U>
+struct is_any_of : public std::false_type {};
+
+// helper struct to tell whether type is any of given types (U, Rest...)
+// recursive case when at least one type U is present -> returns std::true_type if std::is_same<T, U>::value is true,
+// otherwise calls is_any_of recursively
+template <class T, class U, class... Rest>
+struct is_any_of<T, U, Rest...>
+    : public std::conditional<std::is_same<T, U>::value, std::true_type, is_any_of<T, Rest...>>::type {};
+
+template <class T>
+struct always_false : public std::false_type {};
+
+template <typename Ty, Ty Val>
+struct always_false_ty_val : public std::false_type {};
+
+template <typename Ty, Ty... Vals>
+struct val_tuple {};
+
+template <bool... Values>
+struct all : public std::true_type {};
+
+template <bool Val, bool... Values>
+struct all<Val, Values...> : public std::integral_constant<bool, Val && all<Values...>::value> {};
+
+} // namespace meta
+
+/// @cond CPP_HELPERS
+
+/// @defgroup cpp_helpers Helpers
+/// @{
+
+#define CLDNN_API_CLASS(the_class) static_assert(std::is_standard_layout<the_class>::value, #the_class " has to be 'standard layout' class");
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value, T>::type align_to(T size, size_t align) {
+    return static_cast<T>((size % align == 0) ? size : size - size % align + align);
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value, T>::type pad_to(T size, size_t align) {
+    return static_cast<T>((size % align == 0) ? 0 : align - size % align);
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value, bool>::type is_aligned_to(T size, size_t align) {
+    return !(size % align);
+}
+
+/// Computes ceil(@p val / @p divider) on unsigned integral numbers.
+///
+/// Computes division of unsigned integral numbers and rounds result up to full number (ceiling).
+/// The function works for unsigned integrals only. Signed integrals are converted to corresponding
+/// unsigned ones.
+///
+/// @tparam T1 Type of @p val. Type must be integral (SFINAE).
+/// @tparam T2 Type of @p divider. Type must be integral (SFINAE).
+///
+/// @param val Divided value. If value is signed, it will be converted to corresponding unsigned type.
+/// @param divider Divider value. If value is signed, it will be converted to corresponding unsigned type.
+///
+/// @return Result of ceil(@p val / @p divider). The type of result is determined as if in normal integral
+///         division, except each operand is converted to unsigned type if necessary.
+template <typename T1, typename T2>
+constexpr auto ceil_div(T1 val, T2 divider)
+    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
+                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
+    typedef typename std::make_unsigned<T1>::type UT1;
+    typedef typename std::make_unsigned<T2>::type UT2;
+    typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;
+
+    return static_cast<RetT>((static_cast<UT1>(val) + static_cast<UT2>(divider) - 1U) / static_cast<UT2>(divider));
+}
+
+/// Rounds @p val to the nearest multiple of @p rounding that is greater than or equal to @p val.
+///
+/// The function works for unsigned integrals only. Signed integrals are converted to corresponding
+/// unsigned ones.
+///
+/// @tparam T1 Type of @p val. Type must be integral (SFINAE).
+/// @tparam T2 Type of @p rounding. Type must be integral (SFINAE).
+///
+/// @param val Value to round up. If value is signed, it will be converted to corresponding unsigned type.
+/// @param rounding Rounding value. If value is signed, it will be converted to corresponding unsigned type.
+///
+/// @return @p val rounded up to the nearest multiple of @p rounding. The type of result is determined as if in normal integral
+///         division, except each operand is converted to unsigned type if necessary.
+template <typename T1, typename T2>
+constexpr auto round_up_to(T1 val, T2 rounding)
+    -> typename std::enable_if<std::is_integral<T1>::value && std::is_integral<T2>::value,
+                               decltype(std::declval<typename std::make_unsigned<T1>::type>() / std::declval<typename std::make_unsigned<T2>::type>())>::type {
+    typedef typename std::make_unsigned<T1>::type UT1;
+    typedef typename std::make_unsigned<T2>::type UT2;
+    typedef decltype(std::declval<UT1>() / std::declval<UT2>()) RetT;
+
+    return static_cast<RetT>(ceil_div(val, rounding) * static_cast<UT2>(rounding));
+}
+
+template <class T, class... Args>
+std::unique_ptr<T> make_unique(Args&& ... args) {
+    return std::unique_ptr<T>( new T(std::forward<Args>(args)...)
); +} + +template ::value, int>::type = 0> +inline derived_type* downcast(base_type* base) { + if (auto casted = dynamic_cast(base)) + return casted; + + throw std::runtime_error("Unable to cast pointer from base to derived type"); +} + +template ::value, int>::type = 0> +inline derived_type& downcast(base_type& base) { + try { + return dynamic_cast(base); + } catch (std::bad_cast& /* ex */) { + throw std::runtime_error("Unable to cast reference from base to derived type"); + } + throw std::runtime_error("downcast failed with unhadnled exception"); +} + +/// @} +/// @endcond +/// @} +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/device.hpp b/inference-engine/thirdparty/clDNN/api/device.hpp deleted file mode 100644 index e8889d5e86d..00000000000 --- a/inference-engine/thirdparty/clDNN/api/device.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "cldnn.hpp" -#include -#include -#include -#include - -namespace cldnn { -/// @addtogroup cpp_api C++ API -/// @{ - -/// @defgroup cpp_device GPU Device -/// @{ - -/// @brief Enumeration of supported device types -enum class device_type { - integrated_gpu = 0, - discrete_gpu = 1 -}; - -struct gfx_version { - uint16_t major; - uint8_t minor; - uint8_t revision; -}; - -/// @brief Information about the device properties and capabilities. -struct device_info { - uint32_t cores_count; ///< Number of available HW cores. - uint32_t core_frequency; ///< Clock frequency in MHz. - uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU. - uint32_t max_threads_per_device; ///< Maximum number of HW threads on device. - - uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model. - uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes. - uint64_t max_global_mem_size; ///< Maximum size of global device memory in bytes. - uint64_t max_alloc_mem_size; ///< Maximum size of memory object allocation in bytes. - - uint64_t max_image2d_width; ///< Maximum image 2d width supported by the device. - uint64_t max_image2d_height; ///< Maximum image 2d height supported by the device. - - // Flags (for layout compatibility fixed size types are used). - uint8_t supports_fp16; ///< Does engine support FP16. - uint8_t supports_fp16_denorms; ///< Does engine support denormalized FP16. - uint8_t supports_subgroups_short; ///< Does engine support cl_intel_subgroups_short. - uint8_t supports_image; ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap). - - uint8_t supports_imad; ///< Does engine support int8 mad. - uint8_t supports_immad; ///< Does engine support int8 multi mad. - - uint8_t supports_usm; ///< Does engine support unified shared memory. - - std::string dev_name; ///< Device ID string - std::string driver_version; ///< Version of OpenCL driver - - device_type dev_type; ///< Defines type of current GPU device (integrated or discrete) - - gfx_version gfx_ver; - uint32_t device_id; - uint32_t num_slices; - uint32_t num_sub_slices_per_slice; - uint32_t num_eus_per_sub_slice; - uint32_t num_threads_per_eu; -}; - -struct device_impl; - -/// @brief Represents clDNN detected device object. Use device_query to get list of available objects. 
-struct device { - static device create_default(); - - explicit device(device_impl* data) - : _impl(data) { - if (_impl == nullptr) - throw std::invalid_argument("implementation pointer should not be null"); - } - - /// @brief Returns information about properties and capabilities of the device. - device_info get_info() const; - - // TODO add move construction/assignment - device(const device& other) : _impl(other._impl) { - retain(); - } - - device& operator=(const device& other) { - if (_impl == other._impl) return *this; - release(); - _impl = other._impl; - retain(); - return *this; - } - - ~device() { - release(); - } - - friend bool operator==(const device& lhs, const device& rhs) { return lhs._impl == rhs._impl; } - friend bool operator!=(const device& lhs, const device& rhs) { return !(lhs == rhs); } - - device_impl* get() const { return _impl; } - -private: - device_impl* _impl; - - void retain(); - void release(); -}; - -struct device_query_impl; - -/// @brief Represents clDNN object, which allows to query for list of devices. -struct device_query { - /// @brief Constructs engine configuration with specified options. - /// @param Query only for devices, which supports out of order execution (default in cldnn). - /// @param Query for devices in user provided opencl context. - explicit device_query(void* clcontext = nullptr, void* user_device = nullptr); - // TODO add move construction/assignment - device_query(const device_query& other) : _impl(other._impl) { - retain(); - } - - /// Returns map of {device_id, device object} of available devices on system. - /// Device_id is string. First device will have id: "0", second "1" etc. - std::map get_available_devices() const; - - device_query& operator=(const device_query& other) { - if (_impl == other._impl) return *this; - release(); - _impl = other._impl; - retain(); - return *this; - } - - ~device_query() { - release(); - } - - friend bool operator==(const device_query& lhs, const device_query& rhs) { return lhs._impl == rhs._impl; } - friend bool operator!=(const device_query& lhs, const device_query& rhs) { return !(lhs == rhs); } - - device_query_impl* get() const { return _impl; } - -private: - device_query_impl* _impl; - - void retain(); - void release(); -}; -CLDNN_API_CLASS(device_query) - -/// @} - -/// @} - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/engine.hpp b/inference-engine/thirdparty/clDNN/api/engine.hpp deleted file mode 100644 index c65d89df601..00000000000 --- a/inference-engine/thirdparty/clDNN/api/engine.hpp +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "cldnn.hpp" -#include "device.hpp" -#include -#include -#include -#include -#include - -namespace cldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -/// @defgroup cpp_engine Execution Engine -/// @{ - -/// @brief Defines available engine types -enum class engine_types : int32_t { - ocl -}; - -/// @brief Defines available priority mode types -enum class priority_mode_types : int16_t { - disabled, - low, - med, - high -}; - -/// @brief Defines available priority mode types -enum class throttle_mode_types : int16_t { - disabled, - low, - med, - high -}; - -/// @brief Configuration parameters for created engine. -struct engine_configuration { - const bool enable_profiling; ///< Enable per-primitive profiling. 
- const bool meaningful_kernels_names; ///< Generate meaniful names fo OpenCL kernels. - const bool dump_custom_program; ///< Dump the user OpenCL programs to files - const std::string compiler_options; ///< OpenCL compiler options string. - const std::string single_kernel_name; ///< If provided, runs specific layer. - const bool enable_parallelisation; ///< Enables parallel execution of primitives which don't depend on each other. Disabled by default. - const std::string engine_log; ///< Specifies a file to which engine log should be dumped. Empty by default (means no logging). - const std::string sources_dumps_dir; ///< Specifies a directory where sources of cldnn::program objects should be dumped. - ///< Empty by default (means no dumping). - const priority_mode_types priority_mode; ///< Priority mode (support of priority hints in command queue). If cl_khr_priority_hints extension - ///< is not supported by current OpenCL implementation, the value must be set to cldnn_priority_disabled. - - const throttle_mode_types throttle_mode; ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension - ///< is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled. - - bool enable_memory_pool; ///< Enables memory usage optimization. memory objects will be reused when possible - ///< (switched off for older drivers then NEO). - uint16_t n_streams; ///< Number of queues executed in parallel - const std::string kernels_cache_path; ///< Path to compiled kernels cache - uint16_t n_threads; ///< Number of threads - const std::string tuning_cache_path; ///< Path to tuning kernel cache - - /// @brief Constructs engine configuration with specified options. - /// @param profiling Enable per-primitive profiling. - /// @param decorate_kernel_names Generate meaniful names fo OpenCL kernels. - /// @param dump_custom_program Dump the custom OpenCL programs to files - /// @param options OpenCL compiler options string. - /// @param single_kernel If provided, runs specific layer. 
- engine_configuration( - bool profiling = false, - bool decorate_kernel_names = false, - bool dump_custom_program = false, - const std::string& options = std::string(), - const std::string& single_kernel = std::string(), - bool primitives_parallelisation = true, - const std::string& engine_log = std::string(), - const std::string& sources_dumps_dir = std::string(), - priority_mode_types priority_mode = priority_mode_types::disabled, - throttle_mode_types throttle_mode = throttle_mode_types::disabled, - bool memory_pool = true, - uint16_t n_streams = 1, - const std::string& kernels_cache_path = "", - uint16_t n_threads = std::max(static_cast(std::thread::hardware_concurrency()), static_cast(1)), - const std::string& tuning_cache_path = "cache.json") - : enable_profiling(profiling) - , meaningful_kernels_names(decorate_kernel_names) - , dump_custom_program(dump_custom_program) - , compiler_options(options) - , single_kernel_name(single_kernel) - , enable_parallelisation(primitives_parallelisation) - , engine_log(engine_log) - , sources_dumps_dir(sources_dumps_dir) - , priority_mode(priority_mode) - , throttle_mode(throttle_mode) - , enable_memory_pool(memory_pool) - , n_streams(n_streams) - , kernels_cache_path(kernels_cache_path) - , n_threads(n_threads) - , tuning_cache_path(tuning_cache_path) { - if (n_streams == 0) { - throw std::invalid_argument("Invalid streams count set in engine config"); - } - } -}; - -struct engine_impl; - -/// @brief Represents clDNN engine object. -struct engine { - /// @brief Constructs @p OpenCL engine - explicit engine(const engine_configuration& configuration = engine_configuration()) - : engine(engine_types::ocl, device::create_default(), configuration) {} - - /// @brief Constructs @p OpenCL engine - explicit engine(const device& device, const engine_configuration& configuration = engine_configuration()) - : engine(engine_types::ocl, device, configuration) {} - - /// @brief Construct engine of the specified @p type, @p engine_num, and @p configuration options. - /// @param[in] type Engine type @ref cldnn_engine_type. Only OCL engine is supported. - /// @param[in] engine_num Engine index. Should be 0. - /// @param[in] configuration Engine configuration options. - engine(engine_types type, const device& device, const engine_configuration& configuration = engine_configuration()); - - // TODO add move construction/assignment - engine(const engine& other) : _impl(other._impl) { - retain(); - } - - engine& operator=(const engine& other) { - if (_impl == other._impl) return *this; - release(); - _impl = other._impl; - retain(); - return *this; - } - - ~engine() { - release(); - } - - friend bool operator==(const engine& lhs, const engine& rhs) { return lhs._impl == rhs._impl; } - friend bool operator!=(const engine& lhs, const engine& rhs) { return !(lhs == rhs); } - - /// @brief Returns number of available engines of the particular @p type. - static uint32_t engine_count(engine_types type); - - /// @brief Release pending memory allocated in OpenCL context. - void release_pending_memory(uint32_t net_id) const; - - /// @brief Returns information about properties and capabilities of the device used for allocation of the engine. - device_info get_info() const; - - /// @brief Returns OpenCL context handle of the engine. 
- void* get_context() const; - - /// @brief Returns total size of all resources allocated using given engine - uint64_t get_max_used_device_memory_size() const; - - /// @brief Returns total size of currently resources allocated using given engine - uint64_t get_temp_used_device_memory_size() const; - - /// @brief Returns type of the engine. - engine_types get_type() const; - - /// @brief get C API engine handler. - engine_impl* get() const { return _impl; } - -private: - friend struct network; - friend struct memory; - friend struct event; - - engine_impl* _impl; - - void retain(); - void release(); -}; -CLDNN_API_CLASS(engine) - -/// @} - -/// @} - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/event.hpp b/inference-engine/thirdparty/clDNN/api/event.hpp deleted file mode 100644 index 3b54171d745..00000000000 --- a/inference-engine/thirdparty/clDNN/api/event.hpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "cldnn.hpp" -#include "engine.hpp" -#include "profiling.hpp" -#include -#include -#include -#include -#include -#include - -namespace cldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -/// @addtogroup cpp_event Events Support -/// @{ - -struct event_impl; - -/// @brief user-defined event handler callback. -using event_handler = std::function; - -/// @brief Represents an clDNN Event object -struct event { - /// @brief Create an event which can be set to 'completed' by user. - static event create_user_event(const engine& engine, uint32_t net_id); - - /// @brief Construct from C API handler @ref ::cldnn_event. - explicit event(event_impl* impl) : _impl(impl) { - if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null"); - } - - event(const event& other) : _impl(other._impl) { - retain(); - } - - event& operator=(const event& other) { - if (_impl == other._impl) return *this; - release(); - _impl = other._impl; - retain(); - return *this; - } - - ~event() { - release(); - } - - friend bool operator==(const event& lhs, const event& rhs) { return lhs._impl == rhs._impl; } - friend bool operator!=(const event& lhs, const event& rhs) { return !(lhs == rhs); } - - /// @brief Wait for event completion. - void wait() const; - - /// @brief Set event status to 'completed'. - void set() const; - - /// @brief Register call back to be called on event completion. - void set_event_handler(event_handler handler, void* param) const; - - /// @brief Get profiling info for the event associated with network output. - std::vector get_profiling_info() const; - - /// @brief Returns C API event handler. 
- event_impl* get() const { return _impl; } - -private: - event_impl* _impl; - void retain(); - void release(); -}; -CLDNN_API_CLASS(event) - -/// @} -/// @} -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/memory.hpp b/inference-engine/thirdparty/clDNN/api/memory.hpp deleted file mode 100644 index 30dfd5c7eef..00000000000 --- a/inference-engine/thirdparty/clDNN/api/memory.hpp +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include -#include "cldnn.hpp" -#include "compounds.h" -#include "layout.hpp" -#include "engine.hpp" -#include -#include -#include - -namespace cldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -/// @defgroup cpp_memory Memory description and management -/// @{ - -template -struct pointer; - -struct memory_impl; - -/// @brief Shared memory descriptor type. -enum class shared_mem_type { - /// @brief Structure unitialized or contains no information. - shared_mem_empty, - - /// @brief Structure describes shared CL buffer. - shared_mem_buffer, - - /// @brief Structure describes shared CL image. - shared_mem_image, - - /// @brief Structure describes shared VA/DXVA surface - shared_mem_vasurface, - - /// @brief Structure describes shared D3D11 buffer - shared_mem_dxbuffer -}; - -using shared_handle = void*; -using shared_surface = uint32_t; - -/// @brief Low-level API handles required for using cldnn memory objects in external API calls. -struct shared_mem_params { - shared_mem_type mem_type; ///< shared buffer type - shared_handle context; ///< OpenCL context for external operations - shared_handle user_device; ///< DX/VA device for external operations - shared_handle mem; ///< memory object handle -#ifdef _WIN32 - shared_handle surface; ///< VA/DXVA surface handle -#else - shared_surface surface; -#endif - uint32_t plane; ///< shared surface plane -}; -/// @brief Represents buffer with particular @ref layout. -/// @details Usually allocated by @ref engine except cases when attached to user-allocated buffer. -struct memory { - friend struct data; - friend struct mutable_data; - friend struct network; - friend struct network_output; - - /// Allocate memory on @p engine using specified @p layout - static memory allocate(const engine& engine, const layout& layout, uint32_t net_id = 0, bool reset = true); - - /// Create shared memory object on @p engine using user-supplied memory buffer @p buf using specified @p layout - static memory share_buffer(const engine& engine, const layout& layout, shared_handle buf, uint32_t net_id = 0); - - /// Create shared memory object on @p engine using user-supplied 2D image @p img using specified @p layout - static memory share_image(const engine& engine, const layout& layout, shared_handle img, uint32_t net_id = 0); - - /// Create shared memory object on @p engine over specified @p plane of video decoder surface @p surf using specified @p layout -#ifdef _WIN32 - static memory share_surface(const engine& engine, const layout& layout, shared_handle surf, uint32_t plane, - uint32_t net_id = 0); - static memory share_dx_buffer(const engine& engine, const layout& layout, shared_handle res, uint32_t net_id = 0); -#else - static memory share_surface(const engine& engine, const layout& layout, shared_surface surf, uint32_t plane, - uint32_t net_id = 0); -#endif - - /// Create memory object attached to the buffer allocated by user. 
- /// @param ptr The pointer to user allocated buffer. - /// @param size Size (in bytes) of the buffer. Should be equal to @p layout.data_size() - /// @note User is responsible for buffer deallocation. Buffer lifetime should be bigger than lifetime of the memory object. - template - static memory attach(const cldnn::layout& layout, T* ptr, size_t size, uint32_t net_id = 0) { - if (!ptr) - throw std::invalid_argument("pointer should not be null"); - size_t data_size = size * sizeof(T); - if (data_size != layout.bytes_count()) { - std::string err_str("buffer size mismatch - input size " + std::to_string(data_size) + " layout size " + - std::to_string(layout.bytes_count())); - throw std::invalid_argument(err_str); - } - - return attach_impl(layout, static_cast(ptr), net_id); - } - - explicit memory(memory_impl* data) - : _impl(data) { - if (_impl == nullptr) - throw std::invalid_argument("implementation pointer should not be null"); - } - - memory(const memory& other) : _impl(other._impl) { - retain(); - } - - memory& operator=(const memory& other) { - if (_impl == other._impl) - return *this; - release(); - _impl = other._impl; - retain(); - return *this; - } - - ~memory() { release(); } - - friend bool operator==(const memory& lhs, const memory& rhs) { return lhs._impl == rhs._impl; } - friend bool operator!=(const memory& lhs, const memory& rhs) { return !(lhs == rhs); } - - /// number of elements of _layout.data_type stored in memory - size_t count() const; - - /// number of bytes used by memory - size_t size() const; - - /// Associated @ref layout - const layout& get_layout() const; - int get_net_id() const; - - /// Test if memory is allocated by @p engine - bool is_allocated_by(const engine& engine) const; - - bool is_the_same_buffer(const memory& other) const; - - shared_mem_params get_internal_params() const; - - /// Creates the @ref pointer object to get an access memory data - template - friend struct cldnn::pointer; - template - cldnn::pointer pointer() const; - - /// C API memory handle - memory_impl* get() const { return _impl; } - - void reset(); - -private: - friend struct engine; - memory_impl* _impl; - - template - T* lock() const { - if (data_type_traits::align_of(get_layout().data_type) % alignof(T) != 0) { - throw std::logic_error("memory data type alignment do not match"); - } - return static_cast(lock_impl()); - } - - void unlock() const; - - void* lock_impl() const; - static memory attach_impl(const cldnn::layout& layout, void* ptr, uint32_t net_id); - - void retain(); - void release(); -}; - -/// @brief Helper class to get an access @ref memory data -/// @details -/// This class provides an access to @ref memory data following RAII idiom and exposes basic C++ collection members. -/// @ref memory object is locked on construction of pointer and "unlocked" on descruction. -/// Objects of this class could be used in many STL utility functions like copy(), transform(), etc. -/// As well as in range-for loops. -template -struct pointer { - /// @brief Constructs pointer from @ref memory and locks @c (pin) ref@ memory object. - explicit pointer(const memory& mem) : _mem(mem), _size(_mem.size() / sizeof(T)), _ptr(_mem.lock()) {} - - /// @brief Unlocks @ref memory - ~pointer() { _mem.unlock(); } - - /// @brief Copy construction. - pointer(const pointer& other) : pointer(other._mem) {} - - /// @brief Copy assignment. 
- pointer& operator=(const pointer& other) { - if (this->_mem != other._mem) - do_copy(other._mem); - return *this; - } - - /// @brief Returns the number of elements (of type T) stored in memory - size_t size() const { return _size; } - -#if defined(_SECURE_SCL) && (_SECURE_SCL > 0) - typedef stdext::checked_array_iterator iterator; - typedef stdext::checked_array_iterator const_iterator; - - iterator begin() & { return stdext::make_checked_array_iterator(_ptr, size()); } - iterator end() & { return stdext::make_checked_array_iterator(_ptr, size(), size()); } - - const_iterator begin() const& { return stdext::make_checked_array_iterator(_ptr, size()); } - const_iterator end() const& { return stdext::make_checked_array_iterator(_ptr, size(), size()); } -#else - typedef T* iterator; - typedef const T* const_iterator; - iterator begin() & { return _ptr; } - iterator end() & { return _ptr + size(); } - const_iterator begin() const& { return _ptr; } - const_iterator end() const& { return _ptr + size(); } -#endif - - /// @brief Provides indexed access to pointed memory. - T& operator[](size_t idx) const& { - assert(idx < _size); - return _ptr[idx]; - } - - /// @brief Returns the raw pointer to pointed memory. - T* data() & { return _ptr; } - /// @brief Returns the constant raw pointer to pointed memory - const T* data() const& { return _ptr; } - - friend bool operator==(const pointer& lhs, const pointer& rhs) { return lhs._mem == rhs._mem; } - friend bool operator!=(const pointer& lhs, const pointer& rhs) { return !(lhs == rhs); } - - // do not use this class as temporary object - // ReSharper disable CppMemberFunctionMayBeStatic, CppMemberFunctionMayBeConst - /// Prevents to use pointer as temporary object - void data() && {} - /// Prevents to use pointer as temporary object - void begin() && {} - /// Prevents to use pointer as temporary object - void end() && {} - /// Prevents to use pointer as temporary object - void operator[](size_t idx) && {} - // ReSharper restore CppMemberFunctionMayBeConst, CppMemberFunctionMayBeStatic - -private: - memory _mem; - size_t _size; - T* _ptr; - - // TODO implement exception safe code. - void do_copy(const memory& mem) { - auto ptr = mem.lock(); - _mem.unlock(); - _mem = mem; - _size = _mem.size() / sizeof(T); - _ptr = ptr; - } -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template -pointer memory::pointer() const { - return cldnn::pointer(*this); -} -#endif - -/// @} - -/// @} - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api/meta_utils.hpp b/inference-engine/thirdparty/clDNN/api/meta_utils.hpp deleted file mode 100644 index c71073a8dfd..00000000000 --- a/inference-engine/thirdparty/clDNN/api/meta_utils.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace cldnn { - -struct primitive; - -namespace meta { - -// helper struct to tell wheter type T is any of given types U... -// termination case when U... is empty -> return std::false_type -template -struct is_any_of : public std::false_type {}; - -// helper struct to tell whether type is any of given types (U, Rest...) 
-// recurrence case when at least one type U is present -> returns std::true_type if std::same::value is true, -// otherwise call is_any_of recurrently -template -struct is_any_of - : public std::conditional::value, std::true_type, is_any_of>::type {}; - -template -struct always_false : public std::false_type {}; - -template -struct always_false_ty_val : public std::false_type {}; - -template -struct val_tuple {}; - -template -struct all : public std::true_type {}; - -template -struct all : public std::integral_constant::value> {}; - -} // namespace meta -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/api_test_builds/CMakeLists.txt b/inference-engine/thirdparty/clDNN/api_test_builds/CMakeLists.txt index 38772c1e894..9e08c013ca1 100644 --- a/inference-engine/thirdparty/clDNN/api_test_builds/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/api_test_builds/CMakeLists.txt @@ -30,28 +30,20 @@ intel_config_flag_apply_settings(CompilerOptions CMAKE_C_FLAGS ALL_PATTERN "" # ========================================= Source/Header files ======================================== set(__CLDNN_Label__api "api") -file(GLOB __CLDNN_Headers__api +file(GLOB_RECURSE __CLDNN_Headers__api "${CLDNN__API_DIR}/*.hpp" ) -set(__CLDNN_Directory__api__cpp "${CLDNN__API_DIR}/CPP") -set(__CLDNN_Label__api__cpp "${__CLDNN_Label__api}\\CPP") -file(GLOB __CLDNN_Headers__api__cpp - "${__CLDNN_Directory__api__cpp}/*.hpp" - ) - set(__CLDNN_Directory__test_builds "${CLDNN__CODEGEN_DIR}/test_builds") set(__CLDNN_CGDirectory__test_builds "${CLDNN__CODEGEN_BASEDIR}/test_builds") set(__CLDNN_Label__test_builds "test builds\\codegen") -set(__CLDNN_File__test_builds__api__cpp "api_cpp_test.cpp") +set(__CLDNN_File__test_builds__api "api_cpp_test.cpp") set(__CLDNN_Sources__test_builds - "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__cpp}" + "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api}" ) set(__CLDNN_AllSources ${__CLDNN_Headers__api} - ${__CLDNN_Headers__api__cpp} - ${__CLDNN_Headers__api__c} ${__CLDNN_Sources__test_builds} ) @@ -63,9 +55,9 @@ set_property(SOURCE ${__CLDNN_Sources__test_builds} PROPERTY GENERATED TRUE) file(MAKE_DIRECTORY "${__CLDNN_CGDirectory__test_builds}") # C++ API testing (C++11 compatibility). -set(__CLDNN_CGFile__api__cpp "${__CLDNN_CGDirectory__test_builds}/${__CLDNN_File__test_builds__api__cpp}") +set(__CLDNN_CGFile__api__cpp "${__CLDNN_CGDirectory__test_builds}/${__CLDNN_File__test_builds__api}") file(WRITE "${__CLDNN_CGFile__api__cpp}" "// This file is auto-generated. Please, do not modify it directly.\n\n") -foreach(__CLDNN_Header ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__cpp}) +foreach(__CLDNN_Header ${__CLDNN_Headers__api}) string(REPLACE ";" "\;" __CLDNN_Header "${__CLDNN_Header}") # [WA#1] Must escape ; again if occurred in item. 
file(APPEND "${__CLDNN_CGFile__api__cpp}" "#include \"${__CLDNN_Header}\"\n") endforeach() @@ -74,21 +66,17 @@ file(APPEND "${__CLDNN_CGFile__api__cpp}" "\n") # =============================================== Filters ============================================== source_group("${__CLDNN_Label__api}" FILES ${__CLDNN_Headers__api}) -source_group("${__CLDNN_Label__api__cpp}" FILES ${__CLDNN_Headers__api__cpp}) source_group("${__CLDNN_Label__test_builds}" FILES ${__CLDNN_Sources__test_builds}) # ===================================== Include/Link directories ======================================= -include_directories( - "${CLDNN__MAIN_DIR}" - ) +include_directories("${CLDNN__MAIN_DIR}") # =================================== Link targets and dependencies ==================================== # Test static library. -add_library("${CLDNN_BUILD__PROJ}" STATIC - ${__CLDNN_AllSources} - ) +add_library("${CLDNN_BUILD__PROJ}" STATIC ${__CLDNN_AllSources}) + set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}") set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY ARCHIVE_OUTPUT_DIRECTORY "${CLDNN_BUILD__PROJ_OUTPUT_DIR}") set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${CLDNN_BUILD__PROJ_OUTPUT_NAME}") @@ -96,16 +84,14 @@ set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${ target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES}) -add_dependencies("${CLDNN_BUILD__PROJ__clDNN}" - "${CLDNN_BUILD__PROJ}" - ) +add_dependencies("${CLDNN_BUILD__PROJ__clDNN}" "${CLDNN_BUILD__PROJ}") # =================================== Custom pre- and post-steps ======================================= -add_custom_command(OUTPUT "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__cpp}" - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__CLDNN_CGFile__api__cpp}" "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api__cpp}" +add_custom_command(OUTPUT "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api}" + COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__CLDNN_CGFile__api__cpp}" "${__CLDNN_Directory__test_builds}/${__CLDNN_File__test_builds__api}" DEPENDS "${__CLDNN_CGFile__api__cpp}" ${__CLDNN_Headers__api} ${__CLDNN_Headers__api__cpp} - COMMENT "Updating file if the file changed (${__CLDNN_File__test_builds__api__cpp}) ..." + COMMENT "Updating file if the file changed (${__CLDNN_File__test_builds__api}) ..." 
) # ====================================================================================================== diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt b/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt index da5dd9b2333..2f8271441bd 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt @@ -124,6 +124,7 @@ source_group("${__CLDNN_Label__cg_cache}" FILES ${__CLDNN_Sources__cg_ca # ===================================== Include/Link directories ======================================= include_directories( + "${CLDNN__MAIN_DIR}" "${__CLDNN_Directory__main}" "${__CLDNN_Directory__core}" "${__CLDNN_Directory__core}/common" diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h index 19dbc972aba..f9c9b622e48 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h @@ -385,6 +385,8 @@ public: return differ; } + + virtual ~TensorBase() = default; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp index 0b31d6ce496..2a24a9f18df 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp @@ -81,7 +81,7 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); if (!newParams.inputActivationParams.empty()) { - kernel.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp index ab4e2068371..734dd1c9ebb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.cpp @@ -93,7 +93,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); if (orgParams.outputs_num == 2) { - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp index 6747a78d418..d7a7ba3ec37 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_base.cpp @@ -137,7 +137,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para uint32_t fused_deps_total = 0; for (auto& fused_dep : newParams.fused_ops) { for (int i = 0; i < static_cast(fused_dep.dep_size); i++) { - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT_OF_FUSED_PRIMITIVE, fused_deps_total}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT_OF_FUSED_PRIMITIVE, fused_deps_total}); fused_deps_total++; } } @@ -153,7 +153,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para !newParams.bias.empty(), 1, fused_deps_total); - kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); kd.autoTuneIndex = autoTuneIndex; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp index 649afada999..f1c20f88767 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.cpp @@ -111,17 +111,17 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params, auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i }); - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i }); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); ScalarDescriptor s; s.t = ScalarDescriptor::Types::UINT32; s.v.u32 = lastOffset; - kernel.scalars.push_back(s); - kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); + kernel.params.scalars.push_back(s); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp index 07912f47634..26397613b41 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_fs_b_yx_fsv32.cpp @@ -108,17 +108,17 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, optParams); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - 
kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i}); - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, (uint32_t)i}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); ScalarDescriptor s; s.t = ScalarDescriptor::Types::UINT32; s.v.u32 = lastOffset; - kernel.scalars.push_back(s); - kernel.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); + kernel.params.scalars.push_back(s); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALAR, 0}); lastOffset += (uint32_t)input.GetDims()[concatChannelIndex].v; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp index d9f630b9a57..618043fb52e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.cpp @@ -100,9 +100,9 @@ KernelsData ConcatenationKernelRef::GetKernelsData(const Params& params, const o auto& kernel = kd[0].kernels[i]; // to avoid cases when we execute with local work sizes 1x1x1 - if (kernel.workGroups.local[0] == 1 && kernel.workGroups.global[1] != 1) { - kernel.workGroups.global[1] = Align(kernel.workGroups.global[1], 32); - kernel.workGroups.local[1] = 32; + if (kernel.params.workGroups.local[0] == 1 && kernel.params.workGroups.global[1] != 1) { + kernel.params.workGroups.global[1] = Align(kernel.params.workGroups.global[1], 32); + kernel.params.workGroups.local[1] = 32; } } } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp index bcc7bbd29e7..a5674f56f55 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp @@ -230,24 +230,24 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params, 1); if (newParams.deformable_mode) { - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); } if (!newParams.weights_zero_points.empty()) - kernel.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS_ZERO_POINTS, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS_ZERO_POINTS, 1}); if (!newParams.activations_zero_points.empty()) - kernel.arguments.push_back({ArgumentDescriptor::Types::ACTIVATIONS_ZERO_POINTS, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::ACTIVATIONS_ZERO_POINTS, 1}); if (!newParams.compensation.empty()) - kernel.arguments.push_back({ArgumentDescriptor::Types::COMPENSATION, 1}); + 
kernel.params.arguments.push_back({ArgumentDescriptor::Types::COMPENSATION, 1}); uint32_t fused_deps_total = 0; for (auto& fused_dep : newParams.fused_ops) { for (int i = 0; i < static_cast(fused_dep.dep_size); i++) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT_OF_FUSED_PRIMITIVE, fused_deps_total }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT_OF_FUSED_PRIMITIVE, fused_deps_total }); fused_deps_total++; } } - kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); kd.autoTuneIndex = autoTuneIndex; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp index db690ef39a4..664ed70fabc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.cpp @@ -73,7 +73,7 @@ KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& param GetFusedPrimitiveInputsCount(params)); if (orgParams.outputs_num == 2) { - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp index 085ce54e7a0..87d2a3cedcc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp @@ -44,9 +44,9 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, dispatchData.stage_1, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); kd.internalBufferSizes.push_back(newParams.output.PhysicalSizeInBytes()); } { @@ -59,9 +59,9 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param FillCLKernelData(kernel, dispatchData.stage_final, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); } kd.internalBufferDataType = Datatype::F32; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp index 5ec4e19da1e..383427b6075 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp @@ -137,7 +137,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const !newParams.bias.empty(), 1, GetFusedPrimitiveInputsCount(params)); - kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp index afaa202ffe0..73b404f89e9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp @@ -334,11 +334,11 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, GetFusedPrimitiveInputsCount(params)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp index 3f136eaf35b..451145b6326 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp @@ -47,11 +47,11 @@ KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, con auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, GetFusedPrimitiveInputsCount(params)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp index 
6a40df57a68..370efc6f969 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp @@ -621,11 +621,11 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, GetFusedPrimitiveInputsCount(params)); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp index f8b43ea01a9..91975fde6f1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_b_yx_fsv32.cpp @@ -97,11 +97,12 @@ KernelsData EltwiseKernel_fs_b_yx_fsv32::GetKernelsData(const Params& params, co size_t x = input.X().v; size_t global_size = featuresRoundedUp * batches * x * y; - kernel.workGroups.global = {std::max(global_size / 8, (size_t)1), 1, 1}; - kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global, params.engineInfo); + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); + kernel.params.workGroups.global = {std::max(global_size / 8, (size_t)1), 1, 1}; + kernel.params.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.params.workGroups.global, params.engineInfo); + + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp index 288453139cc..bf2ee107612 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_mixed_byxf_and_fs_b_yx_fsv32.cpp @@ -131,12 +131,12 @@ KernelsData EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32::GetKernelsData(const Par // in fs_b_yx_fsv32 format we will process 2 features per work item, so reads/writes are done in full writes for // fp16 - kernel.workGroups.global = {x, y, (featuresRoundedUp * batches) / 2}; + kernel.params.workGroups.global = {x, y, (featuresRoundedUp * batches) / 2}; - kernel.workGroups.local = {1, 1, 16}; + kernel.params.workGroups.local = {1, 1, 16}; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, 
params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp index 05910383aae..a9470fbb0b7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp @@ -105,10 +105,10 @@ KernelsData EltwiseKernel_vload8::GetKernelsData(const Params& params, const opt } auto& kernel = kd.kernels[0]; - kernel.workGroups.global = {std::max(newParams.inputs[0].LogicalSize() / 8, (size_t)1), 1, 1}; - kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global, params.engineInfo); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); + kernel.params.workGroups.global = {std::max(newParams.inputs[0].LogicalSize() / 8, (size_t)1), 1, 1}; + kernel.params.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.params.workGroups.global, params.engineInfo); + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp index ee6087e5417..a1d86baf9a2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp @@ -282,12 +282,12 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p true, !newParams.bias.empty(), 1); - kernel.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SPLIT, 0}); // eltwise's second input if (newParams.second_input_in_output) { - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); } else { - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); } kd.autoTuneIndex = autoTuneIndex; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.cpp index 3da6f44337b..fa523d35ef9 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.cpp @@ -1,18 +1,6 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 
2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ #include "gather_nd_kernel_ref.h" #include "kernel_selector_utils.h" diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.h index 82fa0913413..ce449e229cb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_ref.h @@ -1,18 +1,6 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ #pragma once diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.cpp index 82c938d08c9..306f2b1a730 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.cpp @@ -1,18 +1,6 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-*/ #include "gather_nd_kernel_selector.h" #include "gather_nd_kernel_ref.h" diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.h index e4d2ca1edb4..e97add6ee36 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_nd_kernel_selector.h @@ -1,18 +1,6 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -*/ #pragma once diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp index d91ff0926bb..587832da828 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp @@ -80,12 +80,12 @@ KernelsData LSTMEltKernelBase::GetCommonKernelsData(const Params& params, const auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - kernel.workGroups.global = {out.X().v, out.Batch().v, 1}; - kernel.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1}; + kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); if (orgParams.has_cell) { - kernel.arguments.push_back({ArgumentDescriptor::Types::CELL, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::CELL, 0}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp index b7b4d4b0113..e4b547deacb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp @@ -50,17 +50,17 @@ KernelsData LSTMGemmKernelBase::GetCommonKernelsData(const Params& params, const auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options); auto jit = CreateJit(kernelName, cldnnJit, entryPoint); - kernel.workGroups.global = {out.X().v, out.Batch().v, 1}; - kernel.kernelString = 
GetKernelString(kernelName, jit, entryPoint, params.engineInfo); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS, 0}); + kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1}; + kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::OUTPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::WEIGHTS, 0}); if (orgParams.hasHidden) { - kernel.arguments.push_back({ArgumentDescriptor::Types::HIDDEN, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::RECURRENT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::HIDDEN, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::RECURRENT, 0}); } if (orgParams.hasBias) { - kernel.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_simd16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_simd16.cpp index b5c2d1a482a..f94f1c6bb41 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_simd16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_simd16.cpp @@ -36,7 +36,7 @@ KernelsData LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16::GetKernelsData(const Par const auto& out = orgParams.output; if ((input.Batch().v == 1) && (input.X().v >= 64) && (input.Y().v == 1)) - kernel.workGroups.global = {16, out.X().v, out.Batch().v}; + kernel.params.workGroups.global = {16, out.X().v, out.Batch().v}; return kernelsData; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_simd16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_simd16.cpp index 437fc621fbc..437dc458c26 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_simd16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_simd16.cpp @@ -36,7 +36,7 @@ KernelsData LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16::GetKernelsData(const Par const auto& out = orgParams.output; if ((input.Batch().v == 1) && (input.X().v >= 64) && (input.Y().v == 1)) - kernel.workGroups.global = {16, out.X().v, out.Batch().v}; + kernel.params.workGroups.global = {16, out.X().v, out.Batch().v}; return kernelsData; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp index 16271d23130..dfd13d8ce5b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_bfyx_opt.cpp @@ -89,9 +89,9 @@ KernelsData 
LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params, auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(dlstm_params, kernel); return { kd }; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp index 8066bf61831..2a1491fb41b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_input_kernel_base.cpp @@ -36,12 +36,12 @@ LSTM_DynamicInputKernelBase::DispatchData LSTM_DynamicInputKernelBase::SetDefaul } void kernel_selector::LSTM_DynamicInputKernelBase::SetKernelArguments(const lstm_dynamic_input_params& params, clKernelData& kernel) const { - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::WEIGHTS, 0 }); if (!params.bias.empty()) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::BIAS, 0 }); } } @@ -61,8 +61,8 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); + kernel.params.workGroups.global = dispatchData.gws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(orgParams, kernel); return {k_data}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp index a39c724c927..8327a025c8e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm_dynamic/lstm_dynamic_timeloop_kernel_base.cpp @@ -87,21 +87,21 @@ LSTM_DynamicTimeloopKernelBase::DispatchData LSTM_DynamicTimeloopKernelBase::Set void kernel_selector::LSTM_DynamicTimeloopKernelBase::SetKernelArguments(const lstm_dynamic_timeloop_params& params, clKernelData& kernel) const { uint32_t input_idx = 0; - kernel.arguments.push_back({ 
ArgumentDescriptor::Types::INPUT, input_idx++ }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::RECURRENT, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::RECURRENT, 0 }); if (params.has_hidden) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::HIDDEN, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::HIDDEN, 0 }); } if (params.has_cell) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::CELL, 0 }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::CELL, 0 }); } if (params.has_last_hidden_output) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); } if (params.has_last_cell_output) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); + kernel.params.arguments.push_back({ ArgumentDescriptor::Types::INPUT, input_idx++ }); } } @@ -122,9 +122,9 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p auto jit = CreateJit(kernelName, cldnn_jit, entry_point); auto& kernel = k_data.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo); SetKernelArguments(org_params, kernel); return {k_data}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp index b34c674a755..5534217a9ce 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_base.cpp @@ -70,7 +70,7 @@ KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params, auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp index 670f831594d..32fd8e5744c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp @@ -200,9 +200,9 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 0, 0); - kernel.arguments.clear(); 
// Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * dispatchData.item_groups * intermidiate_bytes); } @@ -224,9 +224,9 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 0, 0); - kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes); } @@ -248,10 +248,10 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 0, 0); - kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); } if (params.mvnNormalizeVariance) { // Variance second stage @@ -271,9 +271,9 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 0, 0); - kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + kernel.params.arguments.clear(); // Clear original output argument + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes); } @@ -296,9 +296,9 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 1, GetFusedPrimitiveInputsCount(params)); - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); if (params.mvnNormalizeVariance) { - kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); } } kd.internalBufferDataType = Datatype::F32; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp index 
2e36b028e28..a5c6dde629d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_base.cpp @@ -72,7 +72,7 @@ KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params, 1, GetFusedPrimitiveInputsCount(params)); - kernel.arguments.push_back({ArgumentDescriptor::Types::SCALE_TABLE, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::SCALE_TABLE, 0}); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp index eb4313f39bc..eb42f283703 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_base.cpp @@ -182,7 +182,7 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params, FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params)); if (orgParams.poolType == PoolType::MAX_WITH_ARGMAX) - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp index 457e90ba97e..b6a6a6a71db 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/quantize/quantize_kernel_base.cpp @@ -68,10 +68,10 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc(static_cast(newParams.inputs.size()), false, false); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc(static_cast(newParams.inputs.size()), false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_biplanar_nv12.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_biplanar_nv12.cpp index e88e3bb1019..2e8c89238eb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_biplanar_nv12.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_biplanar_nv12.cpp @@ -33,7 +33,7 @@ KernelsData reorder_biplanar_nv12::GetKernelsData(const Params& params, const op return {}; } KernelsData kd = GetCommonKernelsData(orgParams, options); - kd[0].kernels[0].arguments = GetArgsDesc(2, false, false); + kd[0].kernels[0].params.arguments = GetArgsDesc(2, false, false); return kd; } diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp index cf227d83504..9629aa2dbc1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp @@ -206,7 +206,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments = GetArgsDesc(1, false, false); + kernel.params.arguments = GetArgsDesc(1, false, false); return {kd}; } @@ -230,9 +230,9 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments = GetArgsDesc(1, false, false); + kernel.params.arguments = GetArgsDesc(1, false, false); if (newParams.mode == MeanSubtractMode::IN_BUFFER) { - kernel.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::BIAS, 0}); } return {kd}; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp index 0a76fed62c7..401ffc73d9c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp @@ -56,10 +56,10 @@ KernelsData ReshapeKernelRef::GetKernelsData(const Params& params, const optiona gws2 *= in_dims[i].v; } - kernel.workGroups.global = {gws0, gws1, gws2}; - kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global, params.engineInfo); - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc(1, false, false); + kernel.params.workGroups.global = {gws0, gws1, gws2}; + kernel.params.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.params.workGroups.global, params.engineInfo); + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc(1, false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp index 147c2551b28..271929559c4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp @@ -55,9 +55,9 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params, auto& kernel = kd.kernels[0]; FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); + kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 1}); if (orgParams.mode == PoolType::DEFORMABLE_BILINEAR && !orgParams.no_trans) - kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); + 
kernel.params.arguments.push_back({ArgumentDescriptor::Types::INPUT, 2}); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp index b827527d22a..4228d9fabfb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp @@ -131,11 +131,11 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o auto& kernel = kd.kernels[0]; - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); - kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); + kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT); + kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false); return {kd}; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp index bdef07b6f29..0c39d4e6923 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp @@ -315,9 +315,11 @@ void AutoTuner::RemoveKernel(const std::string& cacheFilePath, } } -std::tuple AutoTuner::LoadKernelOffline(std::shared_ptr deviceCache, +std::tuple AutoTuner::LoadKernelOffline(TuningCache* deviceCache, const Params& params) { static const uint32_t defaultComputeUnits = 24; + if (!deviceCache) + return {}; auto result = deviceCache->LoadKernel(params, false); if (std::get<0>(result).empty() && params.engineInfo.computeUnitsCount != defaultComputeUnits) { result = deviceCache->LoadKernel(params, defaultComputeUnits); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h index 709f2955b6e..22284622c3b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h @@ -74,7 +74,7 @@ public: const int tuneIndex); void RemoveKernel(const std::string& cacheFilePath, const Params& params); - std::tuple LoadKernelOffline(std::shared_ptr cache, + std::tuple LoadKernelOffline(TuningCache* cache, const Params& params); private: diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_nd_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_nd_ref.cl index 91cb7d9be77..d59d93fec17 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_nd_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_nd_ref.cl @@ -1,16 +1,6 @@ -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. #include "include/fetch.cl" diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/max_unpooling_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/max_unpooling_gpu_ref.cl index 94e77fa113f..d8ad1abfd7a 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/max_unpooling_gpu_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/max_unpooling_gpu_ref.cl @@ -4,7 +4,7 @@ #include "include/include_all.cl" -KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output, const __global float* arg_max) +KERNEL(pooling_gpu)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global float* arg_max) { #if OUTPUT_LAYOUT_BFYX || OUTPUT_LAYOUT_BYXF const uint x = (uint)get_global_id(0); @@ -12,7 +12,7 @@ KERNEL(pooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output, const uint bf = (uint)get_global_id(2); const uint f = bf % INPUT0_FEATURE_NUM; const uint b = bf / INPUT0_FEATURE_NUM; - + if (x >= INPUT0_SIZE_X) { return; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_base_opencl.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_base_opencl.cpp index 392d0ed2bc2..aae0e195279 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_base_opencl.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_base_opencl.cpp @@ -182,9 +182,9 @@ void KernelBaseOpenCL::FillCLKernelData(clKernelData& kernel, int number_of_inputs, uint32_t number_of_inputs_for_fused_prims) const { KernelBase::CheckDispatchData(kernelMapName, dispatchData); - kernel.workGroups.global = dispatchData.gws; - kernel.workGroups.local = dispatchData.lws; - kernel.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode); - kernel.arguments = GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims); + kernel.code.kernelString = GetKernelString(kernelMapName, jit, entryPoint, engine_info, exeMode); + kernel.params.workGroups.global = dispatchData.gws; + kernel.params.workGroups.local = dispatchData.lws; + kernel.params.arguments = GetArgsDesc(number_of_inputs, weights, bias, number_of_inputs_for_fused_prims); } } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.cpp similarity index 65% rename from inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.cpp rename to inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.cpp index 0d0b310a179..5a61c3c3131 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.cpp @@ -3,13 +3,13 @@ // #include "device_cache_reader.h" -#include "include/to_string_utils.h" #include "auto_tuner.h" #include #include "istreamwrapper.h" #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN +#define NOMINMAX #include #include #include @@ -25,17 
+25,15 @@ #include #include -namespace cldnn { -namespace gpu { -namespace { +namespace kernel_selector { -std::shared_ptr get_cache_from_file(std::string tuning_cache_path) { +std::shared_ptr CreateTuningCacheFromFile(std::string tuning_cache_path) { if (tuning_cache_path.compare("cache.json") == 0) { #ifdef _WIN32 char path[MAX_PATH]; HMODULE hm = NULL; GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCSTR)&get_cache_from_file, + (LPCSTR)&CreateTuningCacheFromFile, &hm); GetModuleFileName(hm, path, sizeof(path)); std::string bin_path(path); @@ -51,19 +49,4 @@ std::shared_ptr get_cache_from_file(std::string tu return std::make_shared(tuning_cache_path, false); } -} // namespace - -device_cache_reader::device_cache_reader(const std::string tuning_file_path) { - { - try { - _dev_cache = get_cache_from_file(tuning_file_path); - } - catch (...) { - std::cout << "[WARNING] error during parsing cache file, tuning data won't be used" << std::endl; - _dev_cache = std::make_shared(); - } - } -} - -} // namespace gpu -} // namespace cldnn +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.h new file mode 100644 index 00000000000..9621296ddae --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/device_cache_reader.h @@ -0,0 +1,14 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include +#include + +namespace kernel_selector { +class TuningCache; + +std::shared_ptr CreateTuningCacheFromFile(std::string tuning_cache_path); + +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp index 4d4a06b53fe..2105b043658 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp @@ -13,6 +13,7 @@ #include #include #include +#include // #define ENABLE_ENV // #define ENABLE_ENV_PRINT @@ -105,7 +106,7 @@ KernelsData kernel_selector_base::GetNaiveBestKernel(const Params& params, // TODO: find a better place to located this assignment if (kernelsData.size()) { kernelsData[0].kernelName = kernelName; - kernelsData[0].kernels[0].layerID = params.layerID; + kernelsData[0].kernels[0].params.layerID = params.layerID; } return kernelsData; @@ -123,7 +124,7 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, std::tuple cachedKernelConfig; if (options.tuningParams.mode == TuningMode::TUNING_DISABLED && !int8_kernel) { // Try to load kernel/config from offline cache #if ENABLE_OFFLINE_TUNING_CACHE - cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache, params); + cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache.get(), params); #else return GetNaiveBestKernel(params, options, kType); #endif @@ -145,7 +146,7 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& params, if (kds.size() && kds[0].kernels.size()) { kernelsData = kds; kernelsData[0].kernelName = cachedkernelName; - kernelsData[0].kernels[0].layerID = params.layerID; + kernelsData[0].kernels[0].params.layerID = params.layerID; } break; } @@ -218,7 +219,7 @@ KernelsData kernel_selector_base::GetAutoTuneBestKernel(const Params& 
params, if (kernelsData.size()) { kernelsData[0].kernelName = kernelName; - kernelsData[0].kernels[0].layerID = params.layerID; + kernelsData[0].kernels[0].params.layerID = params.layerID; autoTuner.StoreKernel(options.tuningParams.cacheFilePath, params, kernelName, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h index 5a0d62336cb..d0b0054f1b2 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h @@ -5,6 +5,7 @@ #pragma once #include "kernel_selector_params.h" +#include "cldnn/runtime/kernel_args.hpp" #include #include @@ -41,118 +42,28 @@ namespace kernel_selector { std::string GetStringEnv(const char* varName); -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// KernelString -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct KernelString { - std::string str; - std::string jit; - std::string undefs; - std::string options; - std::string entry_point; - bool batch_compilation; +using KernelString = cldnn::kernel_string; +using WorkGroupSizes = cldnn::work_group_sizes; +using ScalarDescriptor = cldnn::scalar_desc; +using Scalars = cldnn::scalars_desc; +using ArgumentDescriptor = cldnn::argument_desc; +using Arguments = cldnn::arguments_desc; +using KernelParams = cldnn::kernel_arguments_desc; - KernelString() : str(""), jit(""), undefs(""), options(""), entry_point(""), batch_compilation(false) {} - std::string get_hash() { return str + jit + undefs + options + entry_point; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// KernelCode +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +struct KernelCode { + std::shared_ptr kernelString; }; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// WorkGroupSizes -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct WorkGroupSizes { - std::vector global; - std::vector local; -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Scalar -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct ScalarDescriptor { - union ValueT { - uint8_t u8; - uint16_t u16; - uint32_t u32; - uint64_t u64; - int8_t s8; - int16_t s16; - int32_t s32; - int64_t s64; - float f32; - double f64; - }; - - enum class Types { - UINT8, - UINT16, - UINT32, - UINT64, - INT8, - INT16, - INT32, - INT64, - FLOAT32, - FLOAT64, - }; - - Types t; - ValueT v; -}; - -using Scalars = std::vector; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// ArgumentDescpirtor -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct ArgumentDescriptor { - enum class Types { - INPUT, - OUTPUT, - WEIGHTS, - BIAS, - SCALE_TABLE, - SLOPE, - SPLIT, - INTERNAL_BUFFER, - SCALAR, - RECURRENT, // RNN/LSTM/GRU recurrent weights 
- HIDDEN, // RNN/LSTM/GRU hidden input - CELL, // LSTM cell input - LSTM_PACK, // LSTM packed output - WEIGHTS_ZERO_POINTS, - ACTIVATIONS_ZERO_POINTS, - COMPENSATION, - INPUT_OF_FUSED_PRIMITIVE - }; - - enum class ScalarTypes { - UINT8, - UINT16, - UINT32, - UINT64, - INT8, - INT16, - INT32, - INT64, - FLOAT32, - FLOAT64, - }; - - Types t; - uint32_t index; -}; - -using Arguments = std::vector; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // clKernelData //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct clKernelData { - std::shared_ptr kernelString; - WorkGroupSizes workGroups; - Arguments arguments; - Scalars scalars; - std::string layerID; // TODO: in order to support run single layer. think about more appropriate place + KernelCode code; + KernelParams params; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/runtime/CMakeLists.txt b/inference-engine/thirdparty/clDNN/runtime/CMakeLists.txt new file mode 100644 index 00000000000..1089d3f9bad --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/CMakeLists.txt @@ -0,0 +1,79 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# =================================== Name / Output settings from parent =============================== + +set(CLDNN_BUILD__PROJ "cldnn_runtime") +set(CLDNN_BUILD__PROJ_LABEL "${CLDNN_BUILD__PROJ}") +set(CLDNN_BUILD__PROJ_OUTPUT_NAME "${CLDNN_BUILD__PROJ}${CLDNN__OUT_CPU_SUFFIX}") + +# ========================================== Compiler options ========================================== + +intel_config_flag_apply_settings( + CompilerOptions + CMAKE_CXX_FLAGS + ALL_PATTERN "" + SET + RttiEnabled + ) + +# ========================================= Source/Header files ======================================== + +set(__CLDNN_Directory__main "${CMAKE_CURRENT_SOURCE_DIR}") +set(__CLDNN_Label__main "") +file(GLOB __CLDNN_Sources__main + "${__CLDNN_Directory__main}/*.h" + "${__CLDNN_Directory__main}/*.hpp" + "${__CLDNN_Directory__main}/*.cpp" + ) + +file(GLOB __CLDNN_Sources__ocl + "${__CLDNN_Directory__main}/ocl/*.h" + "${__CLDNN_Directory__main}/ocl/*.hpp" + "${__CLDNN_Directory__main}/ocl/*.cpp" +) + +set(__CLDNN_AllSources + ${__CLDNN_Sources__main} + ${__CLDNN_Sources__ocl} + ) + +# =============================================== Filters ============================================== + +source_group("${__CLDNN_Label__main}" FILES ${__CLDNN_Sources__main}) + +# ===================================== Include/Link directories ======================================= + +include_directories( + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${__CLDNN_Directory__main}" + ) + +# =================================== Link targets and dependencies ==================================== + +# Main static library. 
+add_library("${CLDNN_BUILD__PROJ}" STATIC + ${__CLDNN_AllSources} + ) + +set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}") +set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${CLDNN_BUILD__PROJ_OUTPUT_NAME}") + +if(COMMAND set_ie_threading_interface_for) + set_ie_threading_interface_for("${CLDNN_BUILD__PROJ}") +endif() + +target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE + clDNN_OpenCL + openvino::itt + ) + +if(WIN32) + target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE setupapi) +elseif((NOT ANDROID) AND (UNIX)) + target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE pthread) +endif() +target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE ${CLDNN__SYSTEM_LINK_LIBRARIES}) + +# ====================================================================================================== diff --git a/inference-engine/thirdparty/clDNN/src/include/cldnn_itt.h b/inference-engine/thirdparty/clDNN/runtime/cldnn_itt.hpp similarity index 92% rename from inference-engine/thirdparty/clDNN/src/include/cldnn_itt.h rename to inference-engine/thirdparty/clDNN/runtime/cldnn_itt.hpp index b28db96f295..e3ab2d3ddb4 100644 --- a/inference-engine/thirdparty/clDNN/src/include/cldnn_itt.h +++ b/inference-engine/thirdparty/clDNN/runtime/cldnn_itt.hpp @@ -4,7 +4,7 @@ /** * @brief Defines openvino domains for tracing - * @file cldnn_itt.h + * @file cldnn_itt.hpp */ #pragma once diff --git a/inference-engine/thirdparty/clDNN/runtime/device_query.cpp b/inference-engine/thirdparty/clDNN/runtime/device_query.cpp new file mode 100644 index 00000000000..46f2de82c8c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/device_query.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn/runtime/device_query.hpp" +#include "ocl/ocl_device_detector.hpp" + +#include +#include + +namespace cldnn { + +device_query::device_query(engine_types engine_type, runtime_types runtime_type, void* user_context, void* user_device) { + switch (engine_type) { + case engine_types::ocl: { + if (runtime_type != runtime_types::ocl) + throw std::runtime_error("Unsupported runtime type for ocl engine"); + + ocl::ocl_device_detector ocl_detector; + _available_devices = ocl_detector.get_available_devices(user_context, user_device); + break; + } + default: throw std::runtime_error("Unsupported engine type in device_query"); + } + + if (_available_devices.empty()) { + throw std::runtime_error("No suitable devices found for requested engine and runtime types"); + } +} +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp new file mode 100644 index 00000000000..561f38f00f4 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/stream.hpp" +#include "cldnn/runtime/device_query.hpp" + +#include "ocl/ocl_engine_factory.hpp" + +#include +#include +#include +#include +#include + +namespace cldnn { + +engine::engine(const device::ptr device, const engine_configuration& configuration) +: _memory_pool(new memory_pool(*this)) +, _device(device) +, _configuration(configuration) {} + +device_info engine::get_device_info() const { + return _device->get_info(); +} + +const device::ptr 
engine::get_device() const { + return _device; +} + +bool engine::use_unified_shared_memory() const { + if (_device->get_mem_caps().supports_usm() && _configuration.use_unified_shared_memory) { + return true; + } + return false; +} + +bool engine::supports_allocation(allocation_type type) const { + if (memory_capabilities::is_usm_type(type) && !use_unified_shared_memory()) + return false; + if (allocation_type::usm_shared == type) + return false; + return _device->get_mem_caps().support_allocation_type(type); +} + +allocation_type engine::get_lockable_preffered_memory_allocation_type(bool is_image_layout) const { + if (!use_unified_shared_memory() || is_image_layout) + return get_default_allocation_type(); + + /* + We do not check device allocation here. + Device allocation is reserved for buffers of hidden layers. + Const buffers are propagated to device if possible. + */ + + bool support_usm_host = supports_allocation(allocation_type::usm_host); + bool support_usm_shared = supports_allocation(allocation_type::usm_shared); + + if (support_usm_shared) + return allocation_type::usm_shared; + if (support_usm_host) + return allocation_type::usm_host; + + throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!"); +} + +memory::ptr engine::get_memory_from_pool(const layout& layout, + primitive_id id, + uint32_t network_id, + std::set dependencies, + allocation_type type, + bool reusable) { + if (_configuration.use_memory_pool) + return _memory_pool->get_memory(layout, id, network_id, dependencies, type, reusable); + return _memory_pool->get_memory(layout, type); +} + +memory::ptr engine::attach_memory(const layout& layout, void* ptr) { + return std::make_shared(layout, ptr); +} + +memory::ptr engine::allocate_memory(const layout& layout, bool reset) { + allocation_type type = get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d()); + return allocate_memory(layout, type, reset); +} + +memory_ptr engine::share_buffer(const layout& layout, shared_handle buf) { + shared_mem_params params = { shared_mem_type::shared_mem_buffer, nullptr, nullptr, buf, +#ifdef _WIN32 + nullptr, +#else + 0, +#endif + 0 }; + return reinterpret_handle(layout, params); +} + +memory::ptr engine::share_image(const layout& layout, shared_handle img) { + shared_mem_params params = { shared_mem_type::shared_mem_image, nullptr, nullptr, img, +#ifdef _WIN32 + nullptr, +#else + 0, +#endif + 0 }; + return reinterpret_handle(layout, params); +} + +#ifdef _WIN32 +memory_ptr engine::share_surface(const layout& layout, shared_handle surf, uint32_t plane) { + shared_mem_params params = { shared_mem_type::shared_mem_vasurface, nullptr, nullptr, nullptr, surf, plane }; + return reinterpret_handle(layout, params); +} + +memory_ptr engine::share_dx_buffer(const layout& layout, shared_handle res) { + shared_mem_params params = { shared_mem_type::shared_mem_dxbuffer, nullptr, nullptr, res, nullptr, 0 }; + return reinterpret_handle(layout, params); +} +#else +memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint32_t plane) { + shared_mem_params params = { shared_mem_type::shared_mem_vasurface, nullptr, nullptr, nullptr, surf, plane }; + return reinterpret_handle(layout, params); +} +#endif // _WIN32 + +memory_pool& engine::get_memory_pool() { + return *_memory_pool.get(); +} + +uint64_t engine::get_max_used_device_memory() const { + return _memory_pool->get_max_peak_device_memory_used(); +} + +uint64_t engine::get_used_device_memory() const { + return 
_memory_pool->get_temp_memory_used(); +} + +std::shared_ptr engine::create(engine_types engine_type, + runtime_types runtime_type, + const device::ptr device, + const engine_configuration& configuration) { + switch (engine_type) { + case engine_types::ocl: return ocl::create_ocl_engine(device, runtime_type, configuration); + default: throw std::runtime_error("Invalid engine type"); + } +} + +std::shared_ptr engine::create(engine_types engine_type, + runtime_types runtime_type, + const engine_configuration& configuration) { + device_query query(engine_type, runtime_type); + device::ptr default_device = query.get_available_devices().begin()->second; + + return engine::create(engine_type, runtime_type, default_device, configuration); +} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/event.cpp b/inference-engine/thirdparty/clDNN/runtime/event.cpp new file mode 100644 index 00000000000..b9a61138cf1 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/event.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/engine.hpp" + +#include +#include +#include + +namespace cldnn { + +void event::wait() { + if (_set) + return; + + // TODO: refactor in context of multiple simultaneous calls (for generic engine) + wait_impl(); + _set = true; + return; +} + +bool event::is_set() { + if (_set) + return true; + + // TODO: refactor in context of multiple simultaneous calls (for generic engine) + _set = is_set_impl(); + return _set; +} + +bool event::add_event_handler(event_handler handler, void* data) { + if (is_set()) { + handler(data); + return true; + } + + std::lock_guard lock(_handlers_mutex); + auto itr = _handlers.insert(_handlers.end(), {handler, data}); + auto ret = add_event_handler_impl(handler, data); + if (!ret) + _handlers.erase(itr); + + return ret; +} + +std::vector event::get_profiling_info() { + if (!_profiling_captured) { + _profiling_captured = get_profiling_info_impl(_profiling_info); + } + + std::vector result(_profiling_info.size()); + std::copy(_profiling_info.begin(), _profiling_info.end(), result.begin()); + return result; +} + +void event::call_handlers() { + std::lock_guard lock(_handlers_mutex); + for (auto& pair : _handlers) { + try { + pair.first(pair.second); + } catch (...) 
{ + } + } + _handlers.clear(); +} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp b/inference-engine/thirdparty/clDNN/runtime/kernels_cache.cpp similarity index 82% rename from inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp rename to inference-engine/thirdparty/clDNN/runtime/kernels_cache.cpp index 1451d68de5f..014486841f0 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/kernels_cache.cpp @@ -2,9 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "kernels_cache.h" -#include "ocl_toolkit.h" +#include "kernels_factory.hpp" +#include "kernels_cache.hpp" +#include "ocl/ocl_engine.hpp" + #include #include #include @@ -13,8 +14,8 @@ #include #include #include -#include "kernel_selector_helper.h" -#include "cldnn_itt.h" + +#include "cldnn_itt.hpp" #if (CLDNN_THREADING == CLDNN_THREADING_TBB) #include #include @@ -137,10 +138,11 @@ inline bool does_options_support_batch_compilation(const std::string& options) { } // namespace namespace cldnn { -namespace gpu { + +std::mutex kernels_cache::_mutex; std::string kernels_cache::get_cache_path() const { - auto path = _context.get_configuration().kernels_cache_path; + auto path = _engine.configuration().kernels_cache_path; if (path.empty()) { return {}; } @@ -152,7 +154,7 @@ std::string kernels_cache::get_cache_path() const { } bool kernels_cache::is_cache_enabled() const { - return !_context.get_configuration().kernels_cache_path.empty(); + return !_engine.configuration().kernels_cache_path.empty(); } size_t kernels_cache::get_max_kernels_per_batch() const { @@ -171,7 +173,6 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, std::string options = code.kernel_strings->options; bool batch_compilation = code.kernel_strings->batch_compilation; bool dump_custom_program = code.dump_custom_program; - bool one_time_kernel = code.one_time_kernel; batch_compilation &= does_options_support_batch_compilation(options); @@ -189,9 +190,6 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, key += " __DUMP_CUSTOM_PROGRAM__"; // Adding label to key so it would be separated from other programs } - if (one_time_kernel) { - key += " __ONE_TIME__"; - } auto& current_bucket = program_buckets[key]; if (current_bucket.empty()) { // new bucket const auto& bucket_id = program_buckets.size() - 1; @@ -212,7 +210,6 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, auto& current_batch = current_bucket.back(); current_batch.dump_custom_program = dump_custom_program; - current_batch.one_time = one_time_kernel; current_batch.entry_point_to_id[entry_point] = code.id; assert(org_source_code.size() == 1); @@ -230,7 +227,7 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, auto options = c.first; auto& batches = c.second; for (auto& b : batches) { - std::string full_code = options + " " + _context.get_device_info().driver_version; + std::string full_code = options + " " + _engine.get_device_info().driver_version; for (auto& ss : b.source) full_code += ss; b.hash_value = std::hash()(full_code); @@ -239,20 +236,17 @@ void kernels_cache::get_program_source(const kernels_code& kernels_source_code, } } -kernels_cache::kernels_cache(gpu_toolkit& context, uint32_t prog_id) : _context(context), _prog_id(prog_id) { -} 
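(Aside, not part of the patch: the toolkit-bound constructor above is dropped in favor of the engine-based one that follows. For orientation, a rough sketch of how the reworked cache is driven, using only the signatures visible in this patch — the function and local names are illustrative, and cldnn::kernel_string is assumed to keep the str/entry_point members of the former KernelString it aliases:

// Sketch only: register a kernel source, compile pending batches, then fetch the kernel.
void build_one_kernel(cldnn::engine& engine) {
    cldnn::kernels_cache cache(engine);                     // takes engine&, no gpu_toolkit / prog_id

    auto code = std::make_shared<cldnn::kernel_string>();   // alias of the former KernelString
    code->str = "__kernel void my_kernel() {}";             // OpenCL C source (illustrative)
    code->entry_point = "my_kernel";
    auto id = cache.set_kernel_source(code, false);         // one_time_kernel flag is gone

    cache.build_all();                                       // must run before get_kernel()
    cldnn::kernel::ptr k = cache.get_kernel(id);             // single _kernels map, no one-time path
}

The diff continues with the new constructor:)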
+kernels_cache::kernels_cache(engine& engine) : _engine(engine) { } -kernels_cache::kernel_id kernels_cache::set_kernel_source( - const std::shared_ptr& kernel_string, - bool dump_custom_program, - bool one_time_kernel) { - std::lock_guard lock(_context.get_cache_mutex()); +kernel_id kernels_cache::set_kernel_source( + const std::shared_ptr& kernel_string, + bool dump_custom_program) { // we need unique id in order to avoid conflict across topologies. const auto kernel_num = _kernels.size() + _kernels_code.size(); - kernels_cache::kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num); + kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num); - auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program, one_time_kernel); + auto res = _kernels_code.emplace(kernel_string, id, dump_custom_program); assert(_kernels.find(id) == _kernels.end()); if (res.second) { @@ -277,16 +271,19 @@ static std::vector getProgramBinaries(cl::Program program) { return program.getInfo().front(); } -void kernels_cache::build_batch(const batch_program& batch) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "KernelsCache::BuildProgram"); +// TODO: This build_batch method should be backend specific +void kernels_cache::build_batch(const engine& build_engine, const batch_program& batch) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "KernelsCache::build_batch"); - bool dump_sources = !_context.get_configuration().ocl_sources_dumps_dir.empty() || batch.dump_custom_program; + auto& cl_build_engine = dynamic_cast(build_engine); + + bool dump_sources = !_engine.configuration().sources_dumps_dir.empty() || batch.dump_custom_program; std::string err_log; // accumulated build log from all program's parts (only contains messages from parts which std::string current_dump_file_name = ""; if (dump_sources) { - current_dump_file_name = _context.get_configuration().ocl_sources_dumps_dir; + current_dump_file_name = _engine.configuration().sources_dumps_dir; if (!current_dump_file_name.empty() && current_dump_file_name.back() != '/') current_dump_file_name += '/'; @@ -318,10 +315,10 @@ void kernels_cache::build_batch(const batch_program& batch) { // Run compilation if (precompiled_kernels.empty()) { - cl::Program program(_context.context(), batch.source); + cl::Program program(cl_build_engine.get_cl_context(), batch.source); { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "KernelsCache::BuildProgram::RunCompilation"); - program.build(_context.device(), batch.options.c_str()); + program.build(cl_build_engine.get_cl_device(), batch.options.c_str()); } if (dump_sources && dump_file.good()) { @@ -342,23 +339,21 @@ void kernels_cache::build_batch(const batch_program& batch) { saveBinaryToFile(cached_bin_name, getProgramBinaries(program)); } } else { - cl::Program program(_context.context(), {_context.device()}, precompiled_kernels); - program.build(_context.device(), batch.options.c_str()); + cl::Program program(cl_build_engine.get_cl_context(), {cl_build_engine.get_cl_device()}, precompiled_kernels); + program.build(cl_build_engine.get_cl_device(), batch.options.c_str()); program.createKernels(&kernels); } { - std::lock_guard lock(_context.get_cache_mutex()); + std::lock_guard lock(_mutex); for (auto& k : kernels) { const auto& entry_point = k.getInfo(); const auto& k_id = batch.entry_point_to_id.find(entry_point); - const auto& k_type = kernel_type(k, _context.get_device_info().supports_usm); if (k_id != batch.entry_point_to_id.end()) { - const auto& kmap = std::make_pair(k_id->second, 
k_type); - if (batch.one_time) { - _one_time_kernels.insert(kmap); - } else { - _kernels.insert(kmap); - } + cl_kernel kern = k.get(); + cl_context context = cl_build_engine.get_cl_context().get(); + kernel::ptr kernel = kernels_factory::create(_engine, context, kern, entry_point); + const auto& kmap = std::make_pair(k_id->second, kernel); + _kernels.insert(kmap); } else { throw std::runtime_error("Could not find entry point"); } @@ -381,13 +376,12 @@ void kernels_cache::build_batch(const batch_program& batch) { } } -kernels_cache::kernel_type kernels_cache::get_kernel(kernel_id id, bool one_time_kernel) const { +kernel::ptr kernels_cache::get_kernel(kernel_id id) const { if (_pending_compilation) throw std::runtime_error("Kernel cache is not compiled, call build_all() first!"); - const auto& kernels = one_time_kernel ? _one_time_kernels : _kernels; - auto res = kernels.find(id); - if (kernels.end() == res) + auto res = _kernels.find(id); + if (_kernels.end() == res) throw std::runtime_error("Kernel " + id + " not found in the kernel cache!"); return res->second; } @@ -396,46 +390,50 @@ void kernels_cache::build_all() { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "KernelsCache::BuildAll"); if (!_pending_compilation) return; + + std::unique_ptr _build_engine = nullptr; + if (_engine.type() == engine_types::ocl) { + _build_engine = std::unique_ptr(new ocl::ocl_engine(_engine.get_device(), runtime_types::ocl, _engine.configuration())); + } std::vector batches; { - std::lock_guard lock(_context.get_cache_mutex()); + std::lock_guard lock(_mutex); get_program_source(_kernels_code, &batches); - _one_time_kernels.clear(); #if (CLDNN_THREADING == CLDNN_THREADING_TBB) - int n_threads = _context.get_configuration().n_threads; + int n_threads = _engine.configuration().n_threads; arena = std::unique_ptr(new tbb::task_arena()); arena->initialize(n_threads); #elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) - int n_threads = _context.get_configuration().n_threads; + int n_threads = _engine.configuration().n_threads; pool = std::unique_ptr(new thread_pool(n_threads)); #endif } #if (CLDNN_THREADING == CLDNN_THREADING_TBB) - arena->execute([this, &batches] { - tbb::parallel_for(tbb::blocked_range(0, batches.size()), [this, &batches](const tbb::blocked_range& r) { + arena->execute([this, &_build_engine, &batches] { + tbb::parallel_for(tbb::blocked_range(0, batches.size()), [this, &_build_engine, &batches](const tbb::blocked_range& r) { for (auto i = r.begin(); i != r.end(); ++i) { - build_batch(batches[i]); + build_batch(*_build_engine, batches[i]); } }); }); #elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) std::vector> builds; for (size_t i = 0; i < batches.size(); ++i) { - builds.push_back(pool->enqueue([this, &batches, i] () { - build_batch(batches[i]); + builds.push_back(pool->enqueue([this, &_build_engine, &batches, i] () { + build_batch(*_build_engine, batches[i]); })); } std::for_each(builds.begin(), builds.end(), [] (std::future& f) { f.wait(); }); #else // no parallel build for (const auto& batch : batches) { - build_batch(batch); + build_batch(*_build_engine, batch); } #endif { - std::lock_guard lock(_context.get_cache_mutex()); + std::lock_guard lock(_mutex); _kernels_code.clear(); _pending_compilation = false; #if (CLDNN_THREADING == CLDNN_THREADING_TBB) @@ -459,9 +457,8 @@ void kernels_cache::build_all() { void kernels_cache::reset() { _kernels.clear(); - _one_time_kernels.clear(); _kernels_code.clear(); _pending_compilation = false; } -} // namespace gpu + } // namespace cldnn diff --git 
a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h b/inference-engine/thirdparty/clDNN/runtime/kernels_cache.hpp similarity index 70% rename from inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h rename to inference-engine/thirdparty/clDNN/runtime/kernels_cache.hpp index 7cf31598bbf..931d267e955 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.h +++ b/inference-engine/thirdparty/clDNN/runtime/kernels_cache.hpp @@ -2,8 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once + +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/kernel.hpp" + #include #include #include @@ -11,7 +14,6 @@ #include #include #include -#include #define CLDNN_THREADING_SEQ 0 #define CLDNN_THREADING_TBB 1 @@ -26,19 +28,8 @@ #include #endif -namespace cl { -class Kernel; -class KernelIntel; -} - -namespace kernel_selector { -using kernel_string = kernel_selector::KernelString; -} - namespace cldnn { -namespace gpu { -class gpu_toolkit; #if (CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) class thread_pool { public: @@ -101,6 +92,7 @@ private: } }; #endif + class kernels_cache { public: using source_code = std::vector; @@ -112,24 +104,20 @@ public: uint32_t kernels_counter = 0; std::string options; bool dump_custom_program = false; - bool one_time = false; std::map entry_point_to_id; }; struct kernel_code { - std::shared_ptr kernel_strings; + std::shared_ptr kernel_strings; std::string id; bool dump_custom_program; - bool one_time_kernel; - kernel_code(const std::shared_ptr& _kernel_strings, + kernel_code(const std::shared_ptr& _kernel_strings, const std::string& _id, - bool _dump_custom_program, - bool _one_time_kernel) + bool _dump_custom_program) : kernel_strings(_kernel_strings), id(_id), - dump_custom_program(_dump_custom_program), - one_time_kernel(_one_time_kernel) {} + dump_custom_program(_dump_custom_program) {} bool operator == (const kernel_code& c2) const { return kernel_strings->get_hash() == c2.kernel_strings->get_hash(); @@ -142,44 +130,35 @@ public: } }; - typedef std::string kernel_id; - typedef cl::KernelIntel kernel_type; - using kernels_map = std::map; using kernels_code = std::unordered_set; private: - gpu_toolkit& _context; + static std::mutex _mutex; + engine& _engine; kernels_code _kernels_code; std::atomic _pending_compilation{false}; - std::map _kernels; - std::map _one_time_kernels; // These kernels are intended to be executed only once (can - // be removed later from the cache). 
- uint32_t _prog_id; + std::map _kernels; #if (CLDNN_THREADING == CLDNN_THREADING_TBB) std::unique_ptr arena; #elif(CLDNN_THREADING == CLDNN_THREADING_THREADPOOL) std::unique_ptr pool; #endif - void get_program_source(const kernels_code& kernels_source_code, std::vector*) const; - void build_batch(const batch_program& batch); + void build_batch(const engine& build_engine, const batch_program& batch); std::string get_cache_path() const; bool is_cache_enabled() const; size_t get_max_kernels_per_batch() const; public: - explicit kernels_cache(gpu_toolkit& context, uint32_t prog_id); - kernel_id set_kernel_source(const std::shared_ptr& kernel_string, - bool dump_custom_program, - bool one_time_kernel); - kernel_type get_kernel(kernel_id id, bool one_time_kernel) const; - gpu_toolkit& get_context() { return _context; } + explicit kernels_cache(engine& engine); + kernel_id set_kernel_source(const std::shared_ptr& kernel_string, + bool dump_custom_program); + kernel::ptr get_kernel(kernel_id id) const; // forces compilation of all pending kernels/programs void build_all(); void reset(); }; -} // namespace gpu } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/kernels_factory.cpp b/inference-engine/thirdparty/clDNN/runtime/kernels_factory.cpp new file mode 100644 index 00000000000..1cb366518a4 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/kernels_factory.cpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "kernels_factory.hpp" + +namespace cldnn { +namespace ocl { +std::shared_ptr create_ocl_kernel(engine& engine, cl_context context, cl_kernel kernel, std::string entry_point); +} // namespace ocl + +namespace kernels_factory { + +std::shared_ptr create(engine& engine, cl_context context, cl_kernel kernel, std::string entry_point) { + switch (engine.type()) { + case engine_types::ocl: return ocl::create_ocl_kernel(engine, context, kernel, entry_point); + default: throw std::runtime_error("Unsupported engine type in kernels_factory::create"); + } +} + +} // namespace kernels_factory +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/kernels_factory.hpp b/inference-engine/thirdparty/clDNN/runtime/kernels_factory.hpp new file mode 100644 index 00000000000..b3d03defcb2 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/kernels_factory.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/kernel.hpp" +#include "cldnn/runtime/engine.hpp" +#include "ocl/ocl_common.hpp" + +#include + +namespace cldnn { + +namespace kernels_factory { + +// Creates instance of kernel for selected engine type. 
+// For ocl engine it creates a copy of kernel object +std::shared_ptr create(engine& engine, cl_context context, cl_kernel kernel, kernel_id kernel_id); + +} // namespace kernels_factory +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/memory.cpp b/inference-engine/thirdparty/clDNN/runtime/memory.cpp new file mode 100644 index 00000000000..1a6ff65795b --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/memory.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/stream.hpp" + +#include "ocl/ocl_memory.hpp" + +#include +#include +#include +#include +#include + +namespace cldnn { + +memory::memory(engine* engine, const layout& layout, allocation_type type, bool reused) + : _engine(engine), _layout(layout), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {} + +memory::~memory() { + if (!_reused && _engine) { + // TODO: Make memory usage tracker static in memory class + _engine->get_memory_pool().subtract_memory_used(_bytes_count); + } +} + +std::unique_ptr surfaces_lock::create(engine_types engine_type, std::vector mem, const stream& stream) { + switch (engine_type) { + case engine_types::ocl: return std::unique_ptr(new ocl::ocl_surfaces_lock(mem, stream)); + default: throw std::runtime_error("Unsupported engine type in surfaces_lock::create"); + } +} + +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp b/inference-engine/thirdparty/clDNN/runtime/memory_pool.cpp similarity index 55% rename from inference-engine/thirdparty/clDNN/src/memory_pool.cpp rename to inference-engine/thirdparty/clDNN/runtime/memory_pool.cpp index f80e8e9cd98..85091ccdd18 100644 --- a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/memory_pool.cpp @@ -2,20 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// - #include #include #include -#include "memory_impl.h" -#include "memory_pool.h" -#include "engine_impl.h" -#include "program_impl.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/memory_pool.hpp" -#include "program_node.h" - -#include "gpu/memory_gpu.h" #include #include #include @@ -24,88 +18,17 @@ namespace cldnn { memory_record::memory_record(memory_set users, - refcounted_obj_ptr& memory, + std::shared_ptr& memory, uint32_t net_id, allocation_type type) : _users(users), _memory(memory), _network_id(net_id), _type(type) {} -memory_impl::ptr memory_pool::alloc_memory(const layout& layout, allocation_type type, uint32_t net_id, bool reset) { - auto context = _engine->get_context(); - if (layout.bytes_count() > context->get_device_info().max_alloc_mem_size) { - throw std::runtime_error("exceeded max size of memory object allocation"); - } - - add_memory_used(layout.bytes_count()); - - if (_max_peak_memory_used > context->get_device_info().max_global_mem_size) { +memory::ptr memory_pool::alloc_memory(const layout& layout, allocation_type type) { + if (_max_peak_memory_used > _engine->get_device_info().max_global_mem_size) { throw std::runtime_error("exceeded global device memory"); } - try { - if (layout.format.is_image_2d()) { - memory_impl::ptr mem_impl {new gpu::gpu_image2d(engine_impl::ptr(_engine), layout, net_id, reset), false}; - return mem_impl; - } else if (type 
== allocation_type::cl_mem) { - memory_impl::ptr mem_impl{ new gpu::gpu_buffer(engine_impl::ptr(_engine), layout, net_id, reset), false }; - return mem_impl; - } else { - memory_impl::ptr mem_impl{ new gpu::gpu_usm(engine_impl::ptr(_engine), layout, net_id, type, reset), false }; - return mem_impl; - } - } catch (const cl::Error& clErr) { - switch (clErr.err()) { - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - case CL_OUT_OF_RESOURCES: - case CL_OUT_OF_HOST_MEMORY: - case CL_INVALID_BUFFER_SIZE: - throw std::runtime_error("out of GPU resources"); - default: - throw std::runtime_error("GPU buffer allocation failed"); - } - } -} - -memory_impl::ptr memory_pool::get_memory(const layout& layout, const shared_mem_params* params, uint32_t net_id) { - try { - if (layout.format.is_image_2d() && params->mem_type == shared_mem_type::shared_mem_image) { - cl::Image2D img(static_cast(params->mem), true); - memory_impl::ptr mem_impl{ new gpu::gpu_image2d(engine_impl::ptr(_engine), layout, - img, - net_id), false }; - return mem_impl; - } else if (layout.format.is_image_2d() && params->mem_type == shared_mem_type::shared_mem_vasurface) { - memory_impl::ptr mem_impl{ new gpu::gpu_media_buffer(engine_impl::ptr(_engine), layout, - params, - net_id), false }; - return mem_impl; -#ifdef _WIN32 - } else if (params->mem_type == shared_mem_type::shared_mem_dxbuffer) { - memory_impl::ptr mem_impl{ new gpu::gpu_dx_buffer(engine_impl::ptr(_engine), layout, - params, - net_id), false }; - return mem_impl; -#endif - } else if (params->mem_type == shared_mem_type::shared_mem_buffer) { - cl::Buffer buf(static_cast(params->mem), true); - memory_impl::ptr mem_impl{ new gpu::gpu_buffer(engine_impl::ptr(_engine), layout, - buf, - net_id), false }; - return mem_impl; - } else { - throw std::runtime_error("unknown shared object fromat or type"); - } - } - catch (const cl::Error& clErr) { - switch (clErr.err()) { - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - case CL_OUT_OF_RESOURCES: - case CL_OUT_OF_HOST_MEMORY: - case CL_INVALID_BUFFER_SIZE: - throw std::runtime_error("out of GPU resources"); - default: - throw std::runtime_error("GPU buffer allocation failed"); - } - } + return _engine->allocate_memory(layout, type); } memory_pool::~memory_pool() {} @@ -129,12 +52,10 @@ bool memory_pool::has_conflict(const memory_set& a, return !intersection.empty(); } -void memory_pool::release_memory(memory_impl* mem, - const primitive_id& id) { +void memory_pool::release_memory(memory* mem, const primitive_id& id, uint32_t network_id) { // check nonpadded pool first auto _layout = mem->get_layout(); auto type = mem->get_allocation_type(); - auto network_id = mem->get_net_id(); { auto range = _non_padded_pool.equal_range(_layout.bytes_count()); @@ -198,11 +119,11 @@ void memory_pool::release_memory(memory_impl* mem, } } -memory_impl::ptr memory_pool::get_from_non_padded_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set& restrictions, - allocation_type type) { +memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set& restrictions, + allocation_type type) { auto it = _non_padded_pool.lower_bound(layout.bytes_count()); while (it != _non_padded_pool.end()) { if (it->second._network_id == network_id && @@ -220,7 +141,7 @@ memory_impl::ptr memory_pool::get_from_non_padded_pool(const layout& layout, } } // didn't find anything for you? 
create new resource - auto mem = alloc_memory(layout, type, network_id); + auto mem = alloc_memory(layout, type); { _non_padded_pool.emplace(layout.bytes_count(), memory_record({{id, network_id}}, mem, network_id, type)); @@ -228,11 +149,11 @@ memory_impl::ptr memory_pool::get_from_non_padded_pool(const layout& layout, return mem; } -memory_impl::ptr memory_pool::get_from_padded_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set& restrictions, - allocation_type type) { +memory::ptr memory_pool::get_from_padded_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set& restrictions, + allocation_type type) { auto first_level_cache = _padded_pool.find(layout); if (first_level_cache != _padded_pool.end()) { @@ -252,12 +173,12 @@ memory_impl::ptr memory_pool::get_from_padded_pool(const layout& layout, return ret_mem; } } - auto mem = alloc_memory(layout, type, network_id); + auto mem = alloc_memory(layout, type); first_level_cache->second.emplace_back( memory_record({{id, network_id}}, mem, network_id, type)); return mem; } - auto mem = alloc_memory(layout, type, network_id); + auto mem = alloc_memory(layout, type); std::list list = {memory_record({{id, network_id}}, mem, network_id, type)}; _padded_pool.emplace(layout, std::move(list)); return mem; @@ -267,10 +188,10 @@ memory_impl::ptr memory_pool::get_from_padded_pool(const layout& layout, This is not reusable within one network or it's internal micronetworks. But we can use this memory records between networks. */ -memory_impl::ptr memory_pool::get_from_across_networks_pool(const layout& layout, - const primitive_id& id, - uint32_t network_id, - allocation_type type) { +memory::ptr memory_pool::get_from_across_networks_pool(const layout& layout, + const primitive_id& id, + uint32_t network_id, + allocation_type type) { auto it = _no_reusable_pool.lower_bound(layout.bytes_count()); while (it != _no_reusable_pool.end()) { @@ -284,7 +205,7 @@ memory_impl::ptr memory_pool::get_from_across_networks_pool(const layout& layout } ++it; } - auto mem = alloc_memory(layout, type, network_id); + auto mem = alloc_memory(layout, type); { _no_reusable_pool.emplace(layout.bytes_count(), memory_record({{id, network_id}}, mem, network_id, type)); @@ -292,16 +213,16 @@ memory_impl::ptr memory_pool::get_from_across_networks_pool(const layout& layout return mem; } -memory_impl::ptr memory_pool::get_memory(const layout& layout, allocation_type type, uint32_t net_id, bool reset) { - return alloc_memory(layout, type, net_id, reset); +memory::ptr memory_pool::get_memory(const layout& layout, allocation_type type) { + return alloc_memory(layout, type); } -memory_impl::ptr memory_pool::get_memory(const layout& layout, - const primitive_id& id, - uint32_t network_id, - const std::set& restrictions, - allocation_type type, - bool reusable_across_network) { +memory::ptr memory_pool::get_memory(const layout& layout, + const primitive_id& id, + uint32_t network_id, + const std::set& restrictions, + allocation_type type, + bool reusable_across_network) { if (reusable_across_network) { // reusable within the same network if (!layout.format.is_image() && layout.data_padding == padding{{0, 0, 0, 0}, 0}) { @@ -312,10 +233,10 @@ memory_impl::ptr memory_pool::get_memory(const layout& layout, return get_from_padded_pool(layout, id, network_id, restrictions, type); } else { // images (reuse not yet implemented) - return alloc_memory(layout, type, network_id); + return alloc_memory(layout, type); } } else 
{ - return alloc_memory(layout, type, network_id); + return alloc_memory(layout, type); } } @@ -329,8 +250,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { while (itr != _non_padded_pool.end()) { auto& record = itr->second; - if (record._memory->get_net_id() == network_id && - record._network_id == network_id) { + if (record._network_id == network_id) { itr = _non_padded_pool.erase(itr); } else { itr++; @@ -347,8 +267,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { auto list_itr = list.begin(); while (list_itr != list.end()) { - if (list_itr->_memory->get_net_id() == network_id && - list_itr->_network_id == network_id) { + if (list_itr->_network_id == network_id) { list_itr = list.erase(list_itr); } else { list_itr++; @@ -370,8 +289,7 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { while (itr != _no_reusable_pool.end()) { auto& record = itr->second; - if (record._memory->get_net_id() == network_id && - record._network_id == network_id) { + if (record._network_id == network_id) { itr = _no_reusable_pool.erase(itr); } else { itr++; @@ -380,59 +298,14 @@ void memory_pool::clear_pool_for_network(uint32_t network_id) { } } -memory_pool::memory_pool(engine_impl& engine) : _engine(&engine), _temp_memory_used(0), _max_peak_memory_used(0) { -} - -void memory_pool::dump_memory_pool(const program_impl& program, std::string& path, std::string& dep) { - using namespace std; - ofstream log(path); - - log << "\nNon-padded pool:" << endl; - log << "Size\tUsers:" << endl; - for (const auto& record : _non_padded_pool) { - log << record.first; - for (const auto& usr : record.second._users) log << ", " << usr; - log << endl; - } - - log << "\n--- Padded pool: ---" << endl; - log << "Size\tUsers:" << endl; - for (const auto& record : _padded_pool) { - for (const auto& mem : record.second) { - log << mem._memory->size(); - for (const auto& usr : mem._users) log << ", " << usr; - log << endl; - } - } - log << dep; - log.close(); - color_graph(program); -} - -void memory_pool::color_graph(const program_impl& program) { - uint32_t color = 0; - for (const auto& record : _non_padded_pool) { - for (const auto& usr : record.second._users) { - if (program.has_node(usr._id)) - program.get_node(usr._id).set_reused_memory_color(color); - } - ++color; - } - - for (const auto& list : _padded_pool) { - for (const auto& record : list.second) { - if (record._users.size() > 1) { // one user doesn't mean reusing - for (const auto& usr : record._users) { - if (program.has_node(usr._id)) - program.get_node(usr._id).set_reused_memory_color(color); - } - } - ++color; - } - } -} +memory_pool::memory_pool(engine& engine) : _engine(&engine), _temp_memory_used(0), _max_peak_memory_used(0) { } void memory_pool::add_memory_used(size_t value) { + // std::cerr << "ADD MEM: " << value + // << " max: " << _engine->get_device_info().max_global_mem_size + // << " peak: " << _max_peak_memory_used + // << " tmp: " << _temp_memory_used << std::endl; + _temp_memory_used += value; if (_temp_memory_used > _max_peak_memory_used) { _max_peak_memory_used = _temp_memory_used.load(); @@ -441,6 +314,10 @@ void memory_pool::add_memory_used(size_t value) { void memory_pool::subtract_memory_used(size_t value) { _temp_memory_used -= value; + // std::cerr << "FREE MEM: " << value + // << " max: " << _engine->get_device_info().max_global_mem_size + // << " peak: " << _max_peak_memory_used + // << " tmp: " << _temp_memory_used << std::endl; } } // namespace cldnn diff --git 
a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.cpp similarity index 93% rename from inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.cpp index 09e2cfda0b9..e08cfd6fa55 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ocl_base_event.h" +#include "ocl_base_event.hpp" #include #include #include #include +#include + using namespace cldnn; -using namespace gpu; +using namespace ocl; namespace { bool is_event_profiled(const cl::Event& event) { @@ -48,9 +50,6 @@ void base_event::set_ocl_callback() { void base_event::wait_impl() { if (_event.get() != nullptr) { _event.wait(); - if (get_context()->logging_enabled()) { - get_context()->log(0, "Wait for event: " + std::to_string(_queue_stamp)); - } } } @@ -92,9 +91,6 @@ bool base_event::get_profiling_info_impl(std::listlogging_enabled()) { - get_context()->log(0, "Wait for event: " + std::to_string(_queue_stamp)); - } } } @@ -112,7 +108,7 @@ bool base_events::get_profiling_info_impl(std::list>> all_durations; for (size_t i = 0; i < _events.size(); i++) { - auto be = dynamic_cast(_events[i].get()); + auto be = downcast(_events[i].get()); if (!is_event_profiled(be->_event)) continue; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp similarity index 76% rename from inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp index 988628497da..85f32c0b0b6 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_base_event.hpp @@ -4,13 +4,16 @@ #pragma once -#include "ocl_toolkit.h" +#include "ocl_common.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/utils.hpp" + #include #include #include namespace cldnn { -namespace gpu { +namespace ocl { struct profiling_period_ocl_start_stop { const char* name; @@ -18,7 +21,7 @@ struct profiling_period_ocl_start_stop { cl_profiling_info stop; }; -struct ocl_base_event : virtual public event_impl { +struct ocl_base_event : virtual public event { public: explicit ocl_base_event(uint64_t queue_stamp = 0, bool valid = false) : _queue_stamp(queue_stamp) { _attached = valid; } uint64_t get_queue_stamp() const { return _queue_stamp; } @@ -30,10 +33,10 @@ protected: struct base_event : virtual public ocl_base_event { public: - base_event(std::shared_ptr ctx, cl::Event const& ev, uint64_t queue_stamp = 0) - : ocl_base_event(queue_stamp, true), _ctx(ctx), _event(ev) {} + base_event(const cl::Context& /* ctx */, cl::Event const& ev, uint64_t queue_stamp = 0) + : ocl_base_event(queue_stamp, true), _event(ev) {} - explicit base_event(std::shared_ptr ctx) : ocl_base_event(0, false), _ctx(ctx) {} + base_event(const cl::Context& /* ctx */) : ocl_base_event(0, false) {} void attach_ocl_event(const cl::Event& ev, const uint64_t q_stamp) { _event = ev; @@ -42,11 +45,9 @@ public: _set = false; } - std::shared_ptr get_context() const { return _ctx; } cl::Event get() override { return _event; } private: - std::shared_ptr _ctx; bool _callback_set = false; void set_ocl_callback(); static void CL_CALLBACK ocl_event_completion_callback(cl_event, cl_int, void* me); @@ 
-65,14 +66,14 @@ protected: struct base_events : virtual public ocl_base_event { public: - base_events(std::shared_ptr ctx, std::vector const& ev) - : ocl_base_event(0, true), _ctx(ctx) { + base_events(const cl::Context& /* ctx */, std::vector const& ev) + : ocl_base_event(0, true) { process_events(ev); } - explicit base_events(std::shared_ptr ctx) : ocl_base_event(0, false), _ctx(ctx) {} + base_events(const cl::Context& /* ctx */) : ocl_base_event(0, false) {} - void attach_events(const std::vector& ev) { + void attach_events(const std::vector& ev) { if (_attached) throw std::runtime_error("Trying to attach events to valid event object."); process_events(ev); @@ -80,7 +81,6 @@ public: } cl::Event get() override { return _last_ocl_event; } - std::shared_ptr get_context() const { return _ctx; } void reset() override { ocl_base_event::reset(); @@ -91,7 +91,7 @@ private: void wait_impl() override; bool is_set_impl() override; - void process_events(const std::vector& ev) { + void process_events(const std::vector& ev) { for (size_t i = 0; i < ev.size(); i++) { auto multiple_events = dynamic_cast(ev[i].get()); if (multiple_events) { @@ -121,9 +121,8 @@ private: bool get_profiling_info_impl(std::list& info) override; cl::Event _last_ocl_event; - std::shared_ptr _ctx; - std::vector _events; + std::vector _events; }; -} // namespace gpu +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.cpp similarity index 80% rename from inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.cpp index 25f842cd4c0..c2772434304 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.cpp @@ -2,21 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "command_queues_builder.h" -#include "error_handler.h" +#include "ocl_command_queues_builder.hpp" +#include "cldnn/runtime/error_handler.hpp" #include namespace cldnn { -namespace gpu { +namespace ocl { -command_queues_builder::command_queues_builder(const cl::Context& context, - const cl::Device& device, - const cl_platform_id& platform_id) - : _context(context), - _device(device), - _platform_id(platform_id), - _profiling(false), +command_queues_builder::command_queues_builder() + : _profiling(false), _out_of_order(false), _priority_mode(priority_mode_types::disabled), _throttle_mode(throttle_mode_types::disabled) {} @@ -27,12 +21,13 @@ cl_command_queue_properties command_queues_builder::get_properties() { return ret; } -void command_queues_builder::build() { +ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) { auto properties = get_properties(); + ocl_queue_type queue; + if (_priority_mode == priority_mode_types::disabled && _throttle_mode == throttle_mode_types::disabled) { - _queue = queue_type(_context, _device, properties); - return; + queue = ocl_queue_type(context, device, properties); } unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR; @@ -72,7 +67,7 @@ void command_queues_builder::build() { properties, 0}; - _queue = queue_type(clCreateCommandQueueWithProperties(_context.get(), _device.get(), properties_low, &error_code)); + queue = 
ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code)); } else if (_priority_mode != priority_mode_types::disabled) { cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value, @@ -80,7 +75,7 @@ void command_queues_builder::build() { properties, 0}; - _queue = queue_type(clCreateCommandQueueWithProperties(_context.get(), _device.get(), properties_low, &error_code)); + queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code)); } else if (_throttle_mode != throttle_mode_types::disabled) { cl_queue_properties properties_low[] = {CL_QUEUE_THROTTLE_KHR, cl_queue_throttle_value, @@ -88,13 +83,15 @@ void command_queues_builder::build() { properties, 0}; - _queue = queue_type(clCreateCommandQueueWithProperties(_context.get(), _device.get(), properties_low, &error_code)); + queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code)); } if (error_code != CL_SUCCESS) { CLDNN_ERROR_MESSAGE("Command queues builders", "clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code)); } + + return queue; } void command_queues_builder::set_priority_mode(priority_mode_types priority, bool extension_support) { @@ -115,5 +112,5 @@ void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, boo } _throttle_mode = throttle; } -} // namespace gpu +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.hpp similarity index 57% rename from inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.hpp index 7d99bcccbd2..cddd3507600 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_command_queues_builder.hpp @@ -2,28 +2,24 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "ocl_toolkit.h" + +#include "ocl_common.hpp" +#include "cldnn/runtime/engine.hpp" namespace cldnn { -namespace gpu { +namespace ocl { + class command_queues_builder { public: - command_queues_builder(const cl::Context& context, const cl::Device& device, const cl_platform_id& platform_id); - void build(); + command_queues_builder(); + ocl_queue_type build(const cl::Context& context, const cl::Device& device); void set_throttle_mode(throttle_mode_types throttle, bool extension_support); void set_priority_mode(priority_mode_types priority, bool extension_support); void set_profiling(bool flag) { _profiling = flag; } void set_out_of_order(bool flag) { _out_of_order = flag; } - queue_type& queue() { return _queue; } - queue_type queue() const { return _queue; } private: - queue_type _queue; - cl::Context _context; - cl::Device _device; - cl_platform_id _platform_id; bool _profiling; bool _out_of_order; priority_mode_types _priority_mode; @@ -31,5 +27,6 @@ private: cl_command_queue_properties get_properties(); }; -} // namespace gpu + +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_common.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_common.hpp new file mode 100644 index 00000000000..07be51ff38c --- /dev/null +++ 
b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_common.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include "ocl_wrapper.hpp" + +#include + +namespace cldnn { +namespace ocl { + +typedef cl::vector> kernels_binaries_vector; +typedef cl::vector kernels_binaries_container; +typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)( + cl_context context, + cl_device_id device, + const cl_queue_properties* properties, + cl_int* errcodeRet); + +using ocl_queue_type = cl::CommandQueueIntel; +using ocl_kernel_type = cl::KernelIntel; + +class ocl_error : public std::runtime_error { +public: + explicit ocl_error(cl::Error const& err); +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp similarity index 54% rename from inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp index 09e069dda3f..e1ba4bb5a5c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.cpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "device_info.h" -#include "include/to_string_utils.h" +#include "ocl_device.hpp" +#include "ocl_common.hpp" + +#include +#include +#include +#include #include #include #include #include #include #include -#include "ocl_builder.h" - #include #include #include @@ -30,7 +33,7 @@ #endif namespace cldnn { -namespace gpu { +namespace ocl { namespace { int driver_dev_id() { @@ -103,7 +106,7 @@ int driver_dev_id() { return result.back(); } -static device_type get_device_type(const cl::Device& device) { +device_type get_device_type(const cl::Device& device) { auto unified_mem = device.getInfo(); return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu; @@ -117,7 +120,7 @@ gfx_version parse_version(cl_uint ver) { return {major, minor, revision}; } -static bool get_imad_support(const cl::Device& device) { +bool get_imad_support(const cl::Device& device) { std::string dev_name = device.getInfo(); if (dev_name.find("Gen12") != std::string::npos || @@ -183,74 +186,111 @@ bool is_local_block_io_supported(const cl::Device& device) { } } -} // namespace +device_info init_device_info(const cl::Device& device) { + device_info info; + info.vendor_id = static_cast(device.getInfo()); + info.dev_name = device.getInfo(); + info.driver_version = device.getInfo(); + info.dev_type = get_device_type(device); -device_info_internal::device_info_internal(const cl::Device& device) { - dev_name = device.getInfo(); - driver_version = device.getInfo(); - dev_type = get_device_type(device); + info.execution_units_count = device.getInfo(); - compute_units_count = device.getInfo(); + info.gpu_frequency = static_cast(device.getInfo()); - cores_count = static_cast(device.getInfo()); - core_frequency = static_cast(device.getInfo()); + info.max_work_group_size = static_cast(device.getInfo()); - max_work_group_size = static_cast(device.getInfo()); + // looks like WA. Do we still need it? 
+ if (info.max_work_group_size > 256) + info.max_work_group_size = 256; - if (max_work_group_size > 256) - max_work_group_size = 256; + info.max_local_mem_size = static_cast(device.getInfo()); + info.max_global_mem_size = static_cast(device.getInfo()); + info.max_alloc_mem_size = static_cast(device.getInfo()); - max_local_mem_size = static_cast(device.getInfo()); - max_global_mem_size = static_cast(device.getInfo()); - max_alloc_mem_size = static_cast(device.getInfo()); - - supports_image = static_cast(device.getInfo()); - max_image2d_width = static_cast(device.getInfo()); - max_image2d_height = static_cast(device.getInfo()); + info.supports_image = static_cast(device.getInfo()); + info.max_image2d_width = static_cast(device.getInfo()); + info.max_image2d_height = static_cast(device.getInfo()); // Check for supported features. auto extensions = device.getInfo(); extensions.push_back(' '); // Add trailing space to ease searching (search with keyword with trailing space). - supports_fp16 = extensions.find("cl_khr_fp16 ") != std::string::npos; - supports_fp16_denorms = supports_fp16 && (device.getInfo() & CL_FP_DENORM) != 0; + info.supports_fp16 = extensions.find("cl_khr_fp16 ") != std::string::npos; + info.supports_fp64 = extensions.find("cl_khr_fp64 ") != std::string::npos; + info.supports_fp16_denorms = info.supports_fp16 && (device.getInfo() & CL_FP_DENORM) != 0; - supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos; + info.supports_subgroups = extensions.find("cl_intel_subgroups") != std::string::npos; + info.supports_subgroups_short = extensions.find("cl_intel_subgroups_short") != std::string::npos; + info.supports_subgroups_char = extensions.find("cl_intel_subgroups_char") != std::string::npos; - supports_imad = get_imad_support(device); - supports_immad = false; + info.supports_imad = get_imad_support(device); + info.supports_immad = false; - max_threads_per_execution_unit = 7; - max_threads_per_device = static_cast(cores_count * max_threads_per_execution_unit); + info.max_threads_per_execution_unit = 7; + info.max_threads_per_device = static_cast(info.execution_units_count * info.max_threads_per_execution_unit); - vendor_id = static_cast(device.getInfo()); + info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; - supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos; - - supports_optimization_hints = false; - supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos && - is_local_block_io_supported(device); + info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos && + is_local_block_io_supported(device); bool device_attr_supported = extensions.find("cl_intel_device_attribute_query") != std::string::npos; if (device_attr_supported) { - gfx_ver = parse_version(device.getInfo()); - device_id = device.getInfo(); - num_slices = device.getInfo(); - num_sub_slices_per_slice = device.getInfo(); - num_eus_per_sub_slice = device.getInfo(); - num_threads_per_eu = device.getInfo(); + info.gfx_ver = parse_version(device.getInfo()); + info.device_id = device.getInfo(); + info.num_slices = device.getInfo(); + info.num_sub_slices_per_slice = device.getInfo(); + info.num_eus_per_sub_slice = device.getInfo(); + info.num_threads_per_eu = device.getInfo(); auto features = device.getInfo(); - supports_imad = supports_imad || (features & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL); + info.supports_imad = 
info.supports_imad || (features & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL); } else { - gfx_ver = {0, 0, 0}; - device_id = driver_dev_id(); - num_slices = 0; - num_sub_slices_per_slice = 0; - num_eus_per_sub_slice = 0; - num_threads_per_eu = 0; + info.gfx_ver = {0, 0, 0}; + info.device_id = driver_dev_id(); + info.num_slices = 0; + info.num_sub_slices_per_slice = 0; + info.num_eus_per_sub_slice = 0; + info.num_threads_per_eu = 0; } + + return info; } -} // namespace gpu + +bool does_device_support(int32_t param, const cl::Device& device) { + cl_device_unified_shared_memory_capabilities_intel capabilities; + auto err = clGetDeviceInfo(device.get(), param, sizeof(cl_device_unified_shared_memory_capabilities_intel), &capabilities, NULL); + if (err) throw std::runtime_error("[CLDNN ERROR]. clGetDeviceInfo error " + std::to_string(err)); + return !((capabilities & CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL) == 0u); +} + +memory_capabilities init_memory_caps(const cl::Device& device, const device_info& info) { + std::vector memory_caps; + if (info.supports_usm) { + if (does_device_support(CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, device)) { + memory_caps.push_back(allocation_type::usm_host); + } + if (does_device_support(CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, device)) { + memory_caps.push_back(allocation_type::usm_shared); + } + if (does_device_support(CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, device)) { + memory_caps.push_back(allocation_type::usm_device); + } + } + + return memory_capabilities(memory_caps); +} + +} // namespace + + +ocl_device::ocl_device(const cl::Device dev, const cl::Context& ctx, const cl_platform_id platform) +: _context(ctx) +, _device(dev) +, _platform(platform) +, _info(init_device_info(dev)) +, _mem_caps(init_memory_caps(dev, _info)) { } + +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.hpp new file mode 100644 index 00000000000..ecb3498e488 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/device.hpp" +#include "ocl_common.hpp" + +#include +#include +#include +#include + +namespace cldnn { +namespace ocl { + +struct ocl_device : public device { +public: + ocl_device(const cl::Device dev, const cl::Context& ctx, const cl_platform_id platform); + + device_info get_info() const override { return _info; } + memory_capabilities get_mem_caps() const override { return _mem_caps; } + + const cl::Device& get_device() const { return _device; } + cl::Device& get_device() { return _device; } + const cl::Context& get_context() const { return _context; } + cl_platform_id get_platform() const { return _platform; } + + ~ocl_device() = default; + +private: + cl::Context _context; + cl::Device _device; + cl_platform_id _platform; + device_info _info; + memory_capabilities _mem_caps; +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.cpp similarity index 68% rename from inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.cpp index 60fcc9b0435..1ba46d836d4 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp +++ 
b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.cpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "ocl_builder.h" -#include "configuration.h" -#include "include/to_string_utils.h" -#include "api/device.hpp" +#include "ocl_device_detector.hpp" +#include "ocl_device.hpp" +#include "ocl_common.hpp" + #include #include #include @@ -21,8 +20,29 @@ #pragma GCC diagnostic ignored "-Wignored-attributes" #endif +namespace { +bool does_device_match_config(bool out_of_order, const cl::Device& device) { +// Is it intel gpu +if (device.getInfo() != CL_DEVICE_TYPE_GPU || + device.getInfo() != 0x8086) { + return false; +} + +// Does device support OOOQ? +if (out_of_order) { + auto queue_properties = device.getInfo(); + using cmp_t = std::common_type::type>::type; + if (!(static_cast(queue_properties) & static_cast(cl::QueueProperties::OutOfOrder))) { + return false; + } +} + +return true; +} +} // namespace namespace cldnn { -namespace gpu { +namespace ocl { static constexpr auto INTEL_PLATFORM_VENDOR = "Intel(R) Corporation"; static std::vector getSubDevices(cl::Device& rootDevice) { @@ -68,18 +88,18 @@ static std::vector getSubDevices(cl::Device& rootDevice) { return subDevices; } -std::map ocl_builder::get_available_devices(void* user_context, void* user_device) const { +std::map ocl_device_detector::get_available_devices(void* user_context, void* user_device) const { bool host_out_of_order = true; // Change to false, if debug requires in-order queue. - std::vector dev_orig, dev_sorted; + std::vector dev_orig, dev_sorted; if (user_context != nullptr) { - dev_orig = build_device_list_from_user_context(host_out_of_order, user_context); + dev_orig = create_device_list_from_user_context(host_out_of_order, user_context); } else if (user_device != nullptr) { - dev_orig = build_device_list_from_user_device(host_out_of_order, user_device); + dev_orig = create_device_list_from_user_device(host_out_of_order, user_device); } else { - dev_orig = build_device_list(host_out_of_order); + dev_orig = create_device_list(host_out_of_order); } - std::map ret; + std::map ret; for (auto& dptr : dev_orig) { if (dptr->get_info().dev_type == cldnn::device_type::integrated_gpu) dev_sorted.insert(dev_sorted.begin(), dptr); @@ -91,14 +111,16 @@ std::map ocl_builder::get_available_devices(void* auto map_id = std::to_string(idx++); ret[map_id] = dptr; - auto rootDevice = dptr->get_device(); - auto subDevices = getSubDevices(rootDevice); + auto rootDevice = std::dynamic_pointer_cast(dptr); + if (!rootDevice) { + throw std::runtime_error("Invalid device type created in ocl_device_detector"); + } + + auto subDevices = getSubDevices(rootDevice->get_device()); if (!subDevices.empty()) { uint32_t sub_idx = 0; for (auto& subdevice : subDevices) { - auto subdPtr = device_impl::ptr(new device_impl(subdevice, cl::Context(subdevice), - dptr->get_platform(), - device_info_internal(subdevice)), false); + auto subdPtr = std::make_shared(subdevice, cl::Context(subdevice), rootDevice->get_platform()); ret[map_id+"."+std::to_string(sub_idx++)] = subdPtr; } } @@ -106,7 +128,7 @@ std::map ocl_builder::get_available_devices(void* return ret; } -std::vector ocl_builder::build_device_list(bool out_out_order) const { +std::vector ocl_device_detector::create_device_list(bool out_out_order) const { cl_uint n = 0; // Get number of platforms availible cl_int err = clGetPlatformIDs(0, NULL, &n); @@ -121,7 +143,7 @@ 
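// Editorial note, hedged: get_available_devices() above keys each root device as "0", "1", ... and each
// sub-device returned by getSubDevices() as "<root>.<n>" (so "0.0", "0.1", ...), every entry holding its
// own ocl_device with a per-device cl::Context. A caller could therefore pick a specific sub-device like
// this (the variable name detector is hypothetical; sketch only, not part of this patch):
//
//   ocl_device_detector detector;
//   auto all = detector.get_available_devices(nullptr, nullptr);
//   auto it  = all.find("0.0");                          // first sub-device of the first root device
//   device::ptr dev = (it != all.end()) ? it->second : all.at("0");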
std::vector ocl_builder::build_device_list(bool out_out_order) throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err)); } - std::vector ret; + std::vector ret; for (auto& id : platform_ids) { cl::Platform platform = cl::Platform(id); @@ -131,9 +153,9 @@ std::vector ocl_builder::build_device_list(bool out_out_order) std::vector devices; platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); for (auto& device : devices) { - if (!does_device_match_config(out_out_order, device)) continue; - ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device), - id, device_info_internal(device)), false}); + if (!does_device_match_config(out_out_order, device)) + continue; + ret.emplace_back(std::make_shared(device, cl::Context(device), id)); } } if (ret.empty()) { @@ -142,16 +164,15 @@ std::vector ocl_builder::build_device_list(bool out_out_order) return ret; } -std::vector ocl_builder::build_device_list_from_user_context(bool out_out_order, void* user_context) const { +std::vector ocl_device_detector::create_device_list_from_user_context(bool out_out_order, void* user_context) const { cl::Context ctx = cl::Context(static_cast(user_context), true); auto all_devices = ctx.getInfo(); - std::vector ret; + std::vector ret; for (auto& device : all_devices) { - if (!does_device_match_config(out_out_order, device)) continue; - ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device), - device.getInfo(), - device_info_internal(device)), false}); + if (!does_device_match_config(out_out_order, device)) + continue; + ret.emplace_back(std::make_shared(device, cl::Context(device), device.getInfo())); } if (ret.empty()) { @@ -160,7 +181,7 @@ std::vector ocl_builder::build_device_list_from_user_context( return ret; } -std::vector ocl_builder::build_device_list_from_user_device(bool out_out_order, void* user_device) const { +std::vector ocl_device_detector::create_device_list_from_user_device(bool out_out_order, void* user_device) const { cl_uint n = 0; // Get number of platforms availible cl_int err = clGetPlatformIDs(0, NULL, &n); @@ -175,7 +196,7 @@ std::vector ocl_builder::build_device_list_from_user_device(b throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err)); } - std::vector ret; + std::vector ret; for (auto& id : platform_ids) { cl::PlatformVA platform = cl::PlatformVA(id); @@ -206,8 +227,7 @@ std::vector ocl_builder::build_device_list_from_user_device(b CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, CL_CONTEXT_PLATFORM, (cl_context_properties)id, 0 }; - ret.emplace_back(device_impl::ptr{ new device_impl(device, cl::Context(device, props), - id, device_info_internal(device)), false }); + ret.emplace_back(std::make_shared(device, cl::Context(device, props), id)); } } if (ret.empty()) { @@ -216,25 +236,5 @@ std::vector ocl_builder::build_device_list_from_user_device(b return ret; } -bool ocl_builder::does_device_match_config(bool out_of_order, const cl::Device& device) const { - // Is it intel gpu - if (device.getInfo() != device_type || - device.getInfo() != device_vendor) { - return false; - } - - // Does device support OOOQ? 
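// Editorial note, hedged: both the removed ocl_builder helper above and the new anonymous-namespace
// does_device_match_config() keep only Intel GPUs (vendor id 0x8086) and, when an out-of-order queue is
// requested, additionally require out-of-order execution support in the device's queue properties. A
// standalone sketch of the same filtering idea with the cl2 C++ wrapper (illustrative only, not part of
// this patch; error handling omitted):
//
//   std::vector<cl::Platform> platforms;
//   cl::Platform::get(&platforms);
//   for (auto& p : platforms) {
//       std::vector<cl::Device> devs;
//       p.getDevices(CL_DEVICE_TYPE_GPU, &devs);
//       for (auto& d : devs) {
//           if (d.getInfo<CL_DEVICE_VENDOR_ID>() != 0x8086)
//               continue;                                    // not an Intel GPU
//           auto qprops = d.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
//           bool oooq = (qprops & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
//           // keep the device; oooq says whether an out-of-order queue can be created on it
//       }
//   }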
- if (out_of_order) { - auto queue_properties = device.getInfo(); - using cmp_t = std::common_type::type>::type; - if (!(static_cast(queue_properties) & static_cast(cl::QueueProperties::OutOfOrder))) { - return false; - } - } - - return true; -} - -} // namespace gpu +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.hpp new file mode 100644 index 00000000000..d738285989c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_device_detector.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/device.hpp" + +#include +#include +#include +#include +#include + +namespace cldnn { +namespace ocl { + +class ocl_device_detector { +public: + ocl_device_detector() = default; + + std::map get_available_devices(void* user_context, void* user_device) const; +private: + std::vector create_device_list(bool out_out_order) const; + std::vector create_device_list_from_user_context(bool out_out_order, void* user_context) const; + std::vector create_device_list_from_user_device(bool out_out_order, void* user_device) const; +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.cpp new file mode 100644 index 00000000000..95b2950ec61 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.cpp @@ -0,0 +1,204 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ocl_engine.hpp" +#include "ocl_common.hpp" +#include "ocl_memory.hpp" +#include "ocl_stream.hpp" +#include +#include +#include +#include +#include + +// NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation +// of some types (even though we already disabled them in scope of definition of these types). +// Moreover this warning is pretty much now only for annoyance: it is generated due to lack +// of proper support for mangling of custom GCC attributes into type name (usually when used +// with templates, even from standard library). +#if defined __GNUC__ && __GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + +// static class memebers - pointers to dynamically obtained OpenCL extension functions +cl::PFN_clEnqueueAcquireMediaSurfacesINTEL cl::SharedSurfLock::pfn_acquire = NULL; +cl::PFN_clEnqueueReleaseMediaSurfacesINTEL cl::SharedSurfLock::pfn_release = NULL; +cl::PFN_clCreateFromMediaSurfaceINTEL cl::ImageVA::pfn_clCreateFromMediaSurfaceINTEL = NULL; +#ifdef _WIN32 +cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL; +#endif + +namespace cldnn { +namespace ocl { + +ocl_error::ocl_error(cl::Error const& err) + : std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {} + +ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, const engine_configuration& conf) + : engine(dev, conf) { + if (runtime_type != runtime_types::ocl) { + throw std::runtime_error("Invalid runtime type specified for OCL engine. 
Only OCL runtime is supported"); + } + + auto casted = dynamic_cast(dev.get()); + if (!casted) + throw std::runtime_error("[CLDNN] Invalid device type passed to ocl engine"); + casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions); + + _program_stream.reset(new ocl_stream(*this)); +} + +const cl::Context& ocl_engine::get_cl_context() const { + auto cl_device = std::dynamic_pointer_cast(_device); + if (!cl_device) + throw std::runtime_error("Invalid device type for ocl_engine"); + return cl_device->get_context(); +} + +const cl::Device& ocl_engine::get_cl_device() const { + auto cl_device = std::dynamic_pointer_cast(_device); + if (!cl_device) + throw std::runtime_error("Invalid device type for ocl_engine"); + return cl_device->get_device(); +} + +memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { + if (layout.bytes_count() > get_device_info().max_alloc_mem_size) { + throw std::runtime_error("exceeded max size of memory object allocation"); + } + + _memory_pool->add_memory_used(layout.bytes_count()); + + try { + memory::ptr res = nullptr; + if (layout.format.is_image_2d()) { + res = std::make_shared(this, layout); + } else if (type == allocation_type::cl_mem) { + res = std::make_shared(this, layout); + } else { + res = std::make_shared(this, layout, type); + } + + if (reset || res->is_memory_reset_needed(layout)) { + res->fill(get_program_stream()); + } + + return res; + } catch (const cl::Error& clErr) { + switch (clErr.err()) { + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + case CL_OUT_OF_RESOURCES: + case CL_OUT_OF_HOST_MEMORY: + case CL_INVALID_BUFFER_SIZE: + throw std::runtime_error("out of GPU resources"); + default: + throw std::runtime_error("GPU buffer allocation failed"); + } + } +} + +memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) { + if (memory.get_engine() != this) + throw std::runtime_error("trying to reinterpret buffer allocated by a different engine"); + + if (new_layout.format.is_image() && !memory.get_layout().format.is_image()) + throw std::runtime_error("trying to reinterpret non-image buffer as image"); + + if (!new_layout.format.is_image() && memory.get_layout().format.is_image()) + throw std::runtime_error("trying to reinterpret image buffer as non-image buffer"); + + try { + if (new_layout.format.is_image_2d()) { + return std::make_shared(this, + new_layout, + reinterpret_cast(memory).get_buffer()); + } else if (memory_capabilities::is_usm_type(memory.get_allocation_type())) { + return std::make_shared(this, + new_layout, + reinterpret_cast(memory).get_buffer(), + memory.get_allocation_type()); + } else { + return std::make_shared(this, + new_layout, + reinterpret_cast(memory).get_buffer()); + } + } catch (cl::Error const& err) { + throw ocl::ocl_error(err); + } +} + +memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_params params) { + try { + if (new_layout.format.is_image_2d() && params.mem_type == shared_mem_type::shared_mem_image) { + cl::Image2D img(static_cast(params.mem), true); + return std::make_shared(this, new_layout, img); + } else if (new_layout.format.is_image_2d() && params.mem_type == shared_mem_type::shared_mem_vasurface) { + return std::make_shared(this, new_layout, params); +#ifdef _WIN32 + } else if (params.mem_type == shared_mem_type::shared_mem_dxbuffer) { + return std::make_shared(this, new_layout, params); +#endif + } else if (params.mem_type == shared_mem_type::shared_mem_buffer) { + cl::Buffer 
buf(static_cast(params.mem), true); + return std::make_shared(this, new_layout, buf); + } else { + throw std::runtime_error("unknown shared object fromat or type"); + } + } + catch (const cl::Error& clErr) { + switch (clErr.err()) { + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + case CL_OUT_OF_RESOURCES: + case CL_OUT_OF_HOST_MEMORY: + case CL_INVALID_BUFFER_SIZE: + throw std::runtime_error("out of GPU resources"); + default: + throw std::runtime_error("GPU buffer allocation failed"); + } + } +} + +bool ocl_engine::is_the_same_buffer(const memory& mem1, const memory& mem2) { + if (mem1.get_engine() != this || mem2.get_engine() != this) + return false; + if (mem1.get_allocation_type() != mem2.get_allocation_type()) + return false; + if (&mem1 == &mem2) + return true; + + if (!memory_capabilities::is_usm_type(mem1.get_allocation_type())) + return (reinterpret_cast(mem1).get_buffer() == + reinterpret_cast(mem2).get_buffer()); + else + return (reinterpret_cast(mem1).get_buffer() == + reinterpret_cast(mem2).get_buffer()); +} + +void* ocl_engine::get_user_context() const { + auto& cl_device = downcast(*_device); + return static_cast(cl_device.get_context().get()); +} + +bool ocl_engine::extension_supported(std::string extension) const { + return _extensions.find(extension) != std::string::npos; +} + +stream::ptr ocl_engine::create_stream() const { + return std::make_shared(*this); +} + +stream& ocl_engine::get_program_stream() const { + return *_program_stream; +} + +std::shared_ptr ocl_engine::create(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration) { + return std::make_shared(device, runtime_type, configuration); +} + +std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration) { + return ocl_engine::create(device, runtime_type, configuration); +} + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.hpp new file mode 100644 index 00000000000..d0916fcc645 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2016-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/stream.hpp" +#include "ocl_device.hpp" + +#include +#include +#include +#include +#include + +namespace cldnn { +namespace ocl { + +class ocl_engine : public engine { +public: + ocl_engine(const device::ptr dev, runtime_types runtime_type, const engine_configuration& conf); + engine_types type() const override { return engine_types::ocl; }; + runtime_types runtime_type() const override { return runtime_types::ocl; }; + + memory_ptr allocate_memory(const layout& layout, allocation_type type, bool reset = true) override; + memory_ptr reinterpret_handle(const layout& new_layout, shared_mem_params params) override; + memory_ptr reinterpret_buffer(const memory& memory, const layout& new_layout) override; + bool is_the_same_buffer(const memory& mem1, const memory& mem2) override; + + void* get_user_context() const override; + + allocation_type get_default_allocation_type() const override { return allocation_type::cl_mem; } + + const cl::Context& get_cl_context() const; + const cl::Device& get_cl_device() const; + + bool extension_supported(std::string extension) const; + + stream_ptr create_stream() const override; + 
stream& get_program_stream() const override; + + static std::shared_ptr create(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration); +private: + std::string _extensions; + std::unique_ptr _program_stream; +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine_factory.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine_factory.hpp new file mode 100644 index 00000000000..3d2530c2342 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_engine_factory.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/device.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/stream.hpp" + +namespace cldnn { +namespace ocl { + +// Factory for ocl_engine creation. It's moved outside of ocl_engine class to avoid possible CL includes conflict +// between different engines in engine.cpp file +std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type, const engine_configuration& configuration); + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/events_pool.h b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_events_pool.hpp similarity index 57% rename from inference-engine/thirdparty/clDNN/src/gpu/events_pool.h rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_events_pool.hpp index 9fdf0a23fab..d8fde6242b5 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/events_pool.h +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_events_pool.hpp @@ -2,18 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "refcounted_obj.h" -#include "event_impl.h" -#include "meta_utils.h" +#include "cldnn/runtime/utils.hpp" +#include "cldnn/runtime/event.hpp" + #include #include #include namespace cldnn { -namespace gpu { - -class gpu_toolkit; +namespace ocl { template ::value>::type> @@ -23,14 +20,14 @@ protected: using type = Type; - event_impl::ptr get_from_pool(std::shared_ptr& ctx) { + event::ptr get_from_pool(const cl::Context& ctx) { for (auto& ev : _events) { if (!ev->is_valid()) { ev->reset(); return ev; } } - const event_impl::ptr ev_impl { new Type(ctx), false }; + auto ev_impl = std::make_shared(ctx); return allocate(ev_impl); } @@ -39,36 +36,36 @@ protected: } private: - std::vector _events; + std::vector _events; - event_impl::ptr allocate(const event_impl::ptr& obj) { + event::ptr allocate(const event::ptr& obj) { _events.emplace_back(obj); return _events.back(); } }; struct base_event_pool : event_pool_impl { - event_impl::ptr get(std::shared_ptr& ctx, const cl::Event& ev, const uint64_t q_stamp) { + event::ptr get(const cl::Context& ctx, const cl::Event& ev, const uint64_t q_stamp) { auto ret = get_from_pool(ctx); - dynamic_cast(ret.get())->attach_ocl_event(ev, q_stamp); + std::dynamic_pointer_cast(ret)->attach_ocl_event(ev, q_stamp); return ret; } void reset() { reset_events(); } }; struct user_event_pool : event_pool_impl { - event_impl::ptr get(std::shared_ptr& ctx, bool set = false) { + event::ptr get(const cl::Context& ctx, bool set = false) { auto ret = get_from_pool(ctx); - dynamic_cast(ret.get())->attach_event(set); + downcast(ret.get())->attach_event(set); return ret; } void reset() { reset_events(); } }; struct group_event_pool : event_pool_impl { - event_impl::ptr 
get(std::shared_ptr& ctx, const std::vector& deps) { + event::ptr get(const cl::Context& ctx, const std::vector& deps) { auto ret_ev = get_from_pool(ctx); - dynamic_cast(ret_ev.get())->attach_events(deps); + downcast(ret_ev.get())->attach_events(deps); return ret_ev; } void reset() { reset_events(); } @@ -78,15 +75,15 @@ class events_pool { public: events_pool() = default; - event_impl::ptr get_from_base_pool(std::shared_ptr ctx, const cl::Event& ev, const uint64_t q_stamp) { + event::ptr get_from_base_pool(const cl::Context& ctx, const cl::Event& ev, const uint64_t q_stamp) { return _base_pool.get(ctx, ev, q_stamp); } - event_impl::ptr get_from_user_pool(std::shared_ptr ctx, bool set = false) { + event::ptr get_from_user_pool(const cl::Context& ctx, bool set = false) { return _user_pool.get(ctx, set); } - event_impl::ptr get_from_group_pool(std::shared_ptr ctx, const std::vector& deps) { + event::ptr get_from_group_pool(const cl::Context& ctx, const std::vector& deps) { return _group_pool.get(ctx, deps); } @@ -101,5 +98,5 @@ private: user_event_pool _user_pool; group_event_pool _group_pool; }; -} // namespace gpu +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/common/include/cl2_ext.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_ext.hpp similarity index 97% rename from inference-engine/thirdparty/clDNN/common/include/cl2_ext.hpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_ext.hpp index 8af34068af8..bacaeb03b00 100644 --- a/inference-engine/thirdparty/clDNN/common/include/cl2_ext.hpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_ext.hpp @@ -52,11 +52,9 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTE namespace { template T load_entrypoint(const cl_platform_id platform, const std::string name) { - T p = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(platform, name.c_str())); + T p = reinterpret_cast(clGetExtensionFunctionAddressForPlatform(platform, name.c_str())); if (!p) { - throw std::runtime_error("clGetExtensionFunctionAddressForPlatform(" + - name + ") returned NULL."); + throw std::runtime_error("clGetExtensionFunctionAddressForPlatform(" + name + ") returned NULL."); } return p; } @@ -64,11 +62,9 @@ T load_entrypoint(const cl_platform_id platform, const std::string name) { template T load_entrypoint(const cl_device_id device, const std::string name) { cl_platform_id platform; - cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), - &platform, nullptr); + cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); if (error) { - throw std::runtime_error("Failed to retrieve CL_DEVICE_PLATFORM: " + - std::to_string(error)); + throw std::runtime_error("Failed to retrieve CL_DEVICE_PLATFORM: " + std::to_string(error)); } return load_entrypoint(platform, name); } @@ -78,20 +74,16 @@ T load_entrypoint(const cl_device_id device, const std::string name) { template T load_entrypoint(const cl_context context, const std::string name) { size_t size = 0; - cl_int error = - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &size); + cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &size); if (error) { - throw std::runtime_error("Failed to retrieve CL_CONTEXT_DEVICES size: " + - std::to_string(error)); + throw std::runtime_error("Failed to retrieve CL_CONTEXT_DEVICES size: " + std::to_string(error)); } std::vector devices(size / sizeof(cl_device_id)); - error = 
clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), - nullptr); + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), nullptr); if (error) { - throw std::runtime_error("Failed to retrieve CL_CONTEXT_DEVICES: " + - std::to_string(error)); + throw std::runtime_error("Failed to retrieve CL_CONTEXT_DEVICES: " + std::to_string(error)); } return load_entrypoint(devices.front(), name); @@ -148,11 +140,6 @@ inline void* deviceMemAlloc(const cl::Device& cpp_device, const cl::Context& cpp return fn(cpp_context.get(), cpp_device.get(), properties, size, alignment, err_code_ret); } -inline cl_int memFree(const cl::Context& cpp_context, void* ptr) { - clMemFreeINTEL_fn fn = load_entrypoint(cpp_context.get(), "clMemFreeINTEL"); - return fn(cpp_context.get(), ptr); -} - inline cl_int set_kernel_arg_mem_pointer(const cl::Kernel& kernel, uint32_t index, const void* ptr, clSetKernelArgMemPointerINTEL_fn fn) { return fn(kernel.get(), index, ptr); } @@ -605,12 +592,20 @@ typedef CL_API_ENTRY cl_mem(CL_API_CALL * PFN_clCreateFromMediaSurfaceINTEL)( */ class UsmHolder { public: - explicit UsmHolder(Context& ctx, void* ptr) : _ctx(ctx), _ptr(ptr) {} + explicit UsmHolder(Context& ctx, void* ptr) : _ctx(ctx), _ptr(ptr), deleter(nullptr) { + deleter = load_entrypoint(_ctx.get(), "clMemFreeINTEL"); + if (!deleter) { + throw std::runtime_error("clMemFreeINTEL is nullptr in UsmHolder"); + } + } void* ptr() { return _ptr; } - ~UsmHolder() { usm::memFree(_ctx, _ptr); } + ~UsmHolder() { + deleter(_ctx.get(), _ptr); + } private: Context _ctx; void* _ptr; + clMemFreeINTEL_fn deleter; }; /* diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernel.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernel.hpp new file mode 100644 index 00000000000..4bda3ef8f31 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernel.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2016-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ocl_common.hpp" +#include "ocl_memory.hpp" +#include "cldnn/runtime/kernel_args.hpp" +#include "cldnn/runtime/kernel.hpp" + +#include +#include + +namespace cldnn { +namespace ocl { + +class ocl_kernel : public kernel { + ocl_kernel_type _compiled_kernel; + std::string _kernel_id; + +public: + ocl_kernel(ocl_kernel_type compiled_kernel, const std::string& kernel_id) + : _compiled_kernel(compiled_kernel) + , _kernel_id(kernel_id) { } + + const ocl_kernel_type& get_handle() const { return _compiled_kernel; } + ocl_kernel_type& get_handle() { return _compiled_kernel; } + std::shared_ptr clone() const override { return std::make_shared(get_handle().clone(), _kernel_id); } +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernels_factory.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernels_factory.cpp new file mode 100644 index 00000000000..29a43144261 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_kernels_factory.cpp @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ocl_kernel.hpp" +#include "kernels_factory.hpp" + +#include +#include + +namespace cldnn { +namespace ocl { + +std::shared_ptr create_ocl_kernel(engine& engine, cl_context /* context */, cl_kernel kernel, std::string entry_point) { + // Retain kernel to keep it valid + cl::Kernel k(kernel, true); + return std::make_shared(ocl::ocl_kernel_type(k, 
engine.use_unified_shared_memory()), entry_point); +} + +} // namespace kernels_factory +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.cpp new file mode 100644 index 00000000000..9a5c2b3aa52 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.cpp @@ -0,0 +1,373 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn/runtime/error_handler.hpp" +#include "cldnn/runtime/utils.hpp" +#include "ocl_memory.hpp" +#include "ocl_engine.hpp" +#include "ocl_stream.hpp" +#include "ocl_base_event.hpp" +#include +#include + +namespace cldnn { +namespace ocl { + +gpu_buffer::gpu_buffer(ocl_engine* engine, + const layout& layout) + : lockable_gpu_mem(), memory(engine, layout, allocation_type::cl_mem, false) + , _buffer(engine->get_cl_context(), CL_MEM_READ_WRITE, size()) { } + +gpu_buffer::gpu_buffer(ocl_engine* engine, + const layout& new_layout, + const cl::Buffer& buffer) + : lockable_gpu_mem(), memory(engine, new_layout, allocation_type::cl_mem, true) + , _buffer(buffer) {} + +void* gpu_buffer::lock(const stream& stream) { + auto& cl_stream = downcast(stream); + std::lock_guard locker(_mutex); + if (0 == _lock_count) { + _mapped_ptr = cl_stream.get_cl_queue().enqueueMapBuffer(_buffer, CL_TRUE, CL_MAP_WRITE, 0, size()); + } + _lock_count++; + return _mapped_ptr; +} + +void gpu_buffer::unlock(const stream& stream) { + auto& cl_stream = downcast(stream); + std::lock_guard locker(_mutex); + _lock_count--; + if (0 == _lock_count) { + cl_stream.get_cl_queue().enqueueUnmapMemObject(_buffer, _mapped_ptr); + _mapped_ptr = nullptr; + } +} + +event::ptr gpu_buffer::fill(stream& stream) { + return fill(stream, 0); +} + +event::ptr gpu_buffer::fill(stream& stream, unsigned char pattern) { + auto& cl_stream = downcast(stream); + auto ev = stream.create_base_event(); + cl::Event ev_ocl = std::dynamic_pointer_cast(ev)->get(); + cl_stream.get_cl_queue().enqueueFillBuffer(_buffer, pattern, 0, size(), nullptr, &ev_ocl); + + // TODO: do we need sync here? + cl_stream.finish(); + + return ev; +} + +shared_mem_params gpu_buffer::get_internal_params() const { + auto cl_engine = downcast(_engine); + return {shared_mem_type::shared_mem_buffer, static_cast(cl_engine->get_cl_context().get()), nullptr, + static_cast(_buffer.get()), +#ifdef _WIN32 + nullptr, +#else + 0, +#endif + 0}; +} + +event::ptr gpu_buffer::copy_from(stream& /* stream */, const memory& /* other */) { + throw std::runtime_error("[clDNN] copy_from is not implemented for gpu_buffer"); +} + +event::ptr gpu_buffer::copy_from(stream& stream, const void* host_ptr) { + auto& cl_stream = downcast(stream); + auto ev = stream.create_base_event(); + cl::Event ev_ocl = std::dynamic_pointer_cast(ev)->get(); + cl_stream.get_cl_queue().enqueueWriteBuffer(_buffer, false, 0, size(), host_ptr, nullptr, &ev_ocl); + + return ev; +} + +gpu_image2d::gpu_image2d(ocl_engine* engine, const layout& layout) + : lockable_gpu_mem(), memory(engine, layout, allocation_type::cl_mem, false), _row_pitch(0), _slice_pitch(0) { + cl_channel_type type = layout.data_type == data_types::f16 ? 
CL_HALF_FLOAT : CL_FLOAT; + cl_channel_order order = CL_R; + switch (layout.format) { + case format::image_2d_weights_c1_b_fyx: + _width = layout.size.batch[0]; + _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1]; + break; + case format::image_2d_weights_winograd_6x3_s1_fbxyb: + _height = layout.size.feature[0]; + _width = layout.size.spatial[0] * layout.size.batch[0] * layout.size.spatial[1] * 8 / 3; + break; + case format::image_2d_weights_winograd_6x3_s1_xfbyb: + _height = layout.size.feature[0] * layout.size.spatial[0] * 8 / 3; + _width = layout.size.batch[0] * layout.size.spatial[1]; + break; + case format::image_2d_weights_c4_fyx_b: + _width = layout.size.batch[0]; + _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1]; + order = CL_RGBA; + break; + case format::image_2d_rgba: + _width = layout.size.spatial[0]; + _height = layout.size.spatial[1]; + order = CL_RGBA; + if (layout.size.feature[0] != 3 && layout.size.feature[0] != 4) { + CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in image_2d_rgba input image (should be 3 or 4)!"); + } + type = CL_UNORM_INT8; + break; + case format::nv12: + _width = layout.size.spatial[1]; + _height = layout.size.spatial[0]; + if (layout.size.feature[0] == 2) { + order = CL_RG; + } else if (layout.size.feature[0] > 2) { + CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in NV12 input image!"); + } + type = CL_UNORM_INT8; + break; + default: + CLDNN_ERROR_MESSAGE("2D image allocation", "unsupported image type!"); + } + + cl::ImageFormat imageFormat(order, type); + _buffer = cl::Image2D(engine->get_cl_context(), CL_MEM_READ_WRITE, imageFormat, _width, _height, 0); +} + +gpu_image2d::gpu_image2d(ocl_engine* engine, + const layout& new_layout, + const cl::Image2D& buffer) + : lockable_gpu_mem(), memory(engine, new_layout, allocation_type::cl_mem, true), + _buffer(buffer) { + _width = _buffer.getImageInfo(); + _height = _buffer.getImageInfo(); + _row_pitch = _buffer.getImageInfo(); + _slice_pitch = _buffer.getImageInfo(); +} + +event::ptr gpu_image2d::fill(stream& stream) { + return fill(stream, 0); +} + +event::ptr gpu_image2d::fill(stream& stream, unsigned char pattern) { + auto& cl_stream = downcast(stream); + auto ev = stream.create_base_event(); + cl::Event ev_ocl = downcast(ev.get())->get(); + cl_uint4 pattern_uint4 = {pattern, pattern, pattern, pattern}; + cl_stream.get_cl_queue().enqueueFillImage(_buffer, pattern_uint4, {0, 0, 0}, {_width, _height, 1}, 0, &ev_ocl); + + // TODO: do we need sync here? 
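// Editorial note, hedged: as in gpu_buffer::fill() earlier in this file, the finish() on the next line
// blocks the whole queue even though the returned event already tracks completion of the enqueued fill;
// whether that blocking call is still needed is exactly the open TODO above. A caller that only needs
// this one operation to complete could, in principle, wait on the event instead (sketch only):
//
//   auto ev = mem->fill(stream);   // enqueue the fill and get its completion event
//   ev->wait();                    // synchronize on just this operation rather than the whole queue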
+ cl_stream.finish(); + + return ev; +} + +void* gpu_image2d::lock(const stream& stream) { + auto& cl_stream = downcast(stream); + std::lock_guard locker(_mutex); + if (0 == _lock_count) { + _mapped_ptr = cl_stream.get_cl_queue() + .enqueueMapImage(_buffer, + CL_TRUE, + CL_MAP_WRITE, + {0, 0, 0}, + {_width, _height, 1}, + &_row_pitch, + &_slice_pitch); + } + _lock_count++; + return _mapped_ptr; +} + +void gpu_image2d::unlock(const stream& stream) { + auto& cl_stream = downcast(stream); + std::lock_guard locker(_mutex); + _lock_count--; + if (0 == _lock_count) { + cl_stream.get_cl_queue().enqueueUnmapMemObject(_buffer, _mapped_ptr); + _mapped_ptr = nullptr; + } +} + + +shared_mem_params gpu_image2d::get_internal_params() const { + auto cl_engine = downcast(_engine); + return {shared_mem_type::shared_mem_image, static_cast(cl_engine->get_cl_context().get()), nullptr, + static_cast(_buffer.get()), +#ifdef _WIN32 + nullptr, +#else + 0, +#endif + 0}; +} + +event::ptr gpu_image2d::copy_from(stream& /* stream */, const memory& /* other */) { + throw std::runtime_error("[clDNN] copy_from is not implemented for gpu_image2d"); +} + +event::ptr gpu_image2d::copy_from(stream& /* stream */, const void* /* host_ptr */) { + throw std::runtime_error("[clDNN] copy_from is not implemented for gpu_image2d"); +} + +gpu_media_buffer::gpu_media_buffer(ocl_engine* engine, + const layout& new_layout, + shared_mem_params params) + : gpu_image2d(engine, new_layout, cl::ImageVA(engine->get_cl_context(), CL_MEM_READ_WRITE, params.surface, params.plane)), + device(params.user_device), + surface(params.surface), + plane(params.plane) { } + +shared_mem_params gpu_media_buffer::get_internal_params() const { + auto cl_engine = downcast(_engine); + return {shared_mem_type::shared_mem_vasurface, static_cast(cl_engine->get_cl_context().get()), device, + static_cast(_buffer.get()), surface, plane }; +} + +#ifdef _WIN32 +gpu_dx_buffer::gpu_dx_buffer(ocl_engine* engine, + const layout& new_layout, + shared_mem_params params) + : gpu_buffer(engine, new_layout, + cl::BufferDX(engine->get_cl_context(), CL_MEM_READ_WRITE, params.mem)), + device(params.user_device), + resource(params.mem) { } + +shared_mem_params gpu_dx_buffer::get_internal_params() const { + auto cl_engine = downcast(_engine); + return {shared_mem_type::shared_mem_dxbuffer, static_cast(cl_engine->get_cl_context().get()), device, + static_cast(_buffer.get()), resource, 0 }; +} +#endif + +gpu_usm::gpu_usm(ocl_engine* engine, + const layout& new_layout, const cl::UsmMemory& buffer, + allocation_type type) + : lockable_gpu_mem() + , memory(engine, new_layout, type, true) + , _buffer(buffer) { +} + +gpu_usm::gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type) + : lockable_gpu_mem() + , memory(engine, layout, type, false) + , _buffer(engine->get_cl_context()) { + auto device = engine->get_cl_device(); + switch (get_allocation_type()) { + case allocation_type::usm_host: + _buffer.allocateHost(_bytes_count); + break; + case allocation_type::usm_shared: + _buffer.allocateShared(device, _bytes_count); + break; + case allocation_type::usm_device: + _buffer.allocateDevice(device, _bytes_count); + break; + default: + CLDNN_ERROR_MESSAGE("gpu_usm allocation type", + "Unknown unified shared memory type!"); + } +} + +void* gpu_usm::lock(const stream& stream) { + assert(get_allocation_type() != allocation_type::usm_device && "Can't lock usm device memory!"); + std::lock_guard locker(_mutex); + if (0 == _lock_count) { + stream.finish(); // Synchronization needed 
for OOOQ. + _mapped_ptr = _buffer.get(); + } + _lock_count++; + return _mapped_ptr; +} + +void gpu_usm::unlock(const stream& /* stream */) { + std::lock_guard locker(_mutex); + _lock_count--; + if (0 == _lock_count) { + _mapped_ptr = nullptr; + } +} + +event::ptr gpu_usm::fill(stream& stream, unsigned char pattern) { + auto& cl_stream = downcast(stream); + auto ev = stream.create_base_event(); + cl::Event ev_ocl = downcast(ev.get())->get(); + // enqueueFillUsm call will never finish. Driver bug? Uncomment when fixed. Some older drivers doesn't support enqueueFillUsm call at all. + // cl_stream.get_cl_queue().enqueueFillUsm(_buffer, pattern, _bytes_count, nullptr, &ev_ocl) + // Workarounded with enqeue_memcopy. ToDo: Remove below code. Uncomment above. + std::vector temp_buffer(_bytes_count, pattern); + // TODO: Do we really need blocking call here? Non-blocking one causes accuracy issues right now, but hopefully it can be fixed in more performant way. + const bool blocking = true; + cl::usm::enqueue_memcpy(cl_stream.get_cl_queue(), _buffer.get(), temp_buffer.data(), _bytes_count, blocking, nullptr, &ev_ocl); + + return ev; +} + +event::ptr gpu_usm::fill(stream& stream) { + // event::ptr ev{ new base_event(_context), false }; + // cl::Event ev_ocl = downcast(ev.get())->get(); + // cl::usm::enqueue_set_mem(cl_stream.get_cl_queue(), _buffer.get(), 0, _bytes_count, nullptr, &ev_ocl); + // ev->wait(); + + // [WA] + return fill(stream, 0); +} + +event::ptr gpu_usm::copy_from(stream& stream, const memory& other) { + auto& cl_stream = downcast(stream); + auto& casted = downcast(other); + cl_stream.get_cl_queue().enqueueCopyUsm(casted.get_buffer(), get_buffer(), _bytes_count, true); + return stream.create_user_event(true); +} + +event::ptr gpu_usm::copy_from(stream& /* stream */, const void* /* host_ptr */) { + throw std::runtime_error("[clDNN] copy_from is not implemented for gpu_usm"); +} + +shared_mem_params gpu_usm::get_internal_params() const { + auto cl_engine = downcast(_engine); + return { + shared_mem_type::shared_mem_empty, // shared_mem_type + static_cast(cl_engine->get_cl_context().get()), // context handle + nullptr, // user_device handle + nullptr, // mem handle +#ifdef _WIN32 + nullptr, // surface handle +#else + 0, // surface handle +#endif + 0 // plane + }; +} + +std::vector ocl_surfaces_lock::get_handles(std::vector mem) const { + std::vector res; + for (auto& m : mem) { + auto mem_type = m->get_internal_params().mem_type; + if (mem_type == shared_mem_type::shared_mem_vasurface || mem_type == shared_mem_type::shared_mem_dxbuffer) { + res.push_back(static_cast(m->get_internal_params().mem)); + } + } + + return res; +} + +ocl_surfaces_lock::ocl_surfaces_lock(std::vector mem, const stream& stream) + : surfaces_lock() + , _stream(stream) + , _handles(get_handles(mem)) + , _lock(nullptr) { + cl_int err = CL_SUCCESS; + + auto& cl_stream = downcast(stream); + auto queue = cl_stream.get_cl_queue(); + _lock.reset(new cl::SharedSurfLock(queue.get(), _handles, &err)); + // TODO: err code for some reason is 32766 + if (/* err != CL_SUCCESS || */ !_lock) { + throw std::runtime_error("Unable to lock shared surface (" + std::to_string(err) + ")"); + } +} + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.hpp new file mode 100644 index 00000000000..f7590e366b8 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_memory.hpp @@ -0,0 +1,128 @@ +// Copyright 
(C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ocl_common.hpp" +#include "ocl_engine.hpp" +#include "ocl_stream.hpp" +#include "cldnn/runtime/memory.hpp" + +#include +#include +#include +#include + +namespace cldnn { +namespace ocl { +struct lockable_gpu_mem { + lockable_gpu_mem() : + _lock_count(0), + _mapped_ptr(nullptr) {} + + std::mutex _mutex; + unsigned _lock_count; + void* _mapped_ptr; +}; + +struct gpu_buffer : public lockable_gpu_mem, public memory { + gpu_buffer(ocl_engine* engine, const layout& new_layout, const cl::Buffer& buffer); + gpu_buffer(ocl_engine* engine, const layout& layout); + + void* lock(const stream& stream) override; + void unlock(const stream& stream) override; + event::ptr fill(stream& stream, unsigned char pattern) override; + event::ptr fill(stream& stream) override; + shared_mem_params get_internal_params() const override; + const cl::Buffer& get_buffer() const { + assert(0 == _lock_count); + return _buffer; + } + + event::ptr copy_from(stream& stream, const memory& other) override; + event::ptr copy_from(stream& stream, const void* host_ptr) override; + +protected: + cl::Buffer _buffer; +}; + +struct gpu_image2d : public lockable_gpu_mem, public memory { + gpu_image2d(ocl_engine* engine, const layout& new_layout, const cl::Image2D& buffer); + gpu_image2d(ocl_engine* engine, const layout& layout); + + void* lock(const stream& stream) override; + void unlock(const stream& stream) override; + event::ptr fill(stream& stream, unsigned char pattern) override; + event::ptr fill(stream& stream) override; + shared_mem_params get_internal_params() const override; + const cl::Image2D& get_buffer() const { + assert(0 == _lock_count); + return _buffer; + } + + event::ptr copy_from(stream& /* stream */, const memory& /* other */) override; + event::ptr copy_from(stream& /* stream */, const void* /* other */) override; + +protected: + cl::Image2D _buffer; + size_t _width; + size_t _height; + size_t _row_pitch; + size_t _slice_pitch; +}; + +struct gpu_media_buffer : public gpu_image2d { + gpu_media_buffer(ocl_engine* engine, const layout& new_layout, shared_mem_params params); + shared_mem_params get_internal_params() const override; +private: + void* device; +#ifdef _WIN32 + void* surface; +#else + uint32_t surface; +#endif + uint32_t plane; +}; + +#ifdef _WIN32 +struct gpu_dx_buffer : public gpu_buffer { + gpu_dx_buffer(ocl_engine* engine, const layout& new_layout, shared_mem_params params); + shared_mem_params get_internal_params() const override; +private: + void* device; + void* resource; +}; +#endif + +struct gpu_usm : public lockable_gpu_mem, public memory { + gpu_usm(ocl_engine* engine, const layout& new_layout, const cl::UsmMemory& usm_buffer, allocation_type type); + gpu_usm(ocl_engine* engine, const layout& layout, allocation_type type); + + void* lock(const stream& stream) override; + void unlock(const stream& stream) override; + const cl::UsmMemory& get_buffer() const { return _buffer; } + cl::UsmMemory& get_buffer() { return _buffer; } + + event::ptr fill(stream& stream, unsigned char pattern) override; + event::ptr fill(stream& stream) override; + shared_mem_params get_internal_params() const override; + + event::ptr copy_from(stream& stream, const memory& other) override; + event::ptr copy_from(stream& stream, const void* host_ptr) override; +protected: + cl::UsmMemory _buffer; +}; + +struct ocl_surfaces_lock : public surfaces_lock { + ocl_surfaces_lock(std::vector mem,
const stream& stream); + + ~ocl_surfaces_lock() = default; +private: + std::vector get_handles(std::vector mem) const; + const stream& _stream; + std::vector _handles; + std::unique_ptr _lock; +}; +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.cpp new file mode 100644 index 00000000000..508f2114214 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.cpp @@ -0,0 +1,428 @@ +// Copyright (C) 2019-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ocl_stream.hpp" +#include "ocl_base_event.hpp" +#include "ocl_user_event.hpp" +#include "ocl_command_queues_builder.hpp" +#include "ocl_events_pool.hpp" +#include "ocl_kernel.hpp" +#include "ocl_common.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include + +// NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation +// of some types (even though we already disabled them in scope of definition of these types). +// Moreover this warning is pretty much now only for annoyance: it is generated due to lack +// of proper support for mangling of custom GCC attributes into type name (usually when used +// with templates, even from standard library). +#if defined __GNUC__ && __GNUC__ >= 6 +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + +namespace cldnn { +namespace ocl { + +namespace { +inline cl::NDRange toNDRange(const std::vector& v) { + switch (v.size()) { + case 1: + return cl::NDRange(v[0]); + case 2: + return cl::NDRange(v[0], v[1]); + case 3: + return cl::NDRange(v[0], v[1], v[2]); + default: + return cl::NullRange; + } +} + +void set_arguments_impl(ocl_kernel_type& kernel, + const arguments_desc& args, + const kernel_arguments_data& data) { + using args_t = argument_desc::Types; + using scalar_t = scalar_desc::Types; + for (uint32_t i = 0; i < static_cast(args.size()); i++) { + cl_int status = CL_INVALID_ARG_VALUE; + switch (args[i].t) { + case args_t::INPUT: + if (args[i].index < data.inputs.size() && data.inputs[args[i].index]) { + const auto& input_mem = data.inputs[args[i].index]; + if (input_mem) { + if (input_mem->get_layout().format.is_image_2d()) + status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + else if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + } + } + break; + case args_t::INPUT_OF_FUSED_PRIMITIVE: + if (args[i].index < data.fused_op_inputs.size() && data.fused_op_inputs[args[i].index]) { + const auto& input_mem = data.fused_op_inputs[args[i].index]; + if (input_mem) { + if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + } + } + break; + case args_t::INTERNAL_BUFFER: + if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index]) { + const auto& input_mem = data.intermediates[args[i].index]; + if (input_mem) { + if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(input_mem)->get_buffer()); + else + status = kernel.setArg(i, 
std::dynamic_pointer_cast(input_mem)->get_buffer()); + } + } + break; + case args_t::OUTPUT: + if (data.output) { + if (data.output->get_layout().format.is_image_2d()) + status = kernel.setArg(i, std::dynamic_pointer_cast(data.output)->get_buffer()); + else if (memory_capabilities::is_usm_type(data.output->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.output)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(data.output)->get_buffer()); + } + break; + case args_t::WEIGHTS: + if (data.weights) { + if (data.weights->get_layout().format.is_image_2d()) + status = kernel.setArg(i, std::dynamic_pointer_cast(data.weights)->get_buffer()); + else if (memory_capabilities::is_usm_type(data.weights->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.weights)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(data.weights)->get_buffer()); + } + break; + case args_t::BIAS: + if (data.bias) { + if (memory_capabilities::is_usm_type(data.bias->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.bias)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(data.bias)->get_buffer()); + } + break; + case args_t::WEIGHTS_ZERO_POINTS: + if (data.weights_zero_points) { + if (memory_capabilities::is_usm_type(data.weights_zero_points->get_allocation_type())) + status = kernel.setArgUsm( + i, + std::dynamic_pointer_cast(data.weights_zero_points)->get_buffer()); + else + status = kernel.setArg( + i, + std::dynamic_pointer_cast(data.weights_zero_points)->get_buffer()); + } + break; + case args_t::ACTIVATIONS_ZERO_POINTS: + if (data.activations_zero_points) { + if (memory_capabilities::is_usm_type(data.activations_zero_points->get_allocation_type())) + status = kernel.setArgUsm( + i, + std::dynamic_pointer_cast(data.activations_zero_points)->get_buffer()); + else + status = kernel.setArg( + i, + std::dynamic_pointer_cast(data.activations_zero_points)->get_buffer()); + } + break; + case args_t::COMPENSATION: + if (data.compensation) { + if (memory_capabilities::is_usm_type(data.compensation->get_allocation_type())) + status = kernel.setArgUsm( + i, + std::dynamic_pointer_cast(data.compensation)->get_buffer()); + else + status = kernel.setArg( + i, + std::dynamic_pointer_cast(data.compensation)->get_buffer()); + } + break; + case args_t::SCALE_TABLE: + if (data.scale_table) { + if (memory_capabilities::is_usm_type(data.scale_table->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.scale_table)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(data.scale_table)->get_buffer()); + } + break; + case args_t::SLOPE: + if (data.slope) { + if (memory_capabilities::is_usm_type(data.slope->get_allocation_type())) + status = kernel.setArgUsm(i, std::dynamic_pointer_cast(data.slope)->get_buffer()); + else + status = kernel.setArg(i, std::dynamic_pointer_cast(data.slope)->get_buffer()); + } + break; + case args_t::SPLIT: + status = kernel.setArg(i, data.split); + break; + case args_t::SCALAR: + if (data.scalars && args[i].index < data.scalars->size()) { + const auto& scalar = (*data.scalars)[args[i].index]; + switch (scalar.t) { + case scalar_t::UINT8: + status = kernel.setArg(i, scalar.v.u8); + break; + case scalar_t::UINT16: + status = kernel.setArg(i, scalar.v.u16); + break; + case scalar_t::UINT32: + status = kernel.setArg(i, scalar.v.u32); + break; + case scalar_t::UINT64: + status = 
kernel.setArg(i, scalar.v.u64); + break; + case scalar_t::INT8: + status = kernel.setArg(i, scalar.v.s8); + break; + case scalar_t::INT16: + status = kernel.setArg(i, scalar.v.s16); + break; + case scalar_t::INT32: + status = kernel.setArg(i, scalar.v.s32); + break; + case scalar_t::INT64: + status = kernel.setArg(i, scalar.v.s64); + break; + case scalar_t::FLOAT32: + status = kernel.setArg(i, scalar.v.f32); + break; + case scalar_t::FLOAT64: + status = kernel.setArg(i, scalar.v.f64); + break; + default: + break; + } + } + break; + case args_t::RECURRENT: // RNN/LSTM/GRU layers + if (data.recurrent) { + if (data.recurrent->get_layout().format.is_image_2d()) + status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer()); + else if (memory_capabilities::is_usm_type(data.recurrent->get_allocation_type())) + status = kernel.setArgUsm(i, dynamic_cast(*data.recurrent).get_buffer()); + else + status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer()); + } + break; + case args_t::HIDDEN: // RNN/LSTM/GRU layers + if (data.hidden) { + if (data.hidden->get_layout().format.is_image_2d()) + status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer()); + else if (memory_capabilities::is_usm_type(data.hidden->get_allocation_type())) + status = kernel.setArgUsm(i, dynamic_cast(*data.hidden).get_buffer()); + else + status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer()); + } + break; + case args_t::CELL: // LSTMlayers + if (data.cell) { + if (data.cell->get_layout().format.is_image_2d()) + status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer()); + else if (memory_capabilities::is_usm_type(data.cell->get_allocation_type())) + status = kernel.setArgUsm(i, dynamic_cast(*data.cell).get_buffer()); + else + status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer()); + } + break; + default: + break; + } + + if (status != CL_SUCCESS) { + throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n"); + } + } +} +} // namespace + +ocl_stream::ocl_stream(const ocl_engine& engine) : stream(engine.configuration().queue_type), _engine(engine) { + auto context = engine.get_cl_context(); + auto device = engine.get_cl_device(); + auto config = engine.configuration(); + ocl::command_queues_builder queue_builder; + queue_builder.set_profiling(config.enable_profiling); + queue_builder.set_out_of_order((config.queue_type == queue_types::out_of_order)); + + sync_method = _engine.configuration().enable_profiling ? sync_methods::events : + config.queue_type == queue_types::out_of_order ? 
sync_methods::barriers : sync_methods::none; + + if (sync_method == sync_methods::none && config.queue_type == queue_types::out_of_order) { + throw std::runtime_error("[CLDNN] Unexpected sync method (none) is specified for out_of_order queue"); + } + + bool priority_extensions = engine.extension_supported("cl_khr_priority_hints") && engine.extension_supported("cl_khr_create_command_queue"); + queue_builder.set_priority_mode(config.priority_mode, priority_extensions); + + bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue"); + queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions); + + _command_queue = queue_builder.build(context, device); + _events_pool.reset(new events_pool()); +} + +void ocl_stream::set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) { + static std::mutex m; + std::lock_guard guard(m); + + auto& ocl_kernel = downcast(kernel); + + auto& kern = ocl_kernel.get_handle(); + + try { + set_arguments_impl(kern, args_desc.arguments, args); + } catch (cl::Error const& err) { + throw ocl_error(err); + } +} + +event::ptr ocl_stream::enqueue_kernel(kernel& kernel, + const kernel_arguments_desc& args_desc, + const kernel_arguments_data& /* args */, + std::vector const& deps, + bool is_output) { + auto& ocl_kernel = downcast(kernel); + + auto& kern = ocl_kernel.get_handle(); + auto global = toNDRange(args_desc.workGroups.global); + auto local = toNDRange(args_desc.workGroups.local); + std::vector dep_events; + std::vector* dep_events_ptr = nullptr; + if (sync_method == sync_methods::events) { + for (auto& dep : deps) { + if (auto ocl_base_ev = std::dynamic_pointer_cast(dep)) { + if (ocl_base_ev->get().get() != nullptr) + dep_events.push_back(ocl_base_ev->get()); + } + } + dep_events_ptr = &dep_events; + } else if (sync_method == sync_methods::barriers) { + sync_events(deps, is_output); + } + + cl::Event ret_ev; + + bool set_output_event = sync_method == sync_methods::events || is_output; + + try { + _command_queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local, dep_events_ptr, set_output_event ? 
&ret_ev : nullptr); + } catch (cl::Error const& err) { + throw ocl_error(err); + } + + return _events_pool->get_from_base_pool(_engine.get_cl_context(), ret_ev, ++_queue_counter); +} + +void ocl_stream::enqueue_barrier() { + _command_queue.enqueueBarrierWithWaitList(nullptr, nullptr); +} + +event::ptr ocl_stream::enqueue_marker(std::vector const& deps, bool is_output) { + if (deps.empty()) + return _events_pool->get_from_user_pool(_engine.get_cl_context(), true); + + if (sync_method == sync_methods::events) { + cl::Event ret_ev; + std::vector dep_events; + for (auto& dep : deps) { + if (auto ocl_base_ev = dynamic_cast(dep.get())) + if (ocl_base_ev->get().get() != nullptr) + dep_events.push_back(ocl_base_ev->get()); + } + + try { + if (dep_events.empty()) { + return create_user_event(true); + } + _command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev); + } catch (cl::Error const& err) { + throw ocl_error(err); + } + + return _events_pool->get_from_base_pool(_engine.get_cl_context(), ret_ev, ++_queue_counter); + } else if (sync_method == sync_methods::barriers) { + sync_events(deps, is_output); + return _events_pool->get_from_base_pool(_engine.get_cl_context(), _last_barrier_ev, _last_barrier); + } else { + return _events_pool->get_from_user_pool(_engine.get_cl_context(), true); + } +} + +event::ptr ocl_stream::group_events(std::vector const& deps) { + return _events_pool->get_from_group_pool(_engine.get_cl_context(), deps); +} + +event::ptr ocl_stream::create_user_event(bool set) { + return _events_pool->get_from_user_pool(_engine.get_cl_context(), set); +} + +event::ptr ocl_stream::create_base_event() { + cl::Event ret_ev; + return _events_pool->get_from_base_pool(_engine.get_cl_context(), ret_ev, ++_queue_counter); +} + +void ocl_stream::reset_events() { _events_pool->reset_events(); } + +void ocl_stream::release_events_pool() { _events_pool.reset(); } + +void ocl_stream::flush() const { get_cl_queue().flush(); } +void ocl_stream::finish() const { get_cl_queue().finish(); } + +void ocl_stream::wait_for_events(const std::vector& events) { + if (events.empty()) + return; + + std::vector clevents; + for (auto& ev : events) { + if (auto ocl_base_ev = dynamic_cast(ev.get())) + clevents.push_back(ocl_base_ev->get()); + } + + try { + cl::WaitForEvents(clevents); + } catch (cl::Error const& err) { + throw ocl_error(err); + } +} + +void ocl_stream::sync_events(std::vector const& deps, bool is_output) { + bool needs_barrier = false; + for (auto& dep : deps) { + auto* ocl_base_ev = dynamic_cast(dep.get()); + if (ocl_base_ev->get_queue_stamp() > _last_barrier) { + needs_barrier = true; + } + } + + if (needs_barrier) { + try { + if (is_output) + _command_queue.enqueueBarrierWithWaitList(nullptr, &_last_barrier_ev); + else + _command_queue.enqueueBarrierWithWaitList(nullptr, nullptr); + } catch (cl::Error const& err) { + throw ocl_error(err); + } + + _last_barrier = ++_queue_counter; + } +} + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.hpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.hpp new file mode 100644 index 00000000000..3545d6d17e5 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_stream.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2019-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/stream.hpp" +#include "ocl_common.hpp" +#include "ocl_engine.hpp" + +#include +#include +#include +#include 
+#include +#include +#include + +namespace cldnn { +namespace ocl { + +class events_pool; + +// Possible sync methods for kernels in stream +enum class sync_methods { + /* Build dependency graph using events. Each kernel creates proper cl_event which is set as dependency of users + At this moment it requires multiple retain/release calls for cl_event after each enqueueNDRange + which is less performant compared to the barriers version + */ + events = 0, + /* Enqueue barriers between dependent kernels. For example consider the following diamond dependency graph: + kernel_0 + / \ + kernel_1 kernel_2 + \ / + kernel_3 + In that case we do the following: + 1. Enqueue kernel_0 + 2. Enqueue barrier (ensures kernel_0 is completed) + 3. Enqueue kernel_1 + 4. Enqueue kernel_2 (doesn't depend on kernel_1) + 5. Enqueue barrier (ensures kernel_1 and kernel_2 are completed) + 6. Enqueue kernel_3 + */ + barriers = 1, + /* No explicit synchronization is needed. Applicable for in-order queue only */ + none = 2 +}; +class ocl_stream : public stream { +public: + const ocl_queue_type& get_cl_queue() const { return _command_queue; } + + explicit ocl_stream(const ocl_engine& engine); + ocl_stream(ocl_stream&& other) + : stream(other._engine.configuration().queue_type) + , _engine(other._engine) + , _command_queue(other._command_queue) + , _queue_counter(other._queue_counter.load()) + , _last_barrier(other._last_barrier.load()) + , _events_pool(std::move(other._events_pool)) + , _last_barrier_ev(other._last_barrier_ev) + , sync_method(other.sync_method) {} + + ~ocl_stream() = default; + + void flush() const override; + void finish() const override; + + void set_arguments(kernel& kernel, const kernel_arguments_desc& args_desc, const kernel_arguments_data& args) override; + event::ptr enqueue_kernel(kernel& kernel, + const kernel_arguments_desc& args_desc, + const kernel_arguments_data& args, + std::vector const& deps, + bool is_output = false) override; + event::ptr enqueue_marker(std::vector const& deps, bool is_output) override; + event::ptr group_events(std::vector const& deps) override; + void wait_for_events(const std::vector& events) override; + void enqueue_barrier() override; + void reset_events() override; + event::ptr create_user_event(bool set) override; + event::ptr create_base_event() override; + void release_events_pool() override; + +private: + void sync_events(std::vector const& deps, bool is_output = false); + + const ocl_engine& _engine; + ocl_queue_type _command_queue; + std::atomic _queue_counter{0}; + std::atomic _last_barrier{0}; + std::shared_ptr _events_pool; + cl::Event _last_barrier_ev; + + sync_methods sync_method; +}; + +} // namespace ocl +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp similarity index 94% rename from inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp index c5663fb549d..86da8d90bbe 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ocl_user_event.h" +#include "ocl_user_event.hpp" #include -using namespace cldnn::gpu; +using namespace cldnn::ocl; void user_event::set_impl() { // we simulate "wrapper_cast" here to cast from cl::Event to cl::UserEvent which both wrap the same cl_event diff --git 
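As an aside, the barrier strategy documented in the sync_methods comment above can be sketched with plain OpenCL C++ calls. This is only an illustrative example for the diamond graph, not part of the diff; the queue and kernel objects (ctx, dev, q, k0..k3, gws) are hypothetical:

    // Out-of-order queue: kernels may run concurrently unless ordered by barriers.
    cl::CommandQueue q(ctx, dev, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
    q.enqueueNDRangeKernel(k0, cl::NullRange, gws, cl::NullRange);
    q.enqueueBarrierWithWaitList();   // ensures k0 has completed before its users start
    q.enqueueNDRangeKernel(k1, cl::NullRange, gws, cl::NullRange);
    q.enqueueNDRangeKernel(k2, cl::NullRange, gws, cl::NullRange);  // independent of k1
    q.enqueueBarrierWithWaitList();   // ensures k1 and k2 have completed
    q.enqueueNDRangeKernel(k3, cl::NullRange, gws, cl::NullRange);

Compared with the events variant, no per-kernel cl_event needs to be retained and released, which is the performance argument made in the enum comment.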
a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.hpp similarity index 71% rename from inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.hpp index 45e44509fac..199a8a8a260 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_user_event.hpp @@ -4,8 +4,8 @@ #pragma once -#include "ocl_base_event.h" -#include "api/profiling.hpp" +#include "cldnn/runtime/profiling.hpp" +#include "ocl_base_event.hpp" #include #include @@ -16,14 +16,15 @@ #endif namespace cldnn { -namespace gpu { +namespace ocl { struct user_event : public base_event, public cldnn::user_event { - explicit user_event(std::shared_ptr ctx) : base_event(ctx), cldnn::user_event(false) {} + explicit user_event(const cl::Context& ctx) : base_event(ctx, cl::UserEvent(ctx)), cldnn::user_event(false), _ctx(ctx) {} void set_impl() override; void attach_event(bool set) { - _event = cl::UserEvent(get_context()->context()); + // Event handle must be created again as clSetUserEventStatus can't be called twice for the same object + _event = cl::UserEvent(_ctx); // we need to reset the timer(since attach_ocl_event is called only when this object is being reused) _timer = cldnn::instrumentation::timer<>(); if (set) { @@ -36,11 +37,12 @@ struct user_event : public base_event, public cldnn::user_event { protected: cldnn::instrumentation::timer<> _timer; std::unique_ptr _duration; + const cl::Context& _ctx; }; #ifdef _WIN32 #pragma warning(pop) #endif -} // namespace gpu +} // namespace ocl } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/common/include/cl2_wrapper.h b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_wrapper.hpp similarity index 95% rename from inference-engine/thirdparty/clDNN/common/include/cl2_wrapper.h rename to inference-engine/thirdparty/clDNN/runtime/ocl/ocl_wrapper.hpp index 3beb802304c..f75779075ff 100644 --- a/inference-engine/thirdparty/clDNN/common/include/cl2_wrapper.h +++ b/inference-engine/thirdparty/clDNN/runtime/ocl/ocl_wrapper.hpp @@ -6,8 +6,7 @@ /// \file This file wraps cl2.hpp and disables temporary some warnings that this header can emit. /// -#ifndef CL2_WRAPPER_H_ -#define CL2_WRAPPER_H_ +#pragma once // Check for C++. 
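For context on the attach_event() change shown above: the OpenCL specification allows clSetUserEventStatus to move a user event to a terminal state only once, so a reused wrapper must create a fresh cl::UserEvent each time. A minimal illustration, not part of the patch (ctx is assumed to be a valid cl::Context):

    cl::UserEvent ev(ctx);
    ev.setStatus(CL_COMPLETE);     // allowed exactly once for a given user event
    // ev.setStatus(CL_COMPLETE);  // a second call would fail with CL_INVALID_OPERATION
    ev = cl::UserEvent(ctx);       // reuse therefore requires a brand-new underlying event
    ev.setStatus(CL_COMPLETE);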
#ifndef __cplusplus @@ -58,7 +57,7 @@ #endif -#include "cl2_ext.hpp" +#include "ocl_ext.hpp" namespace cl { namespace detail { @@ -76,5 +75,3 @@ CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #elif defined __GNUC__ #pragma GCC diagnostic pop #endif - -#endif // CL2_WRAPPER_H_ diff --git a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt index 0e4bcb0c5d9..de841fe48be 100644 --- a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt @@ -38,16 +38,11 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS # ========================================= Source/Header files ======================================== set(__CLDNN_Label__api "api") -file(GLOB __CLDNN_Headers__api +file(GLOB_RECURSE __CLDNN_Headers__api "${CLDNN__API_DIR}/*.h" "${CLDNN__API_DIR}/*.hpp" ) -set(__CLDNN_Label__api_extension "api_extension") -file(GLOB __CLDNN_Headers__api_extension - "${CLDNN__API_EXTENSION_DIR}/*.hpp" - ) - set(__CLDNN_Label__main "") file(GLOB __CLDNN_Sources__main "${CMAKE_CURRENT_SOURCE_DIR}/*.h" @@ -108,7 +103,6 @@ set(__CLDNN_AllSources ${__CLDNN_Headers__api} ${__CLDNN_Sources__graph_opt} ${__CLDNN_Headers__include} - ${__CLDNN_Headers__api_extension} ${__CLDNN_Sources__main} ${__CLDNN_Sources__gpu} ${__CLDNN_Sources__cache} @@ -121,7 +115,6 @@ set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE) # =============================================== Filters ============================================== source_group("${__CLDNN_Label__api}" FILES ${__CLDNN_Headers__api}) -source_group("${__CLDNN_Label__api_extension}" FILES ${__CLDNN_Headers__api_extension}) source_group("${__CLDNN_Label__include}" FILES ${__CLDNN_Headers__include}) source_group("${__CLDNN_Label__graph_opt}" FILES ${__CLDNN_Sources__graph_opt}) source_group("${__CLDNN_Label__main}" FILES ${__CLDNN_Sources__main}) @@ -135,6 +128,7 @@ source_group("${__CLDNN_Label__cg_cache}" FILES ${__CLDNN_Sources__c include_directories( "${CLDNN__MAIN_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}" + "${CLDNN__RUNTIME_DIR}" "${__CLDNN_Directory__include}" "${__CLDNN_Directory__ks_core}" "${__CLDNN_Directory__ks_core}/common" @@ -155,6 +149,7 @@ target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE clDNN_OpenCL cldnn_kernel_selector openvino::itt + cldnn_runtime ) if(COMMAND add_cpplint_target) @@ -171,3 +166,5 @@ elseif((NOT ANDROID) AND (UNIX)) target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE pthread) endif() target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE ${CLDNN__SYSTEM_LINK_LIBRARIES}) + +# ====================================================================================================== diff --git a/inference-engine/thirdparty/clDNN/src/activation.cpp b/inference-engine/thirdparty/clDNN/src/activation.cpp index dba83df3583..0987286ab4e 100644 --- a/inference-engine/thirdparty/clDNN/src/activation.cpp +++ b/inference-engine/thirdparty/clDNN/src/activation.cpp @@ -4,7 +4,7 @@ #include "activation_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp b/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp index f4a5a188e61..0d2f1f4beae 100644 --- a/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp +++ b/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: 
Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #include "arg_max_min_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp b/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp index c4ba3ac8418..3082862f302 100644 --- a/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp +++ b/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp @@ -5,7 +5,7 @@ #include "average_unpooling_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/batch_to_space.cpp b/inference-engine/thirdparty/clDNN/src/batch_to_space.cpp index 7540b49e754..aa34ab8da53 100644 --- a/inference-engine/thirdparty/clDNN/src/batch_to_space.cpp +++ b/inference-engine/thirdparty/clDNN/src/batch_to_space.cpp @@ -5,7 +5,7 @@ #include "batch_to_space_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "data_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp index ece928a51d0..a2ffcdc02ca 100644 --- a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp @@ -8,7 +8,7 @@ #include "reorder_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/border.cpp b/inference-engine/thirdparty/clDNN/src/border.cpp index 810027192e0..cdeca46d8b1 100644 --- a/inference-engine/thirdparty/clDNN/src/border.cpp +++ b/inference-engine/thirdparty/clDNN/src/border.cpp @@ -4,7 +4,7 @@ #include "border_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/broadcast.cpp b/inference-engine/thirdparty/clDNN/src/broadcast.cpp index 790a1222fba..d6cc1506707 100644 --- a/inference-engine/thirdparty/clDNN/src/broadcast.cpp +++ b/inference-engine/thirdparty/clDNN/src/broadcast.cpp @@ -4,7 +4,7 @@ #include "broadcast_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/concatenation.cpp b/inference-engine/thirdparty/clDNN/src/concatenation.cpp index e6e622a93e9..d341566679e 100644 --- a/inference-engine/thirdparty/clDNN/src/concatenation.cpp +++ b/inference-engine/thirdparty/clDNN/src/concatenation.cpp @@ -4,7 +4,7 @@ #include "concatenation_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/condition.cpp b/inference-engine/thirdparty/clDNN/src/condition.cpp index f6b2d237392..c31e6cb80e7 100644 --- a/inference-engine/thirdparty/clDNN/src/condition.cpp +++ b/inference-engine/thirdparty/clDNN/src/condition.cpp @@ -4,7 +4,7 
@@ #include "condition_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include @@ -71,10 +71,8 @@ Condition primitive is resuing memory with the input. */ condition_inst::typed_primitive_inst(network_impl& network, condition_node const& node) : parent(network, node), - _net_true( - node.get_program().get_engine().allocate_network(*node.get_branch_true(), true)), - _net_false( - node.get_program().get_engine().allocate_network(*node.get_branch_false(), true)) { + _net_true(network_impl::allocate_network(node.get_program().get_engine(), node.get_branch_true(), true)), + _net_false(network_impl::allocate_network(node.get_program().get_engine(), node.get_branch_false(), true)) { auto compare_tensor = node.compare().get_output_layout().size; auto input_tensor = node.input().get_output_layout().size; CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(), diff --git a/inference-engine/thirdparty/clDNN/src/convolution.cpp b/inference-engine/thirdparty/clDNN/src/convolution.cpp index 5b49b614f67..c145dfc339c 100644 --- a/inference-engine/thirdparty/clDNN/src/convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/convolution.cpp @@ -7,7 +7,7 @@ #include "convolution_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/crop.cpp b/inference-engine/thirdparty/clDNN/src/crop.cpp index 4dbcc9ce247..7d35ad1e04f 100644 --- a/inference-engine/thirdparty/clDNN/src/crop.cpp +++ b/inference-engine/thirdparty/clDNN/src/crop.cpp @@ -4,8 +4,8 @@ #include "crop_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/cum_sum.cpp b/inference-engine/thirdparty/clDNN/src/cum_sum.cpp index ebb176ec745..b7e0b144b90 100644 --- a/inference-engine/thirdparty/clDNN/src/cum_sum.cpp +++ b/inference-engine/thirdparty/clDNN/src/cum_sum.cpp @@ -5,7 +5,7 @@ #include "cum_sum_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/data.cpp b/inference-engine/thirdparty/clDNN/src/data.cpp index 7d0d89ea3d9..c08b6a28ce3 100644 --- a/inference-engine/thirdparty/clDNN/src/data.cpp +++ b/inference-engine/thirdparty/clDNN/src/data.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "data_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" +#include "cldnn/runtime/memory.hpp" #include "json_object.h" #include @@ -19,28 +19,28 @@ primitive_type_id data::type_id() { } namespace { -memory_impl::ptr attach_or_copy_data(network_impl& network, memory_impl& mem) { +memory::ptr attach_or_copy_data(network_impl& network, memory::ptr mem) { auto& engine = network.get_engine(); - if (mem.is_allocated_by(engine)) - return (memory_impl::ptr) &mem; + if (mem->is_allocated_by(engine)) + return mem; - memory_impl::ptr result = engine.allocate_memory(mem.get_layout(), network.get_id(), false); - mem_lock src(mem); - mem_lock dst(result); + memory::ptr result = engine.allocate_memory(mem->get_layout(), false); + mem_lock src(mem, network.get_stream()); + 
mem_lock dst(result, network.get_stream()); std::copy(src.begin(), src.end(), dst.begin()); return result; } } // namespace data_node::typed_program_node(const std::shared_ptr dprim, program_impl& prog) - : parent(dprim, prog), mem(dprim->mem.get()) { + : parent(dprim, prog), mem(dprim->mem) { constant = true; can_share_buffer(false); recalc_output_layout(false); } -void data_node::attach_memory(memory_impl& new_mem, bool invalidate_users_if_changed) { - mem = (memory_impl::ptr) &new_mem; +void data_node::attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed) { + mem = new_mem; recalc_output_layout(invalidate_users_if_changed); } @@ -54,6 +54,6 @@ std::string data_inst::to_string(data_node const& node) { } data_inst::typed_primitive_inst(network_impl& network, data_node const& node) - : parent(network, node, *attach_or_copy_data(network, node.get_attached_memory())) {} + : parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr())) {} } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp index 9642e3e3289..6ddc0373f5f 100644 --- a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp @@ -6,7 +6,7 @@ #include "deconvolution_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/deformable_convolution.cpp b/inference-engine/thirdparty/clDNN/src/deformable_convolution.cpp index d3e04b2e1c8..237ed6929f6 100644 --- a/inference-engine/thirdparty/clDNN/src/deformable_convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/deformable_convolution.cpp @@ -6,7 +6,7 @@ #include "deformable_convolution_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp b/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp index 3c8622f6b8a..09f16b6f7c6 100644 --- a/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp +++ b/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp @@ -5,7 +5,7 @@ #include "depth_to_space_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/detection_output.cpp b/inference-engine/thirdparty/clDNN/src/detection_output.cpp index 78a770caa54..c4e0fa86e2b 100644 --- a/inference-engine/thirdparty/clDNN/src/detection_output.cpp +++ b/inference-engine/thirdparty/clDNN/src/detection_output.cpp @@ -5,7 +5,7 @@ #include "detection_output_inst.h" #include "primitive_type_base.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/device.cpp b/inference-engine/thirdparty/clDNN/src/device.cpp deleted file mode 100644 index b3470db4ff9..00000000000 --- a/inference-engine/thirdparty/clDNN/src/device.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "device_impl.h" -#include 
"gpu/ocl_builder.h" - -#include -#include - -namespace cldnn { - -device device::create_default() { - device_query query; - auto devices = query.get_available_devices(); - // ToDo Maybe some heuristic should be added to decide what device is the default? (i.e number of EUs) - return devices.begin()->second; -} - -device_info device::get_info() const { - return _impl->get_info().convert_to_api(); -} - -void device::retain() { - _impl->add_ref(); -} -void device::release() { - _impl->release(); -} - -// --- device query --- -device_query::device_query(void* clcontext, void* user_device) - : _impl(new device_query_impl(clcontext, user_device)) { -} - -std::map device_query::get_available_devices() const { - std::map ret; - auto device_list = _impl->get_available_devices(); - for (auto dev : device_list) { - ret.insert({ dev.first, device(dev.second.detach())}); - } - return ret; -} - -void device_query::retain() { - _impl->add_ref(); -} -void device_query::release() { - _impl->release(); -} - -// --- device query impl --- -device_query_impl::device_query_impl(void* user_context, void* user_device) { - gpu::ocl_builder builder; - _available_devices = builder.get_available_devices(user_context, user_device); -} -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/eltwise.cpp b/inference-engine/thirdparty/clDNN/src/eltwise.cpp index 760b504a013..a7f544a3a75 100644 --- a/inference-engine/thirdparty/clDNN/src/eltwise.cpp +++ b/inference-engine/thirdparty/clDNN/src/eltwise.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "eltwise_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/embedding_bag.cpp b/inference-engine/thirdparty/clDNN/src/embedding_bag.cpp index 951ff190049..84e5483dff0 100644 --- a/inference-engine/thirdparty/clDNN/src/embedding_bag.cpp +++ b/inference-engine/thirdparty/clDNN/src/embedding_bag.cpp @@ -5,7 +5,7 @@ #include "embedding_bag_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/engine.cpp b/inference-engine/thirdparty/clDNN/src/engine.cpp deleted file mode 100644 index e64c0051827..00000000000 --- a/inference-engine/thirdparty/clDNN/src/engine.cpp +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "engine_impl.h" -#include "event_impl.h" -#include "program_impl.h" -#include "network_impl.h" -#include "gpu/ocl_toolkit.h" -#include "gpu/memory_gpu.h" -#include "gpu/ocl_user_event.h" -#include "gpu/register_gpu.hpp" -#include -#include -#include -#include -#include - -namespace cldnn { - -engine::engine(engine_types type, const device& dev, const engine_configuration& configuration) - : _impl(new engine_impl(*dev.get(), configuration)) { - if (type != engine_types::ocl) - throw std::invalid_argument("Invalid engine type, should be ocl."); -} - -uint32_t engine::engine_count(engine_types type) { - if (type == engine_types::ocl) { - return 1; - } else { - return 0; - } -} - -void engine::release_pending_memory(uint32_t net_id) const { - _impl->release_pending_memory(net_id); -} - -device_info 
engine::get_info() const { - auto info = _impl->get_device_info(); - return info.convert_to_api(); -} - -void* engine::get_context() const { - return _impl->get_user_context(); -} - -uint64_t engine::get_max_used_device_memory_size() const { - return _impl->get_max_used_device_memory(); -} - -uint64_t engine::get_temp_used_device_memory_size() const { - return _impl->get_used_device_memory(); -} - -engine_types engine::get_type() const { - return _impl->type(); -} - -void engine::retain() { - _impl->add_ref(); -} -void engine::release() { - _impl->release(); -} - -using gpu_toolkit_config = gpu::configuration; - -gpu_toolkit_config convert_configuration(const engine_configuration conf) { - gpu_toolkit_config result; - result.compiler_options = conf.compiler_options; - result.enable_profiling = conf.enable_profiling != 0; - result.meaningful_kernels_names = conf.meaningful_kernels_names != 0; - result.dump_custom_program = conf.dump_custom_program != 0; - result.single_kernel_name = conf.single_kernel_name; - result.host_out_of_order = true; - result.use_unifed_shared_memory = true; // Switch on/off USM. - result.log = conf.engine_log; - result.ocl_sources_dumps_dir = conf.sources_dumps_dir; - result.priority_mode = conf.priority_mode; - result.throttle_mode = conf.throttle_mode; - result.queues_num = conf.n_streams; - result.kernels_cache_path = conf.kernels_cache_path; - result.tuning_cache_path = conf.tuning_cache_path; - result.n_threads = conf.n_threads; - return result; -} - -engine_impl::engine_impl(const device_impl& dev, const engine_configuration& conf) - : _configuration(conf), _context(gpu_toolkit::create(dev, convert_configuration(conf))), _memory_pool(*this) { - gpu::register_implementations_gpu(); -} - -engine_impl::~engine_impl() { - /* - Engine, which is main owner of context deallocate events pool manually, because - of the event_impl <-> gpu_toolkit dependencies. 
- */ - _context->release_all_events_pools(); -} - -memory_impl::ptr engine_impl::allocate_memory(const layout& layout, uint32_t net_id, bool reset) { - allocation_type type = get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d()); - return _memory_pool.get_memory(layout, type, net_id, reset); -} - -memory_impl::ptr engine_impl::allocate_memory(const layout& layout, allocation_type type, uint32_t net_id, bool reset) { - return _memory_pool.get_memory(layout, type, net_id, reset); -} - -memory_impl::ptr engine_impl::allocate_memory(const layout& layout, - primitive_id id, - uint32_t network_id, - std::set dependencies, - allocation_type type, - bool reusable) { - if (use_memory_pool()) - return _memory_pool.get_memory(layout, id, network_id, dependencies, type, reusable); - return _memory_pool.get_memory(layout, type, network_id); -} - -memory_impl::ptr engine_impl::reinterpret_buffer(const memory_impl& memory, const layout& new_layout) { - if (memory.get_engine() != (const refcounted_obj_ptr) this) - throw std::runtime_error("trying to reinterpret buffer allocated by a different engine"); - - if (new_layout.format.is_image() && !memory.get_layout().format.is_image()) - throw std::runtime_error("trying to reinterpret non-image buffer as image"); - - if (!new_layout.format.is_image() && memory.get_layout().format.is_image()) - throw std::runtime_error("trying to reinterpret image buffer as non-image buffer"); - - try { - if (new_layout.format.is_image_2d()) { - memory_impl::ptr mem_impl { - new gpu::gpu_image2d((refcounted_obj_ptr) this, - new_layout, - reinterpret_cast(memory).get_buffer(), - memory.get_net_id()), - false }; - return mem_impl; - } else if (memory_capabilities::is_usm_type(memory.get_allocation_type())) { - memory_impl::ptr mem_impl{ - new gpu::gpu_usm((refcounted_obj_ptr) this, - new_layout, - reinterpret_cast(memory).get_buffer(), - memory.get_allocation_type(), - memory.get_net_id()), - false }; - return mem_impl; - } else { - memory_impl::ptr mem_impl { - new gpu::gpu_buffer((refcounted_obj_ptr) this, - new_layout, - reinterpret_cast(memory).get_buffer(), - memory.get_net_id()), - false}; - return mem_impl; - } - } catch (cl::Error const& err) { - throw gpu::ocl_error(err); - } -} - -memory_impl::ptr engine_impl::reinterpret_handle(const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id) { - return _memory_pool.get_memory(new_layout, params, net_id); -} - - -bool engine_impl::is_the_same_buffer(const memory_impl& mem1, const memory_impl& mem2) { - if (mem1.get_engine() != (refcounted_obj_ptr)this || mem2.get_engine() != (refcounted_obj_ptr) this) - return false; - if (mem1.get_net_id() != mem2.get_net_id()) - return false; - if (mem1.get_allocation_type() != mem2.get_allocation_type()) - return false; - if (&mem1 == &mem2) - return true; - - if (!memory_capabilities::is_usm_type(mem1.get_allocation_type())) - return (reinterpret_cast(mem1).get_buffer() == - reinterpret_cast(mem2).get_buffer()); - else - return (reinterpret_cast(mem1).get_buffer() == - reinterpret_cast(mem2).get_buffer()); -} - -event_impl::ptr engine_impl::create_user_event(uint32_t net_id, bool set) { - try { - return _context->create_user_event(net_id, set); - } catch (cl::Error const& err) { - throw gpu::ocl_error(err); - } -} - -void engine_impl::flush_network(uint32_t net_id) { get_context()->flush(net_id); } - -void engine_impl::release_pending_memory(uint32_t net_id) { get_context()->release_pending_memory(net_id); } - -program_impl::ptr 
engine_impl::build_program(const topology_impl& topology, - const build_options& options, - bool is_internal, - bool no_optimizations) { - program_impl::ptr progr_impl{ new program_impl(*this, topology, options, is_internal, no_optimizations), false }; - return progr_impl; -} - -program_impl::ptr engine_impl::build_program(const std::set>& nodes, - const build_options& options, - bool is_internal) { - program_impl::ptr progr_impl{ new program_impl(*this, nodes, options, is_internal), false }; - return progr_impl; -} - -network_impl::ptr engine_impl::build_network(const topology_impl& topology, - const build_options& options, - uint16_t stream_id, - bool is_internal) { - network_impl::ptr netw_impl{ new network_impl(*this, topology, options, stream_id, is_internal), false }; - return netw_impl; -} - -network_impl::ptr engine_impl::build_network(const std::set>& nodes, - const build_options& options, - bool is_internal) { - network_impl::ptr netw_impl{ new network_impl(*this, nodes, options, is_internal), false }; - return netw_impl; -} - -network_impl::ptr engine_impl::allocate_network(const program_impl& program, uint16_t stream_id, bool is_internal) { - if (stream_id >= _configuration.n_streams) - throw std::invalid_argument("Unable to create network with stream_id=" + std::to_string(stream_id)); - network_impl::ptr netw_impl{ new network_impl(program, stream_id, is_internal), false }; - return netw_impl; -} - -void engine_impl::wait_for_events(std::vector const& events) { - if (!events.empty()) - _context->wait_for_events(events); -} - -gpu::device_info_internal engine_impl::get_device_info() const { return _context->get_device_info(); } - -void* engine_impl::get_user_context() const { return static_cast(_context->context().get()); } - -void engine_impl::compile_program(program_impl& program) { - auto& cache = _context->get_kernels_cache(program.get_id()); - if (!program.get_options().get()->serialization_network_name.empty()) - cache.get_context().set_serialization_flag(true); - // TODO: better compilation logic instead of a simple 'compile all'? - cache.build_all(); -} - -bool engine_impl::use_memory_pool() const { - if (configuration().enable_memory_pool && get_context()->is_neo_driver()) { - return true; - } - return false; -} - -bool engine_impl::use_unified_shared_memory() const { - if (get_context()->memory_caps().supports_usm() && get_context()->get_configuration().use_unifed_shared_memory) { - return true; - } - return false; -} - -bool engine_impl::supports_allocation(allocation_type type) const { - if (memory_capabilities::is_usm_type(type) && !use_unified_shared_memory()) - return false; - if (allocation_type::usm_shared == type) - return false; - return get_context()->memory_caps().support_allocation_type(type); -} - -allocation_type engine_impl::get_lockable_preffered_memory_allocation_type(bool is_image_layout) const { - if (!use_unified_shared_memory() || is_image_layout) - return allocation_type::cl_mem; - - /* - We do not check device allocation here. - Device allocation is reserved for buffers of hidden layers. - Const buffers are propagated to device if possible. 
- */ - - bool support_usm_host = supports_allocation(allocation_type::usm_host); - bool support_usm_shared = supports_allocation(allocation_type::usm_shared); - - if (support_usm_shared) - return allocation_type::usm_shared; - if (support_usm_host) - return allocation_type::usm_host; - - throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!"); -} -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/error_handler.cpp b/inference-engine/thirdparty/clDNN/src/error_handler.cpp index 1e41407b27d..bb7ce522412 100644 --- a/inference-engine/thirdparty/clDNN/src/error_handler.cpp +++ b/inference-engine/thirdparty/clDNN/src/error_handler.cpp @@ -3,7 +3,7 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/event.cpp b/inference-engine/thirdparty/clDNN/src/event.cpp deleted file mode 100644 index 48b6dfaff31..00000000000 --- a/inference-engine/thirdparty/clDNN/src/event.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/event.hpp" -#include "event_impl.h" -#include "engine_impl.h" -#include -#include -#include - -namespace cldnn { - -event event::create_user_event(const engine& engine, uint32_t net_id) { - return event(engine.get()->create_user_event(net_id).detach()); -} - -void event::wait() const { - _impl->wait(); -} - -void event::set() const { - if (auto user_ev = dynamic_cast(_impl)) - user_ev->set(); - else - throw std::invalid_argument("Event passed to cldnn_set_event should be an user event"); -} - -void event::set_event_handler(event_handler handler, void* param) const { - _impl->add_event_handler(handler, param); -} - -std::vector event::get_profiling_info() const { - auto interval_list = _impl->get_profiling_info(); - std::vector result(interval_list.size()); - std::copy(interval_list.begin(), interval_list.end(), result.begin()); - return result; -} - -void event::retain() { - _impl->add_ref(); -} - -void event::release() { - _impl->release(); -} - -void event_impl::wait() { - if (_set) - return; - - // TODO: refactor in context of multiple simultaneous calls (for generic engine) - wait_impl(); - _set = true; - return; -} - -bool event_impl::is_set() { - if (_set) - return true; - - // TODO: refactor in context of multiple simultaneous calls (for generic engine) - _set = is_set_impl(); - return _set; -} - -bool event_impl::add_event_handler(event_handler handler, void* data) { - if (is_set()) { - handler(data); - return true; - } - - std::lock_guard lock(_handlers_mutex); - auto itr = _handlers.insert(_handlers.end(), {handler, data}); - auto ret = add_event_handler_impl(handler, data); - if (!ret) - _handlers.erase(itr); - - return ret; -} - -const std::list& event_impl::get_profiling_info() { - if (_profiling_captured) - return _profiling_info; - - _profiling_captured = get_profiling_info_impl(_profiling_info); - return _profiling_info; -} - -void event_impl::call_handlers() { - std::lock_guard lock(_handlers_mutex); - for (auto& pair : _handlers) { - try { - pair.first(pair.second); - } catch (...) 
{ - } - } - _handlers.clear(); -} - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp b/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp index a4cc4ff1fe6..fffa064d13b 100644 --- a/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp +++ b/inference-engine/thirdparty/clDNN/src/extract_image_patches.cpp @@ -5,7 +5,7 @@ #include "extract_image_patches_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/fully_connected.cpp b/inference-engine/thirdparty/clDNN/src/fully_connected.cpp index 4c6aeabe80e..71130b19a18 100644 --- a/inference-engine/thirdparty/clDNN/src/fully_connected.cpp +++ b/inference-engine/thirdparty/clDNN/src/fully_connected.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "fully_connected_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp b/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp index dc4db14393b..9d11b1ad4c5 100644 --- a/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp +++ b/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp @@ -6,7 +6,7 @@ #include "fused_conv_eltwise_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/gather.cpp b/inference-engine/thirdparty/clDNN/src/gather.cpp index c688ea19823..5a264d2505b 100644 --- a/inference-engine/thirdparty/clDNN/src/gather.cpp +++ b/inference-engine/thirdparty/clDNN/src/gather.cpp @@ -5,7 +5,7 @@ #include "gather_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/gather_nd.cpp b/inference-engine/thirdparty/clDNN/src/gather_nd.cpp index f01d82dffa3..998df5a76b2 100644 --- a/inference-engine/thirdparty/clDNN/src/gather_nd.cpp +++ b/inference-engine/thirdparty/clDNN/src/gather_nd.cpp @@ -1,23 +1,11 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-*/ #include "gather_nd_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/gather_tree.cpp b/inference-engine/thirdparty/clDNN/src/gather_tree.cpp index 98b687be5b7..6610df3523c 100644 --- a/inference-engine/thirdparty/clDNN/src/gather_tree.cpp +++ b/inference-engine/thirdparty/clDNN/src/gather_tree.cpp @@ -4,7 +4,7 @@ #include "gather_tree_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/gemm.cpp b/inference-engine/thirdparty/clDNN/src/gemm.cpp index 1c3191b87d9..2652116b90a 100644 --- a/inference-engine/thirdparty/clDNN/src/gemm.cpp +++ b/inference-engine/thirdparty/clDNN/src/gemm.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "gemm_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp index 7ea703cab3b..9a90f972752 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp @@ -5,11 +5,10 @@ #include "activation_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "activation/activation_kernel_selector.h" #include "activation/activation_kernel_base.h" -#include "api/activation.hpp" #include "register_gpu.hpp" namespace cldnn { @@ -19,12 +18,15 @@ struct activation_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + std::unique_ptr clone() const override { + return make_unique(*this); + } + + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); if (_outer.is_parameterized()) { - args.slope = (memory_impl::cptr) &instance.slope_memory(); + args.slope = instance.slope_memory(); } return args; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp index 3b898761653..7717ea901de 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp @@ -5,7 +5,7 @@ #include "arg_max_min_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "arg_max_min/arg_max_min_kernel_selector.h" #include "arg_max_min/arg_max_min_kernel_base.h" @@ -18,10 +18,13 @@ struct arg_max_min_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) 
const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, 0); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args = parent::get_arguments(instance, 0); if (args.inputs.size() == 3) { args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K diff --git a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp index 4ed6597d63a..55ad712d4d8 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp @@ -5,7 +5,7 @@ #include "average_unpooling_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "average_unpooling/average_unpooling_kernel_selector.h" #include "average_unpooling/average_unpooling_kernel_base.h" @@ -17,10 +17,13 @@ struct average_unpooling_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp index 820e6256167..d2ef8e70df9 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "batch_to_space/batch_to_space_kernel_selector.h" #include "batch_to_space/batch_to_space_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "data_inst.h" #include @@ -20,6 +20,10 @@ struct batch_to_space_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const batch_to_space_node& arg) { auto batch_to_space_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/binary_convolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/binary_convolution_gpu.cpp index 8800cc2c5ad..c9b4c78eee5 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/binary_convolution_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/binary_convolution_gpu.cpp @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include "cldnn/primitives/scale.hpp" +#include "cldnn/primitives/quantize.hpp" #include "binary_convolution_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "kernel_runner.h" #include "kernel_selector/core/actual_kernels/binary_convolution/binary_convolution_kernel_selector.h" @@ -22,6 +22,10 @@ struct binary_convolution_gpu : typed_primitive_gpu_impl { using parent = 
typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: bool validate_impl(const typed_primitive_inst& instance) const override { bool res = true; @@ -40,17 +44,16 @@ protected: "Input memory", data_type, "filter memory", - instance.weights_memory(0).get_layout().data_type, + instance.weights_memory(0)->get_layout().data_type, ""); return res; } - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); - args.weights = (memory_impl::cptr) &instance.weights_memory(split); + args.weights = instance.weights_memory(split); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp index 33985b48c97..01c1d54ad10 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp @@ -9,7 +9,7 @@ #include "kernel_selector_helper.h" #include "border/border_kernel_selector.h" #include "border/border_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -18,6 +18,10 @@ struct border_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const border_node& arg) { auto b_params = get_default_params(arg, 1); auto b_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp index a3fa9715eb9..1c07420d7a9 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp @@ -9,7 +9,7 @@ #include "kernel_selector_helper.h" #include "broadcast/broadcast_kernel_selector.h" #include "broadcast/broadcast_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -18,6 +18,10 @@ struct broadcast_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const broadcast_node& arg) { auto bc_params = get_default_params(arg, 1); auto bc_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp index b2abee8ab58..590e97a2a8d 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp @@ -5,8 +5,7 @@ #include "concatenation_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "events_waiter.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "concatenation/concatenation_kernel_selector.h" #include "concatenation/concatenation_kernel_base.h" @@ -40,6 +39,10 @@ kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis struct concatenation_gpu : typed_primitive_gpu_impl { using parent = 
typed_primitive_gpu_impl; + std::unique_ptr clone() const override { + return make_unique(*this); + } + concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) { if (!_outer.can_be_optimized()) { CLDNN_ERROR_NOT_EQUAL(_outer.id(), diff --git a/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp index e3df38f7229..2f84605cc19 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp @@ -17,23 +17,27 @@ namespace gpu { struct condition_gpu : typed_primitive_impl { const condition_node& outer; + std::unique_ptr clone() const override { + return make_unique(*this); + } + explicit condition_gpu(const condition_node& outer) : outer(outer) {} - event_impl::ptr execute_impl(const std::vector& events, condition_inst& instance) override { + event::ptr execute_impl(const std::vector& events, condition_inst& instance) override { for (auto& a : events) { a->wait(); } - auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false); + auto ev = instance.get_network().get_stream().create_user_event(false); bool exec_branch = choose_branch_to_exec(instance); - memory_impl::ptr memory_to_copy; + memory::ptr memory_to_copy; if (exec_branch) - memory_to_copy = (memory_impl::ptr) &execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory()); + memory_to_copy = execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory_ptr()); else - memory_to_copy = (memory_impl::ptr) &execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory()); + memory_to_copy = execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory_ptr()); // just copy memory - mem_lock inp_ptr{memory_to_copy}; - mem_lock out_ptr{instance.output_memory()}; + mem_lock inp_ptr{memory_to_copy, instance.get_network().get_stream()}; + mem_lock out_ptr{instance.output_memory_ptr(), instance.get_network().get_stream()}; std::copy(inp_ptr.begin(), inp_ptr.end(), out_ptr.begin()); dynamic_cast(ev.get())->set(); // set as complete return ev; @@ -41,6 +45,8 @@ struct condition_gpu : typed_primitive_impl { static primitive_impl* create(const condition_node& arg) { return new condition_gpu(arg); } + void init_kernels() override {} + private: /* Add functions here. @@ -67,11 +73,11 @@ private: Returns boolean flag, which says what branch should be executed. 
*/ bool choose_branch_to_exec(condition_inst& instance) const { - mem_lock lock_compare_data{instance.compare_memory()}; + mem_lock lock_compare_data{instance.compare_memory_ptr(), instance.get_network().get_stream()}; auto compare_layout = instance.compare_memory().get_layout(); auto compare_ptr = lock_compare_data.begin(); - mem_lock lock_input{instance.input_memory()}; + mem_lock lock_input{instance.input_memory_ptr(), instance.get_network().get_stream()}; auto input_layout = instance.input_memory().get_layout(); auto input_ptr = lock_input.begin(); @@ -101,12 +107,12 @@ private: return true; } - memory_impl& execute_branch(network_impl::ptr branch, - const primitive_id& input_id, - memory_impl& input_memory) const { + memory::ptr execute_branch(network_impl::ptr branch, + const primitive_id& input_id, + memory::ptr input_memory) const { branch->set_input_data(input_id, input_memory); branch->execute({}); - return branch->get_outputs().at(0)->output_memory(); + return branch->get_outputs().at(0)->output_memory_ptr(); } }; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp b/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp deleted file mode 100644 index 2536bf0b0c1..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "configuration.h" -#include - -namespace cldnn { -namespace gpu { - -configuration::configuration() - : enable_profiling(false), - meaningful_kernels_names(false), - dump_custom_program(false), - host_out_of_order(true), - use_unifed_shared_memory(false), - compiler_options(""), - single_kernel_name(""), - log(""), - ocl_sources_dumps_dir(""), - priority_mode(priority_mode_types::disabled), - throttle_mode(throttle_mode_types::disabled), - queues_num(0), - tuning_cache_path("cache.json"), - kernels_cache_path(""), - n_threads(std::max(static_cast(std::thread::hardware_concurrency()), static_cast(1))) {} -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/configuration.h b/inference-engine/thirdparty/clDNN/src/gpu/configuration.h deleted file mode 100644 index f0792ad8dac..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/configuration.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include -#include "api/cldnn.hpp" -#include "api/engine.hpp" - -namespace cl { -class Context; -} -namespace cldnn { -namespace gpu { - -struct configuration { - configuration(); - - bool enable_profiling; - bool meaningful_kernels_names; - bool dump_custom_program; - bool host_out_of_order; - bool use_unifed_shared_memory; - std::string compiler_options; - std::string single_kernel_name; - std::string log; - std::string ocl_sources_dumps_dir; - priority_mode_types priority_mode; - throttle_mode_types throttle_mode; - uint16_t queues_num; - std::string tuning_cache_path; - std::string kernels_cache_path; - uint16_t n_threads; -}; -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp index 1c37c207303..63065d8be87 100644 --- 
a/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp @@ -6,7 +6,7 @@ #include "eltwise_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "kernel_runner.h" #include "convolution/convolution_kernel_selector.h" @@ -21,6 +21,10 @@ struct convolution_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: bool validate_impl(const typed_primitive_inst& instance) const override { bool res = true; @@ -33,26 +37,20 @@ protected: "Input memory", data_type, "filter memory", - instance.weights_memory(0).get_layout().data_type, + instance.weights_memory(0)->get_layout().data_type, ""); return res; } - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); - args.weights = (memory_impl::cptr) &instance.weights_memory(split); - args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory(split) : nullptr); - args.weights_zero_points = (memory_impl::cptr) (instance.weights_zero_points_term() ? &instance.weights_zero_points_memory(split) - : nullptr); - args.activations_zero_points = (memory_impl::cptr) (instance.activations_zero_points_term() - ? &instance.activations_zero_points_memory(split) - : nullptr); - args.compensation = (memory_impl::cptr) (instance.compensation_term() - ? &instance.compensation_memory(split) - : nullptr); + args.weights = instance.weights_memory(split); + args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; + args.weights_zero_points = instance.weights_zero_points_term() ? instance.weights_zero_points_memory(split) : nullptr; + args.activations_zero_points = instance.activations_zero_points_term() ? instance.activations_zero_points_memory(split) : nullptr; + args.compensation = instance.compensation_term() ? 
instance.compensation_memory(split) : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp index 0a4498973d5..07244dde5ae 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "eltwise/eltwise_kernel_selector.h" #include "eltwise/eltwise_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,6 +17,10 @@ struct crop_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: bool optimized_out(crop_inst& instance) const override { return parent::optimized_out(instance) || _outer.can_be_optimized(); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ctc_greedy_decoder_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ctc_greedy_decoder_gpu.cpp index 1161e4aee48..4c6c7f39c3c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/ctc_greedy_decoder_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/ctc_greedy_decoder_gpu.cpp @@ -5,7 +5,7 @@ #include "ctc_greedy_decoder_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "ctc_greedy_decoder/ctc_greedy_decoder_kernel_selector.h" #include "ctc_greedy_decoder/ctc_greedy_decoder_kernel_base.h" @@ -21,6 +21,10 @@ struct ctc_greedy_decoder_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const ctc_greedy_decoder_node& arg) { auto ctc_gd_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/cum_sum_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/cum_sum_gpu.cpp index d6d82ae2055..82e44cf07a3 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/cum_sum_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/cum_sum_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "cum_sum/cum_sum_kernel_selector.h" #include "cum_sum/cum_sum_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -40,6 +40,10 @@ struct cum_sum_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const cum_sum_node& arg) { auto cum_sum_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp index 493f40203cd..eaecee357b6 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp @@ -3,13 +3,12 @@ // #include "custom_gpu_primitive_inst.h" -#include "kernel.h" +#include "cldnn/runtime/engine.hpp" #include "implementation_map.h" #include "kernel_selector_helper.h" #include "network_impl.h" -#include "engine_impl.h" #include "jitter.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "register_gpu.hpp" #include @@ -28,37 +27,52 @@ 
namespace neural { struct custom_gpu_primitive_gpu : typed_primitive_impl { const custom_gpu_primitive_node& outer; std::shared_ptr cl_kernel; - gpu::kernel _kernel; + std::vector _kernels; + kernel_id _kernel_id; + + std::unique_ptr clone() const override { + return make_unique(*this); + } + + custom_gpu_primitive_gpu(const custom_gpu_primitive_gpu& other) + : outer(other.outer) + , cl_kernel(other.cl_kernel) + , _kernels({}) + , _kernel_id(other._kernel_id) { + _kernels.emplace_back(std::move(outer.get_program().get_kernel(_kernel_id)->clone())); + } custom_gpu_primitive_gpu(const custom_gpu_primitive_node& arg, std::shared_ptr& cl_kernel) - : outer(arg), - cl_kernel(cl_kernel), - _kernel(arg.get_program().get_engine().get_context(), - cl_kernel->kernelString, - arg.get_program().get_id(), - arg.get_program().get_engine().get_context()->get_configuration().dump_custom_program) {} + : outer(arg) + , cl_kernel(cl_kernel) + , _kernels() { + _kernel_id = outer.get_program().add_kernel(cl_kernel->code.kernelString); + } + + void init_kernels() override { + _kernels.emplace_back(std::move(outer.get_program().get_kernel(_kernel_id))); + } void set_arguments_impl(custom_gpu_primitive_inst& instance) override { - auto net_id = instance.get_network().get_id(); - gpu::kernel::kernel_arguments_data args; + auto& stream = instance.get_network().get_stream(); + kernel_arguments_data args; for (auto& dep : instance.dependencies()) { - args.inputs.push_back((memory_impl::cptr) &(dep->output_memory())); + args.inputs.push_back(dep->output_memory_ptr()); } - args.output = (memory_impl::cptr) &instance.output_memory(); - _kernel.set_arguments(net_id, *cl_kernel.get(), args); + args.output = instance.output_memory_ptr(); + stream.set_arguments(*_kernels.front(), cl_kernel.get()->params, args); } - void cleanup_impl(custom_gpu_primitive_inst& instance) override { - auto net_id = instance.get_network().get_id(); - _kernel.cleanup(net_id); - } - - event_impl::ptr execute_impl(const std::vector& events, + event::ptr execute_impl(const std::vector& events, custom_gpu_primitive_inst& instance) override { - auto net_id = instance.get_network().get_id(); - _kernel.set_output_event(net_id, instance.node.is_output()); - return _kernel.run(net_id, *cl_kernel.get(), events); + auto& stream = instance.get_network().get_stream(); + kernel_arguments_data args; + for (auto& dep : instance.dependencies()) { + args.inputs.push_back(dep->output_memory_ptr()); + } + args.output = instance.output_memory_ptr(); + return stream.enqueue_kernel(*_kernels.front(), cl_kernel.get()->params, args, events, instance.node.is_output()); } }; @@ -195,19 +209,19 @@ static primitive_impl* create(const custom_gpu_primitive_node& arg) { const auto primitive = arg.get_primitive().get(); auto cl_kernel = std::make_shared(); - cl_kernel->kernelString = std::make_shared(); - cl_kernel->kernelString->entry_point = primitive->kernel_entry_point; - cl_kernel->kernelString->options = primitive->build_options; - cl_kernel->kernelString->jit = get_jit_constant(arg); + cl_kernel->code.kernelString = std::make_shared(); + cl_kernel->code.kernelString->entry_point = primitive->kernel_entry_point; + cl_kernel->code.kernelString->options = primitive->build_options; + cl_kernel->code.kernelString->jit = get_jit_constant(arg); for (const auto& s : primitive->kernels_code) { - cl_kernel->kernelString->str += s + "\n"; + cl_kernel->code.kernelString->str += s + "\n"; } - cl_kernel->workGroups.global = primitive->gws; - cl_kernel->workGroups.local = 
primitive->lws; + cl_kernel->params.workGroups.global = primitive->gws; + cl_kernel->params.workGroups.local = primitive->lws; for (const auto& p : primitive->kernel_arguments) { - cl_kernel->arguments.push_back(get_arg(p)); + cl_kernel->params.arguments.push_back(get_arg(p)); } return new custom_gpu_primitive_gpu(arg, cl_kernel); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp index 352bdf00d5d..9c1532ecfd4 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp @@ -5,7 +5,7 @@ #include "deconvolution_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "deconvolution/deconvolution_kernel_selector.h" #include "deconvolution/deconvolution_kernel_base.h" @@ -18,6 +18,10 @@ struct deconvolution_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: // TODO: share it with convolution and fully connected bool validate_impl(const typed_primitive_inst&) const override { @@ -33,12 +37,11 @@ protected: return res; } - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); - args.weights = (memory_impl::cptr) &instance.weights_memory(split); - args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory(split) : nullptr); + args.weights = instance.weights_memory(split); + args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deformable_convolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deformable_convolution_gpu.cpp index 897f1a07a76..f2b61a505e2 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/deformable_convolution_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/deformable_convolution_gpu.cpp @@ -5,7 +5,7 @@ #include "deformable_convolution_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "kernel_runner.h" #include "convolution/convolution_kernel_selector.h" @@ -19,13 +19,16 @@ struct deformable_conv_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + std::unique_ptr clone() const override { + return make_unique(*this); + } - args.weights = (memory_impl::cptr) &instance.weights_memory(split); - args.bias = memory_impl::cptr(instance.bias_term() ? 
&instance.bias_memory(split) : nullptr); +protected: + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); + + args.weights = instance.weights_memory(split); + args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; return args; } @@ -78,6 +81,10 @@ struct deformable_interp_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: int32_t get_split() const override { return 1; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp index fc3c5dd9f56..3a62c9e0a9c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "depth_to_space/depth_to_space_kernel_selector.h" #include "depth_to_space/depth_to_space_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "common_types.h" using namespace cldnn; @@ -19,6 +19,10 @@ struct depth_to_space_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const depth_to_space_node& arg) { auto depth_to_space_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp index dfe22f193d6..9673270ab26 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp @@ -3,7 +3,6 @@ // #include "detection_output_inst.h" -#include "kernel.h" #include "network_impl.h" #include "implementation_map.h" #include "math_utils.h" @@ -37,6 +36,9 @@ struct detection_output_cpu : typed_primitive_impl { const detection_output_node& outer; NMSType nms_type; + std::unique_ptr clone() const override { + return make_unique(*this); + } explicit detection_output_cpu(const detection_output_node& outer) : outer(outer) , nms_type(outer.get_primitive()->decrease_label_id ? 
MXNET : CAFFE) {} @@ -249,12 +251,12 @@ struct detection_output_cpu : typed_primitive_impl { } template - void generate_detections(const detection_output_inst& instance, + void generate_detections(stream& stream, const detection_output_inst& instance, const int num_of_images, const std::vector>>& all_bboxes, std::vector>>>& confidences, std::vector>>>& scoreIndexPairs) { - mem_lock lock{instance.output_memory()}; + mem_lock lock{instance.output_memory_ptr(), stream}; auto out_ptr = lock.begin(); const auto& args = instance.argument; @@ -405,22 +407,23 @@ struct detection_output_cpu : typed_primitive_impl { } template - void extract_locations_per_image(const detection_output_inst& instance, + void extract_locations_per_image(stream& stream, const detection_output_inst& instance, std::vector>>& locations, const int num_of_priors, const int num_loc_classes) { const bool share_location = instance.argument.share_location; - auto& input_location = instance.location_memory(); + auto input_location = instance.location_memory(); + auto location_layout = input_location->get_layout(); const int num_of_images = static_cast(locations.size()); - mem_lock lock{input_location}; + mem_lock lock{input_location, stream}; auto location_data = lock.begin(); - assert(num_of_priors * num_loc_classes * PRIOR_BOX_SIZE == input_location.get_layout().size.feature[0]); + assert(num_of_priors * num_loc_classes * PRIOR_BOX_SIZE == input_location->get_layout().size.feature[0]); - const auto& input_buffer_size = input_location.get_layout().get_buffer_size(); + const auto& input_buffer_size = location_layout.get_buffer_size(); const int input_buffer_size_x = input_buffer_size.spatial[0]; const int input_buffer_size_y = input_buffer_size.spatial[1]; const int input_buffer_size_f = input_buffer_size.feature[0]; - const auto& input_padding = input_location.get_layout().data_padding; + const auto& input_padding = location_layout.data_padding; const int input_padding_lower_x = input_padding.lower_size().spatial[0]; const int input_padding_lower_y = input_padding.lower_size().spatial[1]; @@ -467,16 +470,16 @@ struct detection_output_cpu : typed_primitive_impl { } template - void extract_prior_boxes_and_variances(const detection_output_inst& instance, + void extract_prior_boxes_and_variances(stream& stream, const detection_output_inst& instance, const bool variance_encoded_in_target, const int32_t prior_info_size, const int32_t prior_coordinates_offset, const int32_t images_count, std::vector& prior_bboxes, std::vector>& prior_variances) { - auto& input_prior_box = instance.prior_box_memory(); + auto input_prior_box = instance.prior_box_memory(); const int num_of_priors = static_cast(prior_bboxes.size()) / images_count; - mem_lock lock{input_prior_box}; + mem_lock lock{input_prior_box, stream}; for (int i = 0; i < images_count; i++) { auto prior_box_data = lock.begin() + i * num_of_priors * prior_info_size * (variance_encoded_in_target ? 
1 : 2); @@ -503,25 +506,25 @@ struct detection_output_cpu : typed_primitive_impl { } template - void extract_confidences_per_image_caffe(const detection_output_inst& instance, + void extract_confidences_per_image_caffe(stream& stream, const detection_output_inst& instance, std::vector>>>& confidences, const int num_of_priors) { const int num_classes = instance.argument.num_classes; const int num_of_images = static_cast(confidences.size()); - auto& input_confidence = instance.confidence_memory(); + auto input_confidence = instance.confidence_memory(); const float confidence_threshold = instance.argument.confidence_threshold; - mem_lock lock{(memory_impl::ptr) &input_confidence}; + mem_lock lock{input_confidence, stream}; auto confidence_data = lock.begin(); - assert(num_of_priors * num_classes == input_confidence.get_layout().size.feature[0]); + assert(num_of_priors * num_classes == input_confidence->get_layout().size.feature[0]); - const auto& input_buffer_size = input_confidence.get_layout().get_buffer_size(); + const auto& input_buffer_size = input_confidence->get_layout().get_buffer_size(); const int input_buffer_size_x = input_buffer_size.spatial[0]; const int input_buffer_size_y = input_buffer_size.spatial[1]; const int input_buffer_size_f = input_buffer_size.feature[0]; - const auto& input_padding = input_confidence.get_layout().data_padding; + const auto& input_padding = input_confidence->get_layout().data_padding; const int input_padding_lower_x = input_padding.lower_size().spatial[0]; const int input_padding_lower_y = input_padding.lower_size().spatial[1]; const int stride = input_buffer_size_y * input_buffer_size_x; @@ -593,26 +596,27 @@ struct detection_output_cpu : typed_primitive_impl { } template - void extract_confidences_per_image_mxnet(const detection_output_inst& instance, + void extract_confidences_per_image_mxnet(stream& stream, const detection_output_inst& instance, std::vector>>>& confidences, const int num_of_priors, std::vector>>>& scoreIndexPairs) { const int num_classes = instance.argument.num_classes; const int num_of_images = static_cast(confidences.size()); - auto& input_confidence = instance.confidence_memory(); + auto input_confidence = instance.confidence_memory(); const float confidence_threshold = instance.argument.confidence_threshold; + auto confidence_layout = input_confidence->get_layout(); - mem_lock lock{(memory_impl::ptr) &input_confidence}; + mem_lock lock{input_confidence, stream}; auto confidence_data = lock.begin(); - assert(num_of_priors * num_classes == input_confidence.get_layout().size.feature[0]); + assert(num_of_priors * num_classes == confidence_layout.size.feature[0]); - const auto& input_buffer_size = input_confidence.get_layout().get_buffer_size(); + const auto& input_buffer_size = confidence_layout.get_buffer_size(); const int input_buffer_size_x = input_buffer_size.spatial[0]; const int input_buffer_size_y = input_buffer_size.spatial[1]; const int input_buffer_size_f = input_buffer_size.feature[0]; - const auto& input_padding = input_confidence.get_layout().data_padding; + const auto& input_padding = confidence_layout.data_padding; const int input_padding_lower_x = input_padding.lower_size().spatial[0]; const int input_padding_lower_y = input_padding.lower_size().spatial[1]; const int stride = input_buffer_size_y * input_buffer_size_x; @@ -703,7 +707,7 @@ struct detection_output_cpu : typed_primitive_impl { } template - void prepare_data(const detection_output_inst& instance, + void prepare_data(stream& stream, const 
detection_output_inst& instance, std::vector>>& bboxes, std::vector>>>& confidences, std::vector>>>& scoreIndexPairs) { @@ -711,23 +715,26 @@ struct detection_output_cpu : typed_primitive_impl { const auto& args = instance.argument; + auto priors_layout = instance.prior_box_memory()->get_layout(); + const int num_of_images = static_cast(bboxes.size()); - const int num_of_priors = instance.prior_box_memory().get_layout().size.spatial[1] / args.prior_info_size; + const int num_of_priors = priors_layout.size.spatial[1] / args.prior_info_size; const int num_loc_classes = args.share_location ? 1 : args.num_classes; // Extract locations per image. std::vector>> locations( num_of_images); // Per image : label -> bounding boxes. - extract_locations_per_image(instance, locations, num_of_priors, num_loc_classes); + extract_locations_per_image(stream, instance, locations, num_of_priors, num_loc_classes); - int32_t batches_in_prior_boxes = instance.prior_box_memory().get_layout().size.batch[0]; + int32_t batches_in_prior_boxes = priors_layout.size.batch[0]; std::vector prior_bboxes(batches_in_prior_boxes * num_of_priors); // Prior-Boxes (identical for all images since we assume // all images in a batch are of same dimension). std::vector> prior_variances( batches_in_prior_boxes * num_of_priors); // Variances per prior-box (identical for all images since we // assume all images in a batch are of same dimension). - extract_prior_boxes_and_variances(instance, + extract_prior_boxes_and_variances(stream, + instance, args.variance_encoded_in_target, args.prior_info_size, args.prior_coordinates_offset, @@ -770,38 +777,39 @@ struct detection_output_cpu : typed_primitive_impl { } // Extract confidences per image. if (nms_type == CAFFE) { - extract_confidences_per_image_caffe(instance, confidences, num_of_priors); + extract_confidences_per_image_caffe(stream, instance, confidences, num_of_priors); } else { - extract_confidences_per_image_mxnet(instance, confidences, num_of_priors, scoreIndexPairs); + extract_confidences_per_image_mxnet(stream, instance, confidences, num_of_priors, scoreIndexPairs); } } - event_impl::ptr execute_impl(const std::vector& events, detection_output_inst& instance) override { + event::ptr execute_impl(const std::vector& events, detection_output_inst& instance) override { for (auto& a : events) { a->wait(); } - auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false); + auto& stream = instance.get_network().get_stream(); - const int num_of_images = instance.location_memory().get_layout().size.batch[0]; // batch size + const int num_of_images = instance.location_memory()->get_layout().size.batch[0]; // batch size // Per image : label -> decoded bounding boxes. std::vector>> bboxes(num_of_images); // Per image : class -> confidences per bounding box. 
std::vector>>> confidences(num_of_images); + std::vector>>> scoreIndexPairs; - if (instance.location_memory().get_layout().data_type == data_types::f32) { - prepare_data::type>(instance, bboxes, confidences, scoreIndexPairs); - generate_detections::type>(instance, num_of_images, bboxes, confidences, scoreIndexPairs); + if (instance.location_memory()->get_layout().data_type == data_types::f32) { + prepare_data::type>(stream, instance, bboxes, confidences, scoreIndexPairs); + generate_detections::type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); } else { - prepare_data::type>(instance, bboxes, confidences, scoreIndexPairs); - generate_detections::type>(instance, num_of_images, bboxes, confidences, scoreIndexPairs); + prepare_data::type>(stream, instance, bboxes, confidences, scoreIndexPairs); + generate_detections::type>(stream, instance, num_of_images, bboxes, confidences, scoreIndexPairs); } - dynamic_cast(ev.get())->set(); // set as complete - // TODO: consider refactoring create_user_event() to return cldnn::user_event* - return ev; + return stream.create_user_event(true); } + void init_kernels() override {} + static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); } }; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.h b/inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.h deleted file mode 100644 index 51c50b0df77..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_cache_reader.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include -#include "document.h" -#include - -namespace kernel_selector { -class TuningCache; -} - -namespace cldnn { -namespace gpu { - -class device_cache_reader { -public: - explicit device_cache_reader(const std::string tuning_file_path); - std::shared_ptr get() { return _dev_cache; } - -private: - std::shared_ptr _dev_cache; -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/device_info.h b/inference-engine/thirdparty/clDNN/src/gpu/device_info.h deleted file mode 100644 index 225ed453c2b..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/device_info.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include -#include -#include "api/device.hpp" -#include - -namespace cl { -class Device; -} -namespace cldnn { -namespace gpu { - -struct device_info_internal : cldnn::device_info { - std::uint32_t compute_units_count; - uint32_t vendor_id; - uint8_t supports_usm; - bool supports_optimization_hints; - bool supports_local_block_io; - - explicit device_info_internal(const cl::Device& device); - - device_info convert_to_api() { - return { cores_count, - core_frequency, - max_threads_per_execution_unit, - max_threads_per_device, - max_work_group_size, - max_local_mem_size, - max_global_mem_size, - max_alloc_mem_size, - max_image2d_width, - max_image2d_height, - supports_fp16, - supports_fp16_denorms, - supports_subgroups_short, - supports_image, - supports_imad, - supports_immad, - supports_usm, - dev_name, - driver_version, - dev_type, - gfx_ver, - device_id, - num_slices, - num_sub_slices_per_slice, - num_eus_per_sub_slice, - num_threads_per_eu, - }; - } -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp 
b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp index 5ff1be35b76..8ffc149e160 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp @@ -5,7 +5,7 @@ #include "eltwise_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "eltwise/eltwise_kernel_selector.h" #include "eltwise/eltwise_kernel_base.h" @@ -18,10 +18,13 @@ struct eltwise_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/embedding_bag_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/embedding_bag_gpu.cpp index 4de2c9cda6d..9e705b8bcb5 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/embedding_bag_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/embedding_bag_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "embedding_bag/embedding_bag_kernel_selector.h" #include "embedding_bag/embedding_bag_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "data_inst.h" using namespace cldnn; @@ -19,6 +19,10 @@ struct embedding_bag_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const embedding_bag_node& arg) { auto embedding_bag_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h b/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h deleted file mode 100644 index 55be7d2e092..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "ocl_toolkit.h" -#include "event_impl.h" -#include -#include - -namespace cldnn { -namespace gpu { -class events_waiter : public context_holder { -public: - explicit events_waiter(std::shared_ptr context) : context_holder(context) {} - - event_impl::ptr run(uint32_t queue_id, const std::vector& dependencies) { - if (dependencies.size() == 1) - return dependencies[0]; - - return context()->enqueue_marker(queue_id, dependencies); - } -}; -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp index bff7dafc638..c696821b727 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/extract_image_patches_gpu.cpp @@ -5,7 +5,7 @@ #include "extract_image_patches_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include 
"cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "extract_image_patches/extract_image_patches_kernel_selector.h" @@ -18,6 +18,10 @@ struct extract_image_patches_gpu : typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const extract_image_patches_node& arg) { auto params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp index 05cdc2eb3e1..2f4a6b48f7e 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp @@ -12,11 +12,11 @@ #include "fully_connected/fully_connected_params.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_runner.h" -#include "api/reorder.hpp" -#include "api/input_layout.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/input_layout.hpp" #include namespace cldnn { @@ -26,13 +26,16 @@ struct fully_connected_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + std::unique_ptr clone() const override { + return make_unique(*this); + } - args.weights = (memory_impl::cptr) &instance.weights_memory(); - args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory() : nullptr); +protected: + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); + + args.weights = instance.weights_memory(); + args.bias = instance.bias_term() ? instance.bias_memory() : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp index 28d8990c9ea..9e608af17b1 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp @@ -5,7 +5,7 @@ #include "fused_conv_eltwise_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "kernel_runner.h" #include "fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h" @@ -20,6 +20,10 @@ struct fused_conv_eltwise_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: bool validate_impl(const typed_primitive_inst& instance) const override { (void)instance; @@ -36,12 +40,11 @@ protected: return res; } - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); - args.weights = (memory_impl::cptr) &instance.weights_memory(split); - args.bias = (memory_impl::cptr) (instance.bias_term() ? 
&instance.bias_memory(split) : nullptr); + args.weights = instance.weights_memory(split); + args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp index c596bbef83f..e0d51b3f000 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "gather/gather_kernel_selector.h" #include "gather/gather_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -37,6 +37,10 @@ struct gather_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const gather_node& arg) { auto gather_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp index dc05203f379..cf7d692651a 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp @@ -1,18 +1,6 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-*/ #include "gather_nd_inst.h" #include "primitive_gpu_base.h" @@ -20,7 +8,6 @@ #include "kernel_selector_helper.h" #include "gather/gather_nd_kernel_selector.h" #include "gather/gather_nd_kernel_ref.h" -#include "error_handler.h" using namespace cldnn; @@ -31,7 +18,10 @@ struct gather_nd_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -public: + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const gather_nd_node& arg) { auto gather_nd_params = get_default_params(arg); auto gather_nd_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp index db98e0b543c..f0ae5fcdbc6 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/gather_tree_gpu.cpp @@ -9,7 +9,8 @@ #include "kernel_selector_helper.h" #include "gather_tree/gather_tree_kernel_selector.h" #include "gather_tree/gather_tree_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" + namespace cldnn { namespace gpu { @@ -17,6 +18,10 @@ struct gather_tree_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const gather_tree_node& arg) { auto b_params = get_default_params(arg, 1); auto b_optional_params = get_default_optional_params(arg.get_program()); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gemm_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gemm_gpu.cpp index bc475596c7c..9a6a76802dc 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/gemm_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/gemm_gpu.cpp @@ -9,7 +9,7 @@ #include "kernel_selector_helper.h" #include "gemm/gemm_kernel_selector.h" #include "gemm/gemm_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -18,6 +18,10 @@ struct gemm_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const gemm_node& arg) { auto gemm_params = get_default_params(arg, 1); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/generic_layer_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/generic_layer_gpu.cpp index c6643ed65be..a7d3d610b31 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/generic_layer_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/generic_layer_gpu.cpp @@ -3,11 +3,10 @@ // #include "generic_layer_inst.h" -#include "kernel.h" +#include "cldnn/runtime/engine.hpp" #include "implementation_map.h" #include "kernel_selector_helper.h" #include "network_impl.h" -#include "engine_impl.h" #include "register_gpu.hpp" #include @@ -18,36 +17,57 @@ namespace neural { struct generic_layer_gpu : typed_primitive_impl { const generic_layer_node& outer; const kernel_selector::cl_kernel_data& _cl_kernel_data; - gpu::kernel _kernel; + std::vector _kernels; + kernel_id _kernel_id; - explicit generic_layer_gpu(const generic_layer_node& arg) - : outer(arg), - _cl_kernel_data(*outer.get_primitive()->generic_params.clKernel.get()), - _kernel(arg.get_program().get_engine().get_context(), - outer.get_primitive()->generic_params.clKernel->kernelString, - arg.get_program().get_id()) {} + 
std::unique_ptr clone() const override { + return make_unique(*this); + } + + generic_layer_gpu(const generic_layer_gpu& other) + : outer(other.outer) + , _cl_kernel_data(other._cl_kernel_data) + , _kernels({}) + , _kernel_id(other._kernel_id) { + if (other._kernels.empty()) { + throw std::runtime_error("Can't copy generic_layer_gpu node: kernels vector is empty"); + } + _kernels.push_back(other._kernels.front()->clone()); + } + + generic_layer_gpu(const generic_layer_node& arg) + : outer(arg) + , _cl_kernel_data(*outer.get_primitive()->generic_params.clKernel.get()) + , _kernels() { + _kernel_id = outer.get_program().add_kernel(outer.get_primitive()->generic_params.clKernel->code.kernelString); + } + + void init_kernels() override { + _kernels.push_back(outer.get_program().get_kernel(_kernel_id)); + } void set_arguments_impl(generic_layer_inst& instance) override { - auto net_id = instance.get_network().get_id(); - gpu::kernel::kernel_arguments_data args; - args.scalars = &_cl_kernel_data.scalars; + stream& stream = instance.get_network().get_stream(); + kernel_arguments_data args; + args.scalars = &_cl_kernel_data.params.scalars; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { - args.inputs.push_back((memory_impl::cptr) &instance.input_memory(i)); + args.inputs.push_back(instance.input_memory_ptr(i)); } - args.output = (memory_impl::cptr) &instance.output_memory(); - _kernel.set_arguments(net_id, _cl_kernel_data, args); + args.output = instance.output_memory_ptr(); + stream.set_arguments(*_kernels.front(), _cl_kernel_data.params, args); } - void cleanup_impl(generic_layer_inst& instance) override { - auto net_id = instance.get_network().get_id(); - _kernel.cleanup(net_id); - } + event::ptr execute_impl(const std::vector& events, generic_layer_inst& instance) override { + stream& stream = instance.get_network().get_stream(); + kernel_arguments_data args; + args.scalars = &_cl_kernel_data.params.scalars; - event_impl::ptr execute_impl(const std::vector& events, generic_layer_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); - _kernel.set_output_event(net_id, instance.node.is_output()); - return _kernel.run(net_id, _cl_kernel_data, events); + for (size_t i = 0; i < instance.inputs_memory_count(); i++) { + args.inputs.push_back(instance.input_memory_ptr(i)); + } + args.output = instance.output_memory_ptr(); + return stream.enqueue_kernel(*_kernels.front(), _cl_kernel_data.params, args, events, true); } }; @@ -55,28 +75,34 @@ struct generic_layer_gpu : typed_primitive_impl { struct generic_layer_cpu : typed_primitive_impl { const generic_layer_node& outer; + std::unique_ptr clone() const override { + return make_unique(*this); + } + explicit generic_layer_cpu(const generic_layer_node& arg) : outer(arg) {} - event_impl::ptr execute_impl(const std::vector& events, generic_layer_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); - auto& input_mem = instance.input_memory(); - auto& output_mem = instance.output_memory(); + event::ptr execute_impl(const std::vector& events, generic_layer_inst& instance) override { + stream& stream = instance.get_network().get_stream(); + auto input_mem = instance.input_memory_ptr(); + auto output_mem = instance.output_memory_ptr(); - std::vector tmp_events(events); + std::vector tmp_events(events); for (auto& a : events) { a->wait(); } - mem_lock old_pointer(input_mem); - mem_lock new_pointer(output_mem); + mem_lock old_pointer(input_mem, stream); + mem_lock new_pointer(output_mem, stream); 
const auto& cpu_kernel = *outer.get_primitive()->generic_params.cpuKernel.get(); cpu_kernel.Execute(old_pointer.data(), old_pointer.size(), new_pointer.data(), new_pointer.size()); - return instance.get_network().get_engine().create_user_event(net_id, true); + return stream.create_user_event(true); } + + void init_kernels() override {} }; static primitive_impl* create(const generic_layer_node& arg) { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/grn_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/grn_gpu.cpp index 7612be5ccff..5b9d8c9145c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/grn_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/grn_gpu.cpp @@ -5,7 +5,7 @@ #include "grn_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "grn/grn_kernel_selector.h" #include "grn/grn_kernel_base.h" @@ -21,6 +21,10 @@ struct grn_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const grn_node& arg) { auto grn_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp deleted file mode 100644 index 0ba0ece0443..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp +++ /dev/null @@ -1,275 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include "kernel.h" -#include "memory_gpu.h" -#include "memory_impl.h" -#include "refcounted_obj.h" -#include - -namespace cldnn { -namespace gpu { - -namespace { -inline cl::NDRange toNDRange(const std::vector& v) { - switch (v.size()) { - case 1: - return cl::NDRange(v[0]); - case 2: - return cl::NDRange(v[0], v[1]); - case 3: - return cl::NDRange(v[0], v[1], v[2]); - default: - return cl::NullRange; - } -} - -void set_arguments_impl(kernels_cache::kernel_type& kernel, - const kernel_selector::kernel_arguments& args, - const kernel::kernel_arguments_data& data) { - for (uint32_t i = 0; i < static_cast(args.size()); i++) { - cl_int status = CL_INVALID_ARG_VALUE; - switch (args[i].t) { - case kernel_selector::kernel_argument_types::INPUT: - if (args[i].index < data.inputs.size() && data.inputs[args[i].index]) { - const auto& input_mem = data.inputs[args[i].index]; - if (input_mem) { - if (input_mem->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*input_mem).get_buffer()); - else if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*input_mem).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*input_mem).get_buffer()); - } - } - break; - case kernel_selector::kernel_argument_types::INPUT_OF_FUSED_PRIMITIVE: - if (args[i].index < data.fused_op_inputs.size() && data.fused_op_inputs[args[i].index]) { - const auto& input_mem = data.fused_op_inputs[args[i].index]; - if (input_mem) { - if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*input_mem).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*input_mem).get_buffer()); - } - } - break; - case kernel_selector::kernel_argument_types::INTERNAL_BUFFER: - 
if (args[i].index < data.intermediates.size() && data.intermediates[args[i].index]) { - const auto& input_mem = data.intermediates[args[i].index]; - if (input_mem) { - if (memory_capabilities::is_usm_type(input_mem->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*input_mem).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*input_mem).get_buffer()); - } - } - break; - case kernel_selector::kernel_argument_types::OUTPUT: - if (data.output) { - if (data.output->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*data.output).get_buffer()); - else if (memory_capabilities::is_usm_type(data.output->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.output).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.output).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::WEIGHTS: - if (data.weights) { - if (data.weights->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*data.weights).get_buffer()); - else if (memory_capabilities::is_usm_type(data.weights->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.weights).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.weights).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::BIAS: - if (data.bias) { - if (memory_capabilities::is_usm_type(data.bias->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.bias).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.bias).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::WEIGHTS_ZERO_POINTS: - if (data.weights_zero_points) { - if (memory_capabilities::is_usm_type(data.weights_zero_points->get_allocation_type())) - status = kernel.setArgUsm( - i, - dynamic_cast(*data.weights_zero_points).get_buffer()); - else - status = kernel.setArg( - i, - dynamic_cast(*data.weights_zero_points).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::ACTIVATIONS_ZERO_POINTS: - if (data.activations_zero_points) { - if (memory_capabilities::is_usm_type(data.activations_zero_points->get_allocation_type())) - status = kernel.setArgUsm( - i, - dynamic_cast(*data.activations_zero_points).get_buffer()); - else - status = kernel.setArg( - i, - dynamic_cast(*data.activations_zero_points).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::COMPENSATION: - if (data.compensation) { - if (memory_capabilities::is_usm_type(data.compensation->get_allocation_type())) - status = kernel.setArgUsm( - i, - dynamic_cast(*data.compensation).get_buffer()); - else - status = kernel.setArg( - i, - dynamic_cast(*data.compensation).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::SCALE_TABLE: - if (data.scale_table) { - if (memory_capabilities::is_usm_type(data.scale_table->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.scale_table).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.scale_table).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::SLOPE: - if (data.slope) { - if (memory_capabilities::is_usm_type(data.slope->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.slope).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.slope).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::SPLIT: - status = kernel.setArg(i, data.split); - break; - case 
kernel_selector::kernel_argument_types::SCALAR: - if (data.scalars && args[i].index < data.scalars->size()) { - const auto& scalar = (*data.scalars)[args[i].index]; - switch (scalar.t) { - case kernel_selector::kernel_scalar_argument_types::UINT8: - status = kernel.setArg(i, scalar.v.u8); - break; - case kernel_selector::kernel_scalar_argument_types::UINT16: - status = kernel.setArg(i, scalar.v.u16); - break; - case kernel_selector::kernel_scalar_argument_types::UINT32: - status = kernel.setArg(i, scalar.v.u32); - break; - case kernel_selector::kernel_scalar_argument_types::UINT64: - status = kernel.setArg(i, scalar.v.u64); - break; - case kernel_selector::kernel_scalar_argument_types::INT8: - status = kernel.setArg(i, scalar.v.s8); - break; - case kernel_selector::kernel_scalar_argument_types::INT16: - status = kernel.setArg(i, scalar.v.s16); - break; - case kernel_selector::kernel_scalar_argument_types::INT32: - status = kernel.setArg(i, scalar.v.s32); - break; - case kernel_selector::kernel_scalar_argument_types::INT64: - status = kernel.setArg(i, scalar.v.s64); - break; - case kernel_selector::kernel_scalar_argument_types::FLOAT32: - status = kernel.setArg(i, scalar.v.f32); - break; - case kernel_selector::kernel_scalar_argument_types::FLOAT64: - status = kernel.setArg(i, scalar.v.f64); - break; - default: - break; - } - } - break; - case kernel_selector::kernel_argument_types::RECURRENT: // RNN/LSTM/GRU layers - if (data.recurrent) { - if (data.recurrent->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer()); - else if (memory_capabilities::is_usm_type(data.recurrent->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.recurrent).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.recurrent).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::HIDDEN: // RNN/LSTM/GRU layers - if (data.hidden) { - if (data.hidden->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer()); - else if (memory_capabilities::is_usm_type(data.hidden->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.hidden).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.hidden).get_buffer()); - } - break; - case kernel_selector::kernel_argument_types::CELL: // LSTMlayers - if (data.cell) { - if (data.cell->get_layout().format.is_image_2d()) - status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer()); - else if (memory_capabilities::is_usm_type(data.cell->get_allocation_type())) - status = kernel.setArgUsm(i, dynamic_cast(*data.cell).get_buffer()); - else - status = kernel.setArg(i, dynamic_cast(*data.cell).get_buffer()); - } - break; - default: - break; - } - - if (status != CL_SUCCESS) { - throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n"); - } - } -} -} // namespace - -void kernel::set_arguments(uint32_t queue_id, - const kernel_selector::cl_kernel_data& kernel_data, - const kernel_arguments_data& args) { - static std::mutex m; - std::lock_guard guard(m); - auto compiled_kernel = context()->get_kernels_cache(_prog_id).get_kernel(_kernel_id, _one_time_kernel); - - // Create a copy of cl kernel for each stream if it doesn't exist - // Copy is needed to avoid data races between streams, but we create it only once for each stream - // because the cloning is quite expensive. 
- // Mutex is still needed to ensure that insert operation into the map is thread safe - if (_cl_kernels.find(queue_id) == _cl_kernels.end()) - _cl_kernels[queue_id] = compiled_kernel.clone(); - - try { - set_arguments_impl(_cl_kernels.at(queue_id), kernel_data.arguments, args); - } catch (cl::Error const& err) { - throw ocl_error(err); - } -} - -void kernel::cleanup(uint32_t queue_id) { - _cl_kernels.erase(queue_id); -} - -event_impl::ptr kernel::run(uint32_t queue_id, - const kernel_selector::cl_kernel_data& kernel_data, - const std::vector& dependencies) const { - if (_cl_kernels.find(queue_id) == _cl_kernels.end() || _cl_kernels.at(queue_id).get() == NULL) { - throw std::runtime_error("[clDNN] Kernel for layer " + kernel_data.layerID + " is not found for stream " + std::to_string(queue_id)); - } - - return context()->enqueue_kernel(queue_id, - _cl_kernels.at(queue_id), - toNDRange(kernel_data.workGroups.global), - toNDRange(kernel_data.workGroups.local), - dependencies); -} - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel.h b/inference-engine/thirdparty/clDNN/src/gpu/kernel.h deleted file mode 100644 index c55f2ca615b..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -#include "ocl_toolkit.h" -#include "memory_impl.h" -#include "kernels_cache.h" -#include "event_impl.h" - -#include "kernel_selector_helper.h" -#include -#include - -namespace cldnn { -namespace gpu { - -class kernel : public context_holder { - uint32_t _prog_id; - kernels_cache::kernel_id _kernel_id; - bool _one_time_kernel; // If this flag is true, the kernel is intended to be executed only once (can be removed - // later from the cache). 
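For reference, the deleted kernel::run() above reduces to a single enqueue of the per-stream kernel with the global and local work-group sizes taken from the kernel metadata. A hedged sketch using the plain OpenCL C++ bindings instead of the clDNN wrappers (the toNDRange conversion and the context/queue plumbing are omitted, and the function name is illustrative):

#include <CL/cl2.hpp>
#include <vector>

// Sketch only: enqueue a kernel with explicit global/local ranges and dependencies.
cl::Event enqueue_kernel_sketch(cl::CommandQueue& queue,
                                const cl::Kernel& kernel,
                                const cl::NDRange& global,
                                const cl::NDRange& local,
                                const std::vector<cl::Event>& deps) {
    cl::Event done;
    // Offset is NullRange; 'deps' orders this kernel after previously enqueued work.
    queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local, &deps, &done);
    return done;
}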
- - std::map _cl_kernels; - -public: - explicit kernel(std::shared_ptr context, - const std::shared_ptr& kernel_string, - uint32_t prog_id, - bool dump_custom_program = false, - bool one_time_kernel = false) - : context_holder(context) - , _prog_id(prog_id) - , _kernel_id(context->get_kernels_cache(prog_id).set_kernel_source(kernel_string, dump_custom_program, one_time_kernel)) - , _one_time_kernel(one_time_kernel) - , _cl_kernels({}) {} - - kernel(const kernel& other) - : context_holder(other.context()) - , _prog_id(other._prog_id) - , _kernel_id(other._kernel_id) - , _one_time_kernel(other._one_time_kernel) - , _cl_kernels(other._cl_kernels) {} - - kernel& operator=(const kernel& other) { - if (this == &other) { - return *this; - } - - _kernel_id = other._kernel_id; - _prog_id = other._prog_id; - _one_time_kernel = other._one_time_kernel; - _cl_kernels = other._cl_kernels; - - return *this; - } - - struct kernel_arguments_data { - std::vector inputs; - std::vector intermediates; - memory_impl::cptr output; - memory_impl::cptr weights; - memory_impl::cptr recurrent; - memory_impl::cptr hidden; - memory_impl::cptr cell; - memory_impl::cptr bias; - memory_impl::cptr weights_zero_points; - memory_impl::cptr activations_zero_points; - memory_impl::cptr compensation; - memory_impl::cptr lookup_table; - memory_impl::cptr scale_table; - memory_impl::cptr slope; - // used for fused primitives - std::vector fused_op_inputs; - int32_t split = 0; - float lr; - const kernel_selector::kernel_scalar_arguments* scalars = nullptr; - }; - - void set_output_event(uint32_t net_id, bool is_out_event) { - context()->set_output_event(net_id, is_out_event); - } - - void cleanup(uint32_t queue_id); - void set_arguments(uint32_t queue_id, - const kernel_selector::cl_kernel_data& kernel_data, - const kernel_arguments_data& args); - event_impl::ptr run(uint32_t queue_id, - const kernel_selector::cl_kernel_data& kernel_data, - const std::vector& dependencies) const; -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp index 2faddb64a6a..8f937863b52 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp @@ -5,8 +5,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "kernel_runner.h" -#include "kernel.h" +#include "runtime/kernels_cache.hpp" +#include "cldnn/runtime/stream.hpp" #include "weight_bias_params.h" +#include "kernel_selector_helper.h" #include #include #include @@ -15,19 +17,18 @@ namespace cldnn { namespace gpu { -kernel_runner::kernel_runner(engine_impl& engine_ref, uint32_t program_id, bool weights_and_bias_exist, bool zero_points_exist) - : engine(&engine_ref), program_id(program_id), weights_and_bias_exist(weights_and_bias_exist), zero_points_exist(zero_points_exist) {} +kernel_runner::kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist, bool zero_points_exist) + : _engine(engine_ref), program_id(program_id), weights_and_bias_exist(weights_and_bias_exist), zero_points_exist(zero_points_exist) {} void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kernels_data, - gpu::kernel::kernel_arguments_data& args) { + kernel_arguments_data& args) { const auto& base_params = *static_cast(kernels_data[0].params.get()); // Prepare input buffers if (input_buffers.empty()) { for (const auto& 
input : base_params.inputs) { int num_of_input_elements = static_cast(input.PhysicalSize()); - input_buffers.push_back(engine->allocate_memory( - {from_data_type(input.GetDType()), format::bfyx, tensor(1, 1, num_of_input_elements, 1)}, - 0)); + input_buffers.push_back(_engine.allocate_memory( + {from_data_type(input.GetDType()), format::bfyx, tensor(1, 1, num_of_input_elements, 1)})); } } for (const auto& input : input_buffers) { @@ -38,9 +39,8 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern for (auto& fused_op : base_params.fused_ops) { for (auto& fused_ops_input : fused_op.tensors) { auto num_of_elements = static_cast(fused_ops_input.PhysicalSize()); - fused_ops_buffers.push_back(engine->allocate_memory( - { from_data_type(fused_ops_input.GetDType()), format::bfyx, tensor(1, 1, num_of_elements, 1) }, - 0)); + fused_ops_buffers.push_back(_engine.allocate_memory( + { from_data_type(fused_ops_input.GetDType()), format::bfyx, tensor(1, 1, num_of_elements, 1) })); } } } @@ -50,9 +50,8 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern // Prepare output buffer if (output_buffers.empty()) { int num_of_output_elements = static_cast(base_params.output.PhysicalSize()); - output_buffers.push_back(engine->allocate_memory( - {from_data_type(base_params.output.GetDType()), format::bfyx, tensor(1, 1, num_of_output_elements, 1)}, - 0)); + output_buffers.push_back(_engine.allocate_memory( + {from_data_type(base_params.output.GetDType()), format::bfyx, tensor(1, 1, num_of_output_elements, 1)})); } args.output = output_buffers[0]; @@ -72,17 +71,15 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern if (!cldnn::format::is_image_2d(from_weights_layout(weights_bias_params.weights.GetLayout()))) { if (weight_buffers.empty()) weight_buffers.push_back( - engine->allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), + _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}, - 0)); + tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)})); if (weight_buffers[0]->get_layout().format != fmt) weight_buffers[0] = - engine->allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), + _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}, - 0); + tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}); while (weight_buffers[0]->get_layout().bytes_count() < weights_bias_params.weights.PhysicalSizeInBytes()) { // Weights layout depends on the kernel. 
Multiply the buffer size by 2 until it is big enough @@ -90,22 +87,20 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern weight_buffers.clear(); num_of_weight_elements_spatial *= 2; weight_buffers.push_back( - engine->allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), + _engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), fmt, - tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)}, - 0)); + tensor(num_of_weight_elements_ofm, 1, num_of_weight_elements_spatial, 1)})); } } else { weight_buffers.clear(); fmt = from_weights_layout(weights_bias_params.weights.GetLayout()); num_of_weight_elements_ofm = static_cast(weights_bias_params.weights.OFM().v); - weight_buffers.push_back(engine->allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), + weight_buffers.push_back(_engine.allocate_memory({from_weights_type(weights_bias_params.weights.GetDType()), fmt, tensor(num_of_weight_elements_ofm, num_of_weight_elements_ifm, num_of_weight_elements_spatial_x, - num_of_weight_elements_spatial_y)}, - 0)); + num_of_weight_elements_spatial_y)})); } args.weights = weight_buffers[0]; @@ -113,10 +108,9 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern if (!weights_bias_params.bias.empty()) { if (bias_buffers.empty()) { int num_of_bias_elements = static_cast(weights_bias_params.bias[0].PhysicalSize()); - bias_buffers.push_back(engine->allocate_memory({from_data_type(weights_bias_params.bias[0].GetDType()), + bias_buffers.push_back(_engine.allocate_memory({from_data_type(weights_bias_params.bias[0].GetDType()), format::bfyx, - tensor(1, num_of_bias_elements, 1, 1)}, - 0)); + tensor(1, num_of_bias_elements, 1, 1)})); } args.bias = bias_buffers[0]; } @@ -128,11 +122,10 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern auto& weight_zero_point = zero_point_params.weights_zero_points[0]; auto num_of_elements = static_cast(weight_zero_point.PhysicalSize()); weight_zero_point_buffers.push_back( - engine->allocate_memory({ + _engine.allocate_memory({ from_data_type(weight_zero_point.GetDType()), format::bfyx, - tensor(1, num_of_elements, 1, 1) }, - 0)); + tensor(1, num_of_elements, 1, 1) })); } args.weights_zero_points = weight_zero_point_buffers[0]; } @@ -141,11 +134,10 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern auto& activation_zero_point = zero_point_params.activations_zero_points[0]; auto num_of_elements = static_cast(activation_zero_point.PhysicalSize()); activation_zero_point_buffers.push_back( - engine->allocate_memory({ + _engine.allocate_memory({ from_data_type(activation_zero_point.GetDType()), format::bfyx, - tensor(1, num_of_elements, 1, 1) }, - 0)); + tensor(1, num_of_elements, 1, 1) })); } args.activations_zero_points = activation_zero_point_buffers[0]; } @@ -154,11 +146,10 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern auto& compensation = zero_point_params.compensation[0]; auto num_of_elements = static_cast(compensation.PhysicalSize()); compensation_buffers.push_back( - engine->allocate_memory({ + _engine.allocate_memory({ from_data_type(compensation.GetDType()), format::bfyx, - tensor(1, num_of_elements, 1, 1) }, - 0)); + tensor(1, num_of_elements, 1, 1) })); } args.compensation = compensation_buffers[0]; } @@ -169,10 +160,10 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern } std::vector 
kernel_runner::run_kernels(const kernel_selector::KernelsData& kernels_data) { - auto context = engine->get_context(); - std::vector run_times; + stream::ptr stream = _engine.create_stream(); + int num_of_kernels_to_run = static_cast(kernels_data.size()); int num_of_kernels_run = 0; @@ -182,28 +173,31 @@ std::vector kernel_runner::run_kernels(const kernel_se int current_compilation_batch = std::min(num_of_kernels_to_run, compilation_batch_size); batch_end = batch_start + current_compilation_batch; - std::vector kernels; + std::vector kernels; + kernels_cache cache(_engine); for (auto it = batch_start; it < batch_end; it++) { - kernels.push_back(kernel(context, it->kernels[0].kernelString, program_id, false, true)); + auto kernel_id = cache.set_kernel_source(it->kernels[0].code.kernelString, false); + + kernels.push_back(cache.get_kernel(kernel_id)); } - gpu::kernel::kernel_arguments_data args; + kernel_arguments_data args; prepare_kernel_args(kernels_data, args); - context->queue(0).finish(); + stream->finish(); int i = 0; for (auto it = batch_start; it < batch_end; it++) { - std::vector events; + std::vector events; auto kernel_run_time = std::chrono::nanoseconds::max(); int num_of_runs = 0; for (int iteration = 0; iteration < runs_per_kernel; iteration++) { - event_impl::ptr event; + event::ptr event; try { - kernels[i].set_arguments(0, it->kernels[0], args); - event = kernels[i].run(0, it->kernels[0], {}); + stream->set_arguments(*kernels[i], it->kernels[0].params, args); + event = stream->enqueue_kernel(*kernels[i], it->kernels[0].params, args, {}); } catch (std::exception& e) { std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName << " with auto-tune index " << it->autoTuneIndex << std::endl @@ -215,7 +209,7 @@ std::vector kernel_runner::run_kernels(const kernel_se } events.push_back(event); } - context->queue(0).finish(); + stream->finish(); for (auto& event : events) { if (event.get() != NULL) { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h index 6e257fef15f..53eadc90956 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h +++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h @@ -5,10 +5,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "engine_impl.h" +#include "cldnn/runtime/engine.hpp" #include "kernel_selector_common.h" +#include "kernel_selector_helper.h" #include "kernel_runner_interface.h" -#include "kernel.h" #include namespace cldnn { @@ -16,7 +16,7 @@ namespace gpu { class kernel_runner : public kernel_selector::KernelRunnerInterface { public: - kernel_runner(engine_impl& engine_ref, uint32_t program_id, bool weights_and_bias_exist = false, bool zero_points_exist = false); + kernel_runner(engine& engine_ref, uint32_t program_id, bool weights_and_bias_exist = false, bool zero_points_exist = false); std::vector run_kernels(const kernel_selector::KernelsData& kernelsData) override; @@ -25,20 +25,20 @@ private: const int runs_per_kernel = 15; void prepare_kernel_args(const kernel_selector::KernelsData& kernels_data, - gpu::kernel::kernel_arguments_data& args); + kernel_arguments_data& args); - engine_impl::ptr engine; + engine& _engine; uint32_t program_id; bool weights_and_bias_exist; bool zero_points_exist; - std::vector input_buffers; - std::vector fused_ops_buffers; - std::vector output_buffers; - std::vector weight_buffers; - std::vector bias_buffers; - 
std::vector weight_zero_point_buffers; - std::vector activation_zero_point_buffers; - std::vector compensation_buffers; + std::vector input_buffers; + std::vector fused_ops_buffers; + std::vector output_buffers; + std::vector weight_buffers; + std::vector bias_buffers; + std::vector weight_zero_point_buffers; + std::vector activation_zero_point_buffers; + std::vector compensation_buffers; }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/inference-engine/thirdparty/clDNN/src/gpu/loop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/loop_gpu.cpp index ea8965ec427..828fb6ca3d1 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/loop_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/loop_gpu.cpp @@ -10,7 +10,6 @@ #include "register_gpu.hpp" #include "mutable_data_inst.h" #include "input_layout_inst.h" -#include "memory_impl.h" #include #include @@ -18,32 +17,38 @@ namespace cldnn { namespace gpu { struct loop_gpu : typed_primitive_impl { const loop_node& node; + std::unique_ptr clone() const override { + return make_unique(*this); + } + void init_kernels() override {} + + loop_gpu(const loop_gpu& other) : typed_primitive_impl(other), node(other.node) {} explicit loop_gpu(const loop_node& node) : node(node) {} // read scala value from data primitive - static int64_t read_scalar_value(memory_impl& mem) { + static int64_t read_scalar_value(memory::ptr mem, stream& stream) { int64_t trip_count = 0; - const layout& prim_layout = mem.get_layout(); + const layout& prim_layout = mem->get_layout(); switch (prim_layout.data_type) { case data_types::u8: { - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; trip_count = *lock_prim_output.data(); break; } case data_types::i8: { - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; trip_count = *lock_prim_output.data(); break; } case data_types::i32: { - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; trip_count = *lock_prim_output.data(); break; } case data_types::i64: { - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; trip_count = *lock_prim_output.data(); break; } @@ -53,33 +58,33 @@ struct loop_gpu : typed_primitive_impl { return trip_count; } - static void write_scalar_value(memory_impl& mem, int64_t input) { - const layout& prim_layout = mem.get_layout(); + static void write_scalar_value(memory::ptr mem, stream& stream, int64_t input) { + const layout& prim_layout = mem->get_layout(); switch (prim_layout.data_type) { case data_types::u8: { assert(input >= std::numeric_limits::min() && input <= std::numeric_limits::max()); - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; *lock_prim_output.data() = static_cast(input); break; } case data_types::i8: { assert(input >= std::numeric_limits::min() && input <= std::numeric_limits::max()); - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; *lock_prim_output.data() = static_cast(input); break; } case data_types::i32: { assert(input >= std::numeric_limits::min() && input <= std::numeric_limits::max()); - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; *lock_prim_output.data() = static_cast(input); break; } case data_types::i64: { - mem_lock lock_prim_output{mem}; + mem_lock lock_prim_output{mem, stream}; *lock_prim_output.data() = input; break; } @@ -88,10 +93,9 @@ struct loop_gpu : typed_primitive_impl { } } - event_impl::ptr execute_impl(const 
std::vector& events, loop_inst& instance) override { + event::ptr execute_impl(const std::vector& events, loop_inst& instance) override { auto& outer_network = instance.get_network(); - const uint32_t& net_id = instance.get_network().get_id(); - auto ev = outer_network.get_engine().create_user_event(net_id, false); + auto& stream = outer_network.get_stream(); auto body_network = instance.get_body_network(); @@ -104,8 +108,8 @@ struct loop_gpu : typed_primitive_impl { // read trip_count from outer network const primitive_id& trip_count_id = node.get_trip_count_id(); - memory_impl& trip_count_mem = outer_network.get_primitive(trip_count_id)->output_memory(); - int64_t trip_count = read_scalar_value(trip_count_mem); + memory::ptr trip_count_mem = outer_network.get_primitive(trip_count_id)->output_memory_ptr(); + int64_t trip_count = read_scalar_value(trip_count_mem, stream); if (trip_count < 0) { const int64_t max_iteration = node.get_max_iteration(); trip_count = max_iteration; @@ -113,26 +117,26 @@ struct loop_gpu : typed_primitive_impl { // read initial execution condition from outer network const primitive_id& initial_execution_id = node.get_initial_execution_id(); - memory_impl& initial_execution_mem = outer_network.get_primitive(initial_execution_id)->output_memory(); - int64_t execution_condition = read_scalar_value(initial_execution_mem); + memory::ptr initial_execution_mem = outer_network.get_primitive(initial_execution_id)->output_memory_ptr(); + int64_t execution_condition = read_scalar_value(initial_execution_mem, stream); // shortcut of current_iteration memory in body network (slice of input) - memory_impl* current_iteration_mem = nullptr; + memory::ptr current_iteration_mem = nullptr; if (node.is_current_iteration_used()) { const primitive_id& current_iteration_id = node.get_current_iteration_id(); - current_iteration_mem = &body_network->get_primitive(current_iteration_id)->output_memory(); + current_iteration_mem = body_network->get_primitive(current_iteration_id)->output_memory_ptr(); } // shortcut of execution_condition memory in body network - memory_impl* execution_condition_mem = nullptr; + memory::ptr execution_condition_mem = nullptr; if (node.is_execution_condition_used()) { const primitive_id& condition_id = node.get_condition_id(); - execution_condition_mem = &body_network->get_primitive(condition_id)->output_memory(); + execution_condition_mem = body_network->get_primitive(condition_id)->output_memory_ptr(); } int64_t current_iteration = 0; if (node.is_current_iteration_used()) { - write_scalar_value(*current_iteration_mem, current_iteration); + write_scalar_value(current_iteration_mem, stream, current_iteration); } const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings; @@ -141,23 +145,23 @@ struct loop_gpu : typed_primitive_impl { // Set sliced input data for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { const auto& concatenated_input = concatenated_input_mem_mappings.at(i); - memory_impl::ptr mem = concatenated_input.get_sliced_mem(0); + memory::ptr mem = concatenated_input.get_sliced_mem(0); if (mem) { - body_network->set_input_data(concatenated_input.sliced_data_prim->id(), *mem); + body_network->set_input_data(concatenated_input.sliced_data_prim->id(), mem); } else { CLDNN_ERROR_MESSAGE(node.id(), "sliced input memory of loop is not allocated properly"); } } - std::vector loop_carried_dep(events.begin(), events.end()); + std::vector loop_carried_dep(events.begin(), events.end()); while (current_iteration < 
trip_count && execution_condition) { // Copy & Set sliced input memory for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) { const auto& concatenated_input = concatenated_input_mem_mappings.at(i); - memory_impl::ptr mem = concatenated_input.get_sliced_mem(current_iteration); + memory::ptr mem = concatenated_input.get_sliced_mem(current_iteration); if (mem) { - concatenated_input.sliced_data_prim->set_output_memory(*mem); + concatenated_input.sliced_data_prim->set_output_memory(mem); } else { CLDNN_ERROR_MESSAGE(node.id(), "sliced input memory of loop is not allocated properly"); } @@ -178,7 +182,7 @@ struct loop_gpu : typed_primitive_impl { loop_carried_dep.clear(); for (const auto& backedge : node.get_back_edges()) { - event_impl::ptr body_event = body_network->get_primitive_event(backedge.from); + event::ptr body_event = body_network->get_primitive_event(backedge.from); loop_carried_dep.emplace_back(body_event); } @@ -186,10 +190,10 @@ struct loop_gpu : typed_primitive_impl { //as they are presented in the ngraph opset document for loop operation. //However they are not being used yet and only TensorIterator which has fixed sequence length is being validated. if (node.is_current_iteration_used()) { - write_scalar_value(*current_iteration_mem, current_iteration); + write_scalar_value(current_iteration_mem, stream, current_iteration); } if (node.is_execution_condition_used()) { - execution_condition = read_scalar_value(*execution_condition_mem); + execution_condition = read_scalar_value(execution_condition_mem, stream); } // update index & execution condition for the next iteration ++current_iteration; @@ -204,11 +208,10 @@ struct loop_gpu : typed_primitive_impl { } const primitive_id& num_iteration_id = node.get_num_iteration_id(); - memory_impl& num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory(); - write_scalar_value(num_actual_iterations_mem, current_iteration); + memory::ptr num_actual_iterations_mem = outer_network.get_primitive(num_iteration_id)->output_memory_ptr(); + write_scalar_value(num_actual_iterations_mem, stream, current_iteration); - dynamic_cast(ev.get())->set(); - return ev; + return stream.create_user_event(true); } static primitive_impl* create(const loop_node& arg) { return new loop_gpu(arg); } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp index c12c07aad67..b829688a736 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp @@ -5,7 +5,7 @@ #include "lrn_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "lrn/lrn_kernel_selector.h" #include "lrn/lrn_kernel_base.h" @@ -17,6 +17,10 @@ struct lrn_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const lrn_node& arg) { auto lrn_params = get_default_params(arg); auto lrn_optional_params = get_default_optional_params(arg.get_program()); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_input_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_input_gpu.cpp index 30640edd24e..4a8a4ee5d75 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_input_gpu.cpp +++ 
b/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_input_gpu.cpp @@ -11,7 +11,7 @@ #include "lstm_dynamic/lstm_dynamic_input_kernel_selector.h" #include "lstm_dynamic/lstm_dynamic_input_kernel_base.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -20,14 +20,17 @@ struct lstm_dynamic_input_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args; - args.inputs = { (memory_impl::cptr) &instance.input_memory(), (memory_impl::cptr) &instance.dyn_length_memory()}; - args.output = (memory_impl::cptr) &instance.output_memory(); - args.weights = (memory_impl::cptr) &instance.weights_memory(); - args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory() : nullptr); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args; + args.inputs = { instance.input_memory_ptr(), instance.dyn_length_memory()}; + args.output = instance.output_memory_ptr(); + args.weights = instance.weights_memory(); + args.bias = instance.bias_term() ? instance.bias_memory() : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_timeloop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_timeloop_gpu.cpp index bbacc878735..9c367d5d994 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_timeloop_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/lstm_dynamic_timeloop_gpu.cpp @@ -11,7 +11,7 @@ #include "lstm_dynamic/lstm_dynamic_timeloop_kernel_selector.h" #include "lstm_dynamic/lstm_dynamic_timeloop_kernel_base.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -20,19 +20,22 @@ struct lstm_dynamic_timeloop_gpu : typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args; - args.inputs = {(memory_impl::cptr) &instance.input_memory(), (memory_impl::cptr) &instance.dyn_length_memory()}; + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args; + args.inputs = {instance.input_memory_ptr(), instance.dyn_length_memory()}; if (instance.last_hidden_output_term()) - args.inputs.push_back((memory_impl::cptr) &instance.last_hidden_output_memory()); + args.inputs.push_back(instance.last_hidden_output_memory()); if (instance.last_cell_output_term()) - args.inputs.push_back((memory_impl::cptr) &instance.last_cell_output_memory()); - args.output = (memory_impl::cptr) &instance.output_memory(); - args.recurrent = (memory_impl::cptr) &instance.recurrent_memory(); - args.hidden = (memory_impl::cptr) (instance.initial_hidden_term() ? &instance.initial_hidden_memory() : nullptr); - args.cell = (memory_impl::cptr) (instance.initial_cell_term() ? 
&instance.initial_cell_memory() : nullptr); + args.inputs.push_back(instance.last_cell_output_memory()); + args.output = instance.output_memory_ptr(); + args.recurrent = instance.recurrent_memory(); + args.hidden = instance.initial_hidden_term() ? instance.initial_hidden_memory() : nullptr; + args.cell = instance.initial_cell_term() ? instance.initial_cell_memory() : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp index b4a45bfe415..ed558133306 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp @@ -11,7 +11,7 @@ #include "lstm/lstm_elt_kernel_selector.h" #include "lstm/lstm_elt_kernel_base.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -20,13 +20,16 @@ struct lstm_elt_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, 0); + std::unique_ptr clone() const override { + return make_unique(*this); + } - args.cell = (memory_impl::cptr) (instance.cell_term() ? &instance.cell_memory() : nullptr); - args.output = (memory_impl::cptr) &instance.output_memory(); +protected: + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args = parent::get_arguments(instance, 0); + + args.cell = instance.cell_term() ? instance.cell_memory() : nullptr; + args.output = instance.output_memory_ptr(); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp index 2b27aee688e..f6c532decd0 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp @@ -11,7 +11,7 @@ #include "lstm/lstm_gemm_kernel_selector.h" #include "lstm/lstm_gemm_kernel_base.h" #include "network_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -20,16 +20,19 @@ struct lstm_gemm_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, 0); + std::unique_ptr clone() const override { + return make_unique(*this); + } - args.output = (memory_impl::cptr) &instance.output_memory(); - args.weights = (memory_impl::cptr) &instance.weights_memory(); - args.recurrent = (memory_impl::cptr) &instance.recurrent_memory(); - args.bias = (memory_impl::cptr) (instance.bias_term() ? &instance.bias_memory() : nullptr); - args.hidden = (memory_impl::cptr) (instance.hidden_term() ? &instance.hidden_memory() : nullptr); +protected: + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args = parent::get_arguments(instance, 0); + + args.output = instance.output_memory_ptr(); + args.weights = instance.weights_memory(); + args.recurrent = instance.recurrent_memory(); + args.bias = instance.bias_term() ? instance.bias_memory() : nullptr; + args.hidden = instance.hidden_term() ? 
instance.hidden_memory() : nullptr; return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/max_unpooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/max_unpooling_gpu.cpp index 7e903fc65b0..2daaaa0ddc2 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/max_unpooling_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/max_unpooling_gpu.cpp @@ -5,7 +5,7 @@ #include "max_unpooling_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "network_impl.h" #include "kernel_selector_helper.h" #include "max_unpooling/max_unpooling_kernel_selector.h" @@ -19,20 +19,23 @@ struct max_unpooling_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(1)); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); + args.inputs.push_back(instance.dep_memory_ptr(1)); return args; } public: - event_impl::ptr execute_impl(const std::vector& events, max_unpooling_inst& instance) override { + event::ptr execute_impl(const std::vector& events, max_unpooling_inst& instance) override { // clear output buffer - std::vector tmp_events(events); - auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false); - instance.output_memory().fill(0, ev); + std::vector tmp_events(events); + auto& stream = instance.get_network().get_stream(); + auto ev = instance.output_memory().fill(stream); tmp_events.push_back(ev); return parent::execute_impl(tmp_events, instance); } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp deleted file mode 100644 index 5a839905ac3..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "error_handler.h" -#include "memory_gpu.h" -#include "engine_impl.h" -#include "ocl_base_event.h" -#include -#include - -namespace cldnn { -namespace gpu { - -gpu_buffer::gpu_buffer(const refcounted_obj_ptr& engine, - const layout& layout, - uint32_t net_id, - bool reset) - : lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false), - _buffer(_context->context(), CL_MEM_READ_WRITE, size()) { - if (reset || is_memory_reset_needed(_layout)) zero_buffer(); -} - -gpu_buffer::gpu_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const cl::Buffer& buffer, - uint32_t net_id) - : lockable_gpu_mem(engine), memory_impl(engine, new_layout, net_id, allocation_type::cl_mem, true), - _buffer(buffer) {} - -void* gpu_buffer::lock() { - std::lock_guard locker(_mutex); - if (0 == _lock_count) { - _mapped_ptr = _context->queue(_net_id).enqueueMapBuffer(_buffer, CL_TRUE, CL_MAP_WRITE, 0, size()); - } - _lock_count++; - return _mapped_ptr; -} - -void gpu_buffer::unlock() { - std::lock_guard 
locker(_mutex); - _lock_count--; - if (0 == _lock_count) { - _context->queue(_net_id).enqueueUnmapMemObject(_buffer, _mapped_ptr); - _mapped_ptr = nullptr; - } -} - -void gpu_buffer::zero_buffer() { - _context->queue(_net_id).enqueueFillBuffer(_buffer, 0, 0, size()); - _context->queue(_net_id).flush(); -} - -void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) { - cl::Event ev_ocl = dynamic_cast(ev.get())->get(); - _context->queue(_net_id).enqueueFillBuffer(_buffer, pattern, 0, size(), 0, &ev_ocl); -} - -shared_mem_params gpu_buffer::get_internal_params() const { - return {shared_mem_type::shared_mem_buffer, static_cast(_context->context().get()), nullptr, - static_cast(_buffer.get()), -#ifdef _WIN32 - nullptr, -#else - 0, -#endif - 0}; -} - -gpu_image2d::gpu_image2d(const refcounted_obj_ptr& engine, const layout& layout, uint32_t net_id, - bool reset) - : lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false), - _row_pitch(0), _slice_pitch(0) { - cl_channel_type type = layout.data_type == data_types::f16 ? CL_HALF_FLOAT : CL_FLOAT; - cl_channel_order order = CL_R; - switch (layout.format) { - case format::image_2d_weights_c1_b_fyx: - _width = layout.size.batch[0]; - _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1]; - break; - case format::image_2d_weights_winograd_6x3_s1_fbxyb: - _height = layout.size.feature[0]; - _width = layout.size.spatial[0] * layout.size.batch[0] * layout.size.spatial[1] * 8 / 3; - break; - case format::image_2d_weights_winograd_6x3_s1_xfbyb: - _height = layout.size.feature[0] * layout.size.spatial[0] * 8 / 3; - _width = layout.size.batch[0] * layout.size.spatial[1]; - break; - case format::image_2d_weights_c4_fyx_b: - _width = layout.size.batch[0]; - _height = layout.size.spatial[0] * layout.size.feature[0] * layout.size.spatial[1]; - order = CL_RGBA; - break; - case format::image_2d_rgba: - _width = layout.size.spatial[0]; - _height = layout.size.spatial[1]; - order = CL_RGBA; - if (layout.size.feature[0] != 3 && layout.size.feature[0] != 4) { - CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in image_2d_rgba input image (should be 3 or 4)!"); - } - type = CL_UNORM_INT8; - break; - case format::nv12: - _width = layout.size.spatial[1]; - _height = layout.size.spatial[0]; - if (layout.size.feature[0] == 2) { - order = CL_RG; - } else if (layout.size.feature[0] > 2) { - CLDNN_ERROR_MESSAGE("2D image allocation", "invalid number of channels in NV12 input image!"); - } - type = CL_UNORM_INT8; - break; - default: - CLDNN_ERROR_MESSAGE("2D image allocation", "unsupported image type!"); - } - - cl::ImageFormat imageFormat(order, type); - _buffer = cl::Image2D(_context->context(), CL_MEM_READ_WRITE, imageFormat, _width, _height, 0); - - if (reset) zero_image(); -} - -gpu_image2d::gpu_image2d(const refcounted_obj_ptr& engine, - const layout& new_layout, - const cl::Image2D& buffer, - uint32_t net_id) - : lockable_gpu_mem(engine), memory_impl(engine, new_layout, net_id, allocation_type::cl_mem, true), - _buffer(buffer) { - _width = _buffer.getImageInfo(); - _height = _buffer.getImageInfo(); - _row_pitch = _buffer.getImageInfo(); - _slice_pitch = _buffer.getImageInfo(); -} - -void gpu_image2d::zero_image() { - cl_uint4 pattern_uint4 = { 0, 0, 0, 0 }; - _context->queue(_net_id).enqueueFillImage(_buffer, pattern_uint4, { 0, 0, 0 }, { _width, _height, 1 }); - _context->queue(_net_id).flush(); -} - -void* gpu_image2d::lock() { - std::lock_guard locker(_mutex); - if (0 == 
_lock_count) { - _mapped_ptr = _context->queue(_net_id) - .enqueueMapImage(_buffer, - CL_TRUE, - CL_MAP_WRITE, - {0, 0, 0}, - {_width, _height, 1}, - &_row_pitch, - &_slice_pitch); - } - _lock_count++; - return _mapped_ptr; -} - -void gpu_image2d::unlock() { - std::lock_guard locker(_mutex); - _lock_count--; - if (0 == _lock_count) { - _context->queue(_net_id).enqueueUnmapMemObject(_buffer, _mapped_ptr); - _mapped_ptr = nullptr; - } -} - -void gpu_image2d::fill(unsigned char pattern, event_impl::ptr ev) { - cl::Event ev_ocl = dynamic_cast(ev.get())->get(); - cl_uint4 pattern_uint4 = {pattern, pattern, pattern, pattern}; - _context->queue(_net_id).enqueueFillImage(_buffer, pattern_uint4, {0, 0, 0}, {_width, _height, 1}, 0, &ev_ocl); -} - -shared_mem_params gpu_image2d::get_internal_params() const { - return {shared_mem_type::shared_mem_image, static_cast(_context->context().get()), nullptr, - static_cast(_buffer.get()), -#ifdef _WIN32 - nullptr, -#else - 0, -#endif - 0}; -} - -gpu_media_buffer::gpu_media_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id) - : gpu_image2d(engine, new_layout, - cl::ImageVA(engine->get_context()->context(), CL_MEM_READ_WRITE, - params->surface, params->plane), - net_id), - device(params->user_device), - surface(params->surface), - plane(params->plane) { -} - -shared_mem_params gpu_media_buffer::get_internal_params() const { - return {shared_mem_type::shared_mem_vasurface, static_cast(_context->context().get()), device, - static_cast(_buffer.get()), surface, plane }; -} - -#ifdef _WIN32 -gpu_dx_buffer::gpu_dx_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id) - : gpu_buffer(engine, new_layout, - cl::BufferDX(engine->get_context()->context(), CL_MEM_READ_WRITE, params->mem), - net_id), - device(params->user_device), - resource(params->mem) { } - -shared_mem_params gpu_dx_buffer::get_internal_params() const { - return {shared_mem_type::shared_mem_dxbuffer, static_cast(_context->context().get()), device, - static_cast(_buffer.get()), resource, 0 }; -} -#endif - -gpu_usm::gpu_usm(const refcounted_obj_ptr& engine, - const layout& new_layout, const cl::UsmMemory& buffer, - allocation_type type, uint32_t net_id) - : lockable_gpu_mem(engine) - , memory_impl(engine, new_layout, net_id, type, true) - , _buffer(buffer) { -} - -gpu_usm::gpu_usm(const refcounted_obj_ptr& engine, const layout& layout, uint32_t net_id, allocation_type type, bool reset) - : lockable_gpu_mem(engine) - , memory_impl(engine, layout, net_id, type, false) - , _buffer(_engine->get_context()->context()) { - auto device = _engine->get_context()->device(); - switch (get_allocation_type()) { - case allocation_type::usm_host: - _buffer.allocateHost(_bytes_count); - break; - case allocation_type::usm_shared: - _buffer.allocateShared(device, _bytes_count); - break; - case allocation_type::usm_device: - _buffer.allocateDevice(device, _bytes_count); - break; - default: - CLDNN_ERROR_MESSAGE("gpu_usm allocation type", - "Unknown unified shared memory type!"); - } - - if (reset || is_memory_reset_needed(_layout)) zero_buffer(); -} - -void* gpu_usm::lock() { - assert(get_allocation_type() != allocation_type::usm_device && "Can't lock usm device memory!"); - std::lock_guard locker(_mutex); - if (0 == _lock_count) { - _engine->get_context()->queue(_net_id).finish(); // Synchronization needed for OOOQ. 
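The deleted gpu_buffer, gpu_image2d and gpu_usm wrappers above all follow the same host-access protocol: a mutex-guarded lock count where the first lock() performs the blocking map (or, for USM on an out-of-order queue, the finish() the comment above mentions) and the last unlock() releases the mapping. A simplified sketch of that protocol, with placeholder map/unmap helpers standing in for the real OpenCL calls:

#include <mutex>

// Sketch only: reference-counted host mapping, mirroring the deleted lock()/unlock().
class lockable_mem_sketch {
    std::mutex _mutex;
    unsigned _lock_count = 0;
    void* _mapped_ptr = nullptr;

    void* map_for_host() { return nullptr; }  // placeholder for enqueueMapBuffer / queue.finish()
    void release_mapping() {}                 // placeholder for enqueueUnmapMemObject

public:
    void* lock() {
        std::lock_guard<std::mutex> locker(_mutex);
        if (_lock_count == 0)
            _mapped_ptr = map_for_host();     // only the first lock touches the queue
        ++_lock_count;
        return _mapped_ptr;
    }

    void unlock() {
        std::lock_guard<std::mutex> locker(_mutex);
        if (--_lock_count == 0) {
            release_mapping();
            _mapped_ptr = nullptr;
        }
    }
};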
- _mapped_ptr = _buffer.get(); - } - _lock_count++; - return _mapped_ptr; -} - -void gpu_usm::unlock() { - std::lock_guard locker(_mutex); - _lock_count--; - if (0 == _lock_count) { - _mapped_ptr = nullptr; - } -} - -void gpu_usm::fill(unsigned char pattern, event_impl::ptr ev) { - cl::Event ev_ocl = dynamic_cast(ev.get())->get(); - // enqueueFillUsm call will never finish. Driver bug? Uncomment when fixed. Some older drivers doesn't support enqueueFillUsm call at all. - // _engine->get_context()->queue(_net_id).enqueueFillUsm(_buffer, pattern, _bytes_count, nullptr, &ev_ocl) - // Workarounded with enqeue_memcopy. ToDo: Remove below code. Uncomment above. - std::vector temp_buffer(_bytes_count, pattern); - cl::usm::enqueue_memcpy(_engine->get_context()->queue(_net_id), _buffer.get(), temp_buffer.data(), _bytes_count, true, nullptr, &ev_ocl); -} - -void gpu_usm::zero_buffer() { - // event_impl::ptr ev{ new base_event(_engine->get_context()), false }; - // cl::Event ev_ocl = dynamic_cast(ev.get())->get(); - // cl::usm::enqueue_set_mem(_engine->get_context()->queue(_net_id), _buffer.get(), 0, _bytes_count, nullptr, &ev_ocl); - // ev->wait(); - - // [WA] - event_impl::ptr ev{ new base_event(_engine->get_context()), false }; - fill(0, ev); - ev->wait(); -} - -void gpu_usm::copy_from_other(const gpu_usm& other) { - _engine->get_context()->queue(_net_id).enqueueCopyUsm(other.get_buffer(), get_buffer(), _bytes_count, true); -} - -shared_mem_params gpu_usm::get_internal_params() const { - return { - shared_mem_type::shared_mem_empty, // shared_mem_type - static_cast(_engine->get_context()->context().get()), // context handle - nullptr, // user_device handle - nullptr, // mem handle -#ifdef _WIN32 - nullptr, // surface handle -#else - 0, // surface handle -#endif - 0 // plane - }; -} - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h deleted file mode 100644 index e1ac301f997..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "ocl_toolkit.h" -#include "memory_impl.h" -#include -#include -#include -#include - -#define BUFFER_ALIGNMENT 4096 -#define CACHE_ALIGNMENT 64 - -namespace cldnn { -namespace gpu { - -template -T* allocate_aligned(size_t size, size_t align) { - assert(sizeof(T) <= size); - assert(alignof(T) <= align); - return reinterpret_cast(_mm_malloc(align_to(size, align), align)); -} - -template -void deallocate_aligned(T* ptr) { - _mm_free(ptr); -} - -#if defined(_SECURE_SCL) && (_SECURE_SCL > 0) -template -stdext::checked_array_iterator arr_begin(T* buf, size_t count) { - return stdext::make_checked_array_iterator(buf, count); -} - -template -stdext::checked_array_iterator arr_end(T* buf, size_t count) { - return stdext::make_checked_array_iterator(buf, count, count); -} - -#else -template -T* arr_begin(T* buf, size_t) { - return buf; -} - -template -T* arr_end(T* buf, size_t count) { - return buf + count; -} -#endif - -struct lockable_gpu_mem { - explicit lockable_gpu_mem(const refcounted_obj_ptr& engine) : _context(engine->get_context()), - _lock_count(0), - _mapped_ptr(nullptr) {} - - std::shared_ptr _context; - std::mutex _mutex; - unsigned _lock_count; - void* _mapped_ptr; -}; - -struct gpu_buffer : 
public lockable_gpu_mem, public memory_impl { - friend cldnn::memory_pool; - - gpu_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const cl::Buffer& buffer, - uint32_t net_id); - - void* lock() override; - void unlock() override; - void fill(unsigned char pattern, event_impl::ptr ev) override; - shared_mem_params get_internal_params() const override; - const cl::Buffer& get_buffer() const { - assert(0 == _lock_count); - return _buffer; - } - - void zero_buffer(); - -protected: - gpu_buffer(const refcounted_obj_ptr& engine, const layout& layout, uint32_t net_id, - bool reset = true); - cl::Buffer _buffer; -}; - -struct gpu_image2d : public lockable_gpu_mem, public memory_impl { - friend cldnn::memory_pool; - - gpu_image2d(const refcounted_obj_ptr& engine, - const layout& new_layout, - const cl::Image2D& buffer, - uint32_t net_id); - void* lock() override; - void unlock() override; - void fill(unsigned char pattern, event_impl::ptr ev) override; - shared_mem_params get_internal_params() const override; - const cl::Image2D& get_buffer() const { - assert(0 == _lock_count); - return _buffer; - } - - void zero_image(); - -protected: - gpu_image2d(const refcounted_obj_ptr& engine, const layout& layout, uint32_t net_id, - bool reset = true); - - cl::Image2D _buffer; - size_t _width; - size_t _height; - size_t _row_pitch; - size_t _slice_pitch; -}; - -struct gpu_media_buffer : public gpu_image2d { - friend cldnn::memory_pool; - - gpu_media_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id); - shared_mem_params get_internal_params() const override; -private: - void* device; -#ifdef _WIN32 - void* surface; -#else - uint32_t surface; -#endif - uint32_t plane; -}; - -#ifdef _WIN32 -struct gpu_dx_buffer : public gpu_buffer { - friend cldnn::memory_pool; - - gpu_dx_buffer(const refcounted_obj_ptr& engine, - const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id); - shared_mem_params get_internal_params() const override; -private: - void* device; - void* resource; -}; -#endif - -struct gpu_usm : public lockable_gpu_mem, public memory_impl { - friend cldnn::memory_pool; - - gpu_usm(const refcounted_obj_ptr& engine, - const layout& new_layout, - const cl::UsmMemory& usm_buffer, - allocation_type type, - uint32_t net_id); - - void* lock() override; - void unlock() override; - const cl::UsmMemory& get_buffer() const { return _buffer; } - cl::UsmMemory& get_buffer() { return _buffer; } - - void fill(unsigned char pattern, event_impl::ptr ev) override; - void zero_buffer(); - void copy_from_other(const gpu_usm& other); - shared_mem_params get_internal_params() const override; -protected: - gpu_usm(const refcounted_obj_ptr& engine, const layout& layout, uint32_t net_id, allocation_type type, bool reset = true); - cl::UsmMemory _buffer; -}; -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp index 0b2285af859..24a716b5dbe 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp @@ -13,17 +13,11 @@ struct mutable_data_gpu : public typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; -public: - bool validate_impl(const typed_primitive_inst& instance) const override { - bool is_primary = instance.get_network().is_primary_stream(); - - auto net_id = 
instance.get_network().get_id(); - auto mem_net_id = instance.output_memory().get_net_id(); - - bool res = is_primary || net_id == mem_net_id; - return res; + std::unique_ptr clone() const override { + return make_unique(*this); } +public: static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_gpu(arg, {}); } }; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp index d2b839e7a9d..fc06edd6312 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp @@ -5,7 +5,7 @@ #include "mvn_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "mvn/mvn_kernel_selector.h" #include "mvn/mvn_kernel_base.h" @@ -21,6 +21,10 @@ struct mvn_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const mvn_node& arg) { auto mvn_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/non_max_suppression_cpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/non_max_suppression_cpu.cpp index bcb8f006c3a..55004113150 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/non_max_suppression_cpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/non_max_suppression_cpu.cpp @@ -110,14 +110,14 @@ std::vector run_nms( } template -vector2D load_boxes_impl(memory_impl& mem, bool center_point) { +vector2D load_boxes_impl(stream& stream, memory::ptr mem, bool center_point) { vector2D result; - auto lay = mem.get_layout(); + auto lay = mem->get_layout(); auto batch_size = lay.size.batch[0]; auto boxes_num = lay.size.feature[0]; result.resize(batch_size); - mem_lock boxes_lock(mem); + mem_lock boxes_lock(mem, stream); auto ptr = boxes_lock.data(); for (int bi = 0; bi < batch_size; ++bi) { @@ -145,28 +145,28 @@ vector2D load_boxes_impl(memory_impl& mem, bool center_point) { return result; } -vector2D load_boxes(memory_impl& mem, bool center_point) { - auto data_type = mem.get_layout().data_type; +vector2D load_boxes(stream& stream, memory::ptr mem, bool center_point) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::f16: - return load_boxes_impl::type>(mem, center_point); + return load_boxes_impl::type>(stream, mem, center_point); case cldnn::data_types::f32: - return load_boxes_impl::type>(mem, center_point); + return load_boxes_impl::type>(stream, mem, center_point); default: throw std::runtime_error("Non max supression - unsupported boxes data type"); } } template -vector3D load_scores_impl(memory_impl& mem) { - auto lay = mem.get_layout(); +vector3D load_scores_impl(stream& stream, memory::ptr mem) { + auto lay = mem->get_layout(); auto batch_size = lay.size.batch[0]; auto classes_num = lay.size.feature[0]; auto boxes_num = lay.size.spatial[1]; vector3D result(batch_size, vector2D(classes_num)); - mem_lock lock(mem); + mem_lock lock(mem, stream); auto ptr = lock.data(); for (int bi = 0; bi < batch_size; ++bi) { @@ -182,47 +182,47 @@ vector3D load_scores_impl(memory_impl& mem) { return result; } -vector3D load_scores(memory_impl& mem) { - auto data_type = mem.get_layout().data_type; +vector3D load_scores(stream& stream, memory::ptr mem) { + auto data_type = mem->get_layout().data_type; switch 
(data_type) { case cldnn::data_types::f16: - return load_scores_impl::type>(mem); + return load_scores_impl::type>(stream, mem); case cldnn::data_types::f32: - return load_scores_impl::type>(mem); + return load_scores_impl::type>(stream, mem); default: throw std::runtime_error("Non max supression - unsupported scores data type"); } } template -T load_scalar_impl(memory_impl& mem) { - mem_lock lock(mem); +T load_scalar_impl(stream& stream, memory::ptr mem) { + mem_lock lock(mem, stream); auto ptr = lock.data(); return static_cast(ptr[0]); } template -T load_scalar(memory_impl& mem) { - auto data_type = mem.get_layout().data_type; +T load_scalar(stream& stream, memory::ptr mem) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - return load_scalar_impl::type>(mem); + return load_scalar_impl::type>(stream, mem); case cldnn::data_types::f16: - return load_scalar_impl::type>(mem); + return load_scalar_impl::type>(stream, mem); case cldnn::data_types::f32: - return load_scalar_impl::type>(mem); + return load_scalar_impl::type>(stream, mem); default: throw std::runtime_error("Non max supression - unsupported data type"); } } template -void store_result_impl(memory_impl& mem, const std::vector& result) { - mem_lock lock(mem); +void store_result_impl(stream& stream, memory::ptr mem, const std::vector& result) { + mem_lock lock(mem, stream); auto ptr = lock.data(); - auto output_size = static_cast(mem.get_layout().size.batch[0]); + auto output_size = static_cast(mem->get_layout().size.batch[0]); auto results_size = result.size(); size_t si = 0; @@ -240,31 +240,31 @@ void store_result_impl(memory_impl& mem, const std::vector& resu } } -void store_result(memory_impl& mem, const std::vector& result) { - auto data_type = mem.get_layout().data_type; +void store_result(stream& stream, memory::ptr mem, const std::vector& result) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_result_impl::type>(mem, result); + store_result_impl::type>(stream, mem, result); break; case cldnn::data_types::f16: - store_result_impl::type>(mem, result); + store_result_impl::type>(stream, mem, result); break; case cldnn::data_types::f32: - store_result_impl::type>(mem, result); + store_result_impl::type>(stream, mem, result); break; default: throw std::runtime_error("Non max supression - unsupported output data type"); } } -void store_first_output(memory_impl& mem, const std::vector& result) { - auto data_type = mem.get_layout().data_type; +void store_first_output(stream& stream, memory::ptr mem, const std::vector& result) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_result_impl::type>(mem, result); + store_result_impl::type>(stream, mem, result); break; case cldnn::data_types::i64: - store_result_impl::type>(mem, result); + store_result_impl::type>(stream, mem, result); break; default: throw std::runtime_error("Non max supression - unsupported output data type"); @@ -272,11 +272,11 @@ void store_first_output(memory_impl& mem, const std::vector& res } template -void store_second_output_impl(memory_impl& mem, const std::vector& result) { - mem_lock lock(mem); +void store_second_output_impl(stream& stream, memory::ptr mem, const std::vector& result) { + mem_lock lock(mem, stream); auto ptr = lock.data(); - auto output_size = static_cast(mem.get_layout().size.batch[0]); + auto output_size = static_cast(mem->get_layout().size.batch[0]); auto results_size = 
result.size(); size_t si = 0; @@ -294,14 +294,14 @@ void store_second_output_impl(memory_impl& mem, const std::vector& result) { - auto data_type = mem.get_layout().data_type; +void store_second_output(stream& stream, memory::ptr mem, const std::vector& result) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::f16: - store_second_output_impl::type>(mem, result); + store_second_output_impl::type>(stream, mem, result); break; case cldnn::data_types::f32: - store_second_output_impl::type>(mem, result); + store_second_output_impl::type>(stream, mem, result); break; default: throw std::runtime_error("Non max supression - unsupported second output data type"); @@ -309,20 +309,20 @@ void store_second_output(memory_impl& mem, const std::vector& re } template -void store_third_output_impl(memory_impl& mem, const std::vector& result) { - mem_lock lock(mem); +void store_third_output_impl(stream& stream, memory::ptr mem, const std::vector& result) { + mem_lock lock(mem, stream); auto ptr = lock.data(); ptr[0] = static_cast(result.size()); } -void store_third_output(memory_impl& mem, const std::vector& result) { - auto data_type = mem.get_layout().data_type; +void store_third_output(stream& stream, memory::ptr mem, const std::vector& result) { + auto data_type = mem->get_layout().data_type; switch (data_type) { case cldnn::data_types::i32: - store_third_output_impl::type>(mem, result); + store_third_output_impl::type>(stream, mem, result); break; case cldnn::data_types::i64: - store_third_output_impl::type>(mem, result); + store_third_output_impl::type>(stream, mem, result); break; default: throw std::runtime_error("Non max supression - unsupported third output data type"); @@ -331,9 +331,10 @@ void store_third_output(memory_impl& mem, const std::vector& res void run(non_max_suppression_inst& instance) { auto prim = instance.node.get_primitive(); + auto& stream = instance.get_network().get_stream(); - auto boxes = load_boxes(instance.input_boxes_mem(), prim->center_point_box); - auto scores = load_scores(instance.input_scores_mem()); + auto boxes = load_boxes(stream, instance.input_boxes_mem(), prim->center_point_box); + auto scores = load_scores(stream, instance.input_scores_mem()); int num_select_per_class = 0; float iou_threshold = 1.f; @@ -341,58 +342,61 @@ void run(non_max_suppression_inst& instance) { float soft_nms_sigma = 0.f; if (instance.has_num_select_per_class()) { - num_select_per_class = load_scalar(instance.num_select_per_class_mem()); + num_select_per_class = load_scalar(stream, instance.num_select_per_class_mem()); } if (instance.has_iou_threshold()) { - iou_threshold = load_scalar(instance.iou_threshold_mem()); + iou_threshold = load_scalar(stream, instance.iou_threshold_mem()); } if (instance.has_score_threshold()) { - score_threshold = load_scalar(instance.score_threshold_mem()); + score_threshold = load_scalar(stream, instance.score_threshold_mem()); } if (instance.has_soft_nms_sigma()) { - soft_nms_sigma = load_scalar(instance.soft_nms_sigma_mem()); + soft_nms_sigma = load_scalar(stream, instance.soft_nms_sigma_mem()); } auto result = run_nms(boxes, scores, num_select_per_class, score_threshold, iou_threshold, soft_nms_sigma, prim->sort_result_descending); if (instance.has_third_output()) { - store_third_output(instance.third_output_mem(), result); + store_third_output(stream, instance.third_output_mem(), result); } if (instance.has_second_output()) { - store_second_output(instance.second_output_mem(), result); - 
store_first_output(instance.output_memory(), result); + store_second_output(stream, instance.second_output_mem(), result); + store_first_output(stream, instance.output_memory_ptr(), result); return; } - store_result(instance.output_memory(), result); + store_result(stream, instance.output_memory_ptr(), result); } struct non_max_suppression_cpu : typed_primitive_impl { using parent = typed_primitive_impl; + std::unique_ptr clone() const override { + return make_unique(*this); + } + non_max_suppression_cpu() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_cpu") {} - virtual event_impl::ptr execute_impl(const std::vector& event, - typed_primitive_inst& instance) { + virtual event::ptr execute_impl(const std::vector& event, typed_primitive_inst& instance) { for (auto e : event) { e->wait(); } - auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false); + auto& stream = instance.get_network().get_stream(); run(instance); - dynamic_cast(ev.get())->set(); // set as complete - return ev; + return stream.create_user_event(true); } static primitive_impl* create(const non_max_suppression_node&) { return new non_max_suppression_cpu(); } + void init_kernels() override {} }; } // namespace diff --git a/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp index 9a8a025ffb7..7ed0653035f 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp @@ -5,7 +5,7 @@ #include "normalize_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "normalize/normalize_kernel_selector.h" #include "normalize/normalize_kernel_base.h" @@ -21,11 +21,14 @@ struct normalize_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); - args.scale_table = (memory_impl::cptr) &instance.scale_memory(); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); + args.scale_table = instance.scale_memory(); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h deleted file mode 100644 index 5f426c9042f..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -#include -#include -#include -#include -#include -#include -#include "device_impl.h" - -namespace cldnn { -namespace gpu { -struct configuration; - -class ocl_builder { -private: - const uint32_t device_type = CL_DEVICE_TYPE_GPU; // cldnn supports only gpu devices - const uint32_t device_vendor = 0x8086; // Intel vendor -public: - ocl_builder() = default; - - std::map get_available_devices(void* user_context, void* user_device) const; - uint32_t get_device_type() const 
{ return device_type; } - uint32_t get_device_vendor() const { return device_vendor; } -private: - bool does_device_match_config(bool out_of_order, const cl::Device& device) const; - std::vector build_device_list(bool out_out_order) const; - std::vector build_device_list_from_user_context(bool out_out_order, void* user_context) const; - std::vector build_device_list_from_user_device(bool out_out_order, void* user_device) const; -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp deleted file mode 100644 index 60298e65a85..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "ocl_queue_wrapper.h" -#include "ocl_base_event.h" -#include "ocl_user_event.h" -#include "command_queues_builder.h" -#include "events_pool.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -// NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation -// of some types (even though we already disabled them in scope of definition of these types). -// Moreover this warning is pretty much now only for annoyance: it is generated due to lack -// of proper support for mangling of custom GCC attributes into type name (usually when used -// with templates, even from standard library). -#if defined __GNUC__ && __GNUC__ >= 6 -#pragma GCC diagnostic ignored "-Wignored-attributes" -#endif - -namespace cldnn { -namespace gpu { - -gpu_queue::gpu_queue(uint32_t id, queue_type queue, std::shared_ptr context) - : id(id), _context(context), _command_queue(queue), _events_pool(new events_pool()) {} - -event_impl::ptr gpu_queue::enqueue_kernel(kernels_cache::kernel_type const& kern, - cl::NDRange const& global, - cl::NDRange const& local, - std::vector const& deps) { - std::vector dep_events; - auto dep_events_ptr = &dep_events; - if (!context()->get_configuration().host_out_of_order) { - for (auto& dep : deps) { - if (auto ocl_base_ev = dynamic_cast(dep.get())) { - dep_events.push_back(ocl_base_ev->get()); - } - } - } else { - dep_events_ptr = nullptr; - - sync_events(deps); - } - - cl::Event ret_ev; - - try { - if (!context()->get_configuration().host_out_of_order || _output_event || - context()->get_configuration().enable_profiling) { - _command_queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local, dep_events_ptr, &ret_ev); - } else { - _command_queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local, dep_events_ptr, nullptr); - } - } catch (cl::Error const& err) { - throw ocl_error(err); - } - - return _events_pool->get_from_base_pool(context(), ret_ev, ++_queue_counter); -} - -event_impl::ptr gpu_queue::enqueue_marker(std::vector const& deps) { - if (deps.empty()) - return _events_pool->get_from_user_pool(context(), true); - - bool enabled_single_kernel = context()->get_configuration().single_kernel_name == "" ? 
false : true; - if (!context()->get_configuration().host_out_of_order) { - cl::Event ret_ev; - if (!enabled_single_kernel) { - std::vector dep_events; - for (auto& dep : deps) { - if (auto ocl_base_ev = dynamic_cast(dep.get())) - dep_events.push_back(ocl_base_ev->get()); - } - - try { - _command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev); - } catch (cl::Error const& err) { - throw ocl_error(err); - } - } else { - try { - _command_queue.enqueueMarkerWithWaitList(nullptr, &ret_ev); - } catch (cl::Error const& err) { - throw ocl_error(err); - } - } - - return _events_pool->get_from_base_pool(context(), ret_ev, ++_queue_counter); - } else { - sync_events(deps); - return _events_pool->get_from_base_pool(context(), _last_barrier_ev, _last_barrier); - } -} - -event_impl::ptr gpu_queue::group_events(std::vector const& deps) { - return _events_pool->get_from_group_pool(context(), deps); -} - -event_impl::ptr gpu_queue::create_user_event(bool set) { return _events_pool->get_from_user_pool(context(), set); } - -void gpu_queue::reset_events() { _events_pool->reset_events(); } - -void gpu_queue::release_events_pool() { _events_pool.reset(); } - -void gpu_queue::flush() { queue().flush(); } - -void gpu_queue::release_pending_memory() { - /* - TODO: Temp. solution, untill proper API calls from OpenCL are released. - */ - void* ptr = nullptr; - ptr = _mm_malloc(4096, 4096); - queue().finish(); - try { - cl::Buffer flusher(context()->context(), CL_MEM_USE_HOST_PTR, (size_t)4096, ptr); - flusher = (cl_mem) nullptr; // clear buffer - } catch (...) { - _mm_free(ptr); - throw; - } - _mm_free(ptr); -} - -void gpu_queue::sync_events(std::vector const& deps) { - bool needs_barrier = false; - for (auto& dep : deps) { - auto* ocl_base_ev = dynamic_cast(dep.get()); - if (ocl_base_ev->get_queue_stamp() > _last_barrier) { - needs_barrier = true; - } - } - - if (needs_barrier) { - try { - if (_output_event) - _command_queue.enqueueBarrierWithWaitList(nullptr, &_last_barrier_ev); - else - _command_queue.enqueueBarrierWithWaitList(nullptr, nullptr); - } catch (cl::Error const& err) { - throw ocl_error(err); - } - - _last_barrier = ++_queue_counter; - } -} - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.h deleted file mode 100644 index 55dabcdcfd0..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_queue_wrapper.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once - -#include "ocl_builder.h" - -#include "kernels_cache.h" -#include "device_info.h" -#include "event_impl.h" -#include "configuration.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace cldnn { -typedef cl::vector> kernels_binaries_vector; -typedef cl::vector kernels_binaries_container; -using queue_type = cl::CommandQueueIntel; -namespace gpu { -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)( - cl_context context, - cl_device_id device, - const cl_queue_properties_khr* properties, - cl_int* errcodeRet); - -class gpu_toolkit; -class events_pool; - -class gpu_queue { -public: - const queue_type& queue() const { return _command_queue; } - gpu_queue(uint32_t id, queue_type queue, std::shared_ptr context); - gpu_queue(gpu_queue&& other) - : 
id(other.id), - _context(other._context), - _command_queue(other._command_queue), - _queue_counter(other._queue_counter.load()), - _last_barrier(other._last_barrier.load()), - _events_pool(std::move(other._events_pool)), - _last_barrier_ev(other._last_barrier_ev), - _output_event(other._output_event) {} - - gpu_queue& operator=(gpu_queue&& other) { - if (this != &other) { - id = other.id; - _context = std::move(other._context); - _command_queue = std::move(other._command_queue); - _queue_counter = std::move(other._queue_counter.load()); - _last_barrier = std::move(other._last_barrier.load()); - _events_pool = std::move(std::move(other._events_pool)); - _last_barrier_ev = std::move(other._last_barrier_ev); - _output_event = std::move(other._output_event); - } - return *this; - } - - ~gpu_queue() = default; - - void sync_events(std::vector const& deps); - void release_pending_memory(); - void flush(); - - void set_output_event(bool out_event) { _output_event = out_event; } - - event_impl::ptr enqueue_kernel(kernels_cache::kernel_type const& kern, - cl::NDRange const& global, - cl::NDRange const& local, - std::vector const& deps); - event_impl::ptr enqueue_marker(std::vector const& deps); - event_impl::ptr group_events(std::vector const& deps); - void reset_events(); - event_impl::ptr create_user_event(bool set); - void release_events_pool(); - std::shared_ptr context() { return _context.lock(); } - -private: - uint32_t id; - std::weak_ptr _context; - queue_type _command_queue; - std::atomic _queue_counter{0}; - std::atomic _last_barrier{0}; - std::shared_ptr _events_pool; - cl::Event _last_barrier_ev; - bool _output_event = false; -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp deleted file mode 100644 index 3ee0842eb26..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "ocl_toolkit.h" -#include "ocl_base_event.h" -#include "ocl_user_event.h" -#include "command_queues_builder.h" -#include "events_pool.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -// NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation -// of some types (even though we already disabled them in scope of definition of these types). -// Moreover this warning is pretty much now only for annoyance: it is generated due to lack -// of proper support for mangling of custom GCC attributes into type name (usually when used -// with templates, even from standard library). 
-#if defined __GNUC__ && __GNUC__ >= 6 -#pragma GCC diagnostic ignored "-Wignored-attributes" -#endif - -// static class memebers - pointers to dynamically obtained OpenCL extension functions -cl::PFN_clEnqueueAcquireMediaSurfacesINTEL cl::SharedSurfLock::pfn_acquire = NULL; -cl::PFN_clEnqueueReleaseMediaSurfacesINTEL cl::SharedSurfLock::pfn_release = NULL; -cl::PFN_clCreateFromMediaSurfaceINTEL cl::ImageVA::pfn_clCreateFromMediaSurfaceINTEL = NULL; -#ifdef _WIN32 -cl::PFN_clCreateFromD3D11Buffer cl::BufferDX::pfn_clCreateFromD3D11Buffer = NULL; -#endif - -namespace cldnn { -namespace gpu { - -ocl_error::ocl_error(cl::Error const& err) - : std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {} - -std::mutex gpu_toolkit::cache_mutex; - -std::shared_ptr gpu_toolkit::create(const device_impl& device, const configuration& cfg) { - struct make_shared_wa : public gpu_toolkit { - explicit make_shared_wa(const device_impl& device, const configuration& cfg) - : gpu_toolkit(device, cfg) {} - }; - try { - auto ctx = std::make_shared(device, cfg); - ctx->add_network(0); - return ctx; - } catch (cl::Error const& err) { - throw ocl_error(err); - } -} - -struct gpu_toolkit::ocl_logger { - std::ofstream _log_file; -}; - -gpu_toolkit::gpu_toolkit(const device_impl& device_impl, const configuration& config) - : _configuration(config), - _device(&device_impl), - _neo_driver(strstr(get_device_version().c_str(), "NEO") ? true : false) { - device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions); - - device_cache_reader dc_reader(_configuration.tuning_cache_path); - _device_cache = dc_reader.get(); - - _logger = std::unique_ptr(new ocl_logger()); - if (logging_enabled()) { - auto device_info = get_device_info(); - open_log() << "Engine configuration:\n" - << " profiling: " << std::boolalpha << _configuration.enable_profiling << "\n" - << " meaningful names: " << std::boolalpha << _configuration.meaningful_kernels_names << "\n" - << " dump custom program: " << std::boolalpha << _configuration.dump_custom_program << "\n" - << " vendor type: " << std::hex << std::setfill('0') << std::setw(4) << std::right - << std::to_string(device_info.vendor_id) << "\n" - << std::dec << std::setfill(' ') << std::right - << " compiler options: " << _configuration.compiler_options << "\n" - << " single kernel name: " << _configuration.single_kernel_name << "\n" - << " out-of-order: " << std::boolalpha << config.host_out_of_order << "\n" - << " engine log: " << _configuration.log << "\n" - << " sources dumps: " << _configuration.ocl_sources_dumps_dir << "\n" - << "\nEngine info:\n" - << " cores count: " << device_info.cores_count << "\n" - << " core frequencey: " << device_info.core_frequency << "\n" - << " max work group size: " << device_info.max_work_group_size << "\n" - << " local memory size: " << device_info.max_local_mem_size << "\n" - << " fp16: " << std::boolalpha << (device_info.supports_fp16 != 0) << "\n" - << " fp16 denorms: " << std::boolalpha << (device_info.supports_fp16_denorms != 0) << "\n" - << " subgroups short: " << std::boolalpha << (device_info.supports_subgroups_short != 0) << "\n" - << " local block io: " << std::boolalpha << device_info.supports_local_block_io << "\n" - << " optimization hints: " << std::boolalpha << device_info.supports_optimization_hints << std::endl; - } -} - -gpu_queue& gpu_toolkit::get_command_queue(uint32_t id) { - return _command_queues_w.at(id); -} - -gpu_program_state& gpu_toolkit::get_program_state(uint32_t id) { - std::lock_guard 
lock(toolkit_mutex); - return *_program_states.at(id); -} - -void gpu_toolkit::add_program(uint32_t prog_id) { - std::lock_guard lock(toolkit_mutex); - _program_states.emplace(std::make_pair(prog_id, std::make_shared(*this, prog_id))); -} - -void gpu_toolkit::remove_program(uint32_t prog_id) { - std::lock_guard lock(toolkit_mutex); - auto state_iter = _program_states.find(prog_id); - - if (state_iter != _program_states.end()) { - _program_states.erase(state_iter); - } -} - -kernels_cache& gpu_toolkit::get_kernels_cache(uint32_t prog_id) { - return get_program_state(prog_id)._kernels_cache; -} - -void gpu_toolkit::add_network(uint32_t net_id) { - std::lock_guard lock(toolkit_mutex); - command_queues_builder queue_builder(context(), device(), _device->get_platform()); - queue_builder.set_profiling(_configuration.enable_profiling); - queue_builder.set_out_of_order((_configuration.host_out_of_order && _neo_driver)); - - bool priorty_extensions = - extension_supported("cl_khr_priority_hints") && extension_supported("cl_khr_create_command_queue"); - queue_builder.set_priority_mode(_configuration.priority_mode, priorty_extensions); - - bool throttle_extensions = - extension_supported("cl_khr_throttle_hints") && extension_supported("cl_khr_create_command_queue"); - queue_builder.set_throttle_mode(_configuration.throttle_mode, throttle_extensions); - - queue_builder.build(); - _command_queues_w.emplace(std::make_pair(net_id, - gpu_queue(net_id, queue_builder.queue(), shared_from_this()))); -} - -void gpu_toolkit::remove_network(uint32_t net_id) { - std::lock_guard lock(toolkit_mutex); - auto net_iter = _command_queues_w.find(net_id); - if (net_iter != _command_queues_w.end()) { - // net_iter->second.release_pending_memory(); - _command_queues_w.erase(net_iter); - } -} - -event_impl::ptr gpu_toolkit::enqueue_kernel(uint32_t queue_id, - kernels_cache::kernel_type const& kern, - cl::NDRange const& global, - cl::NDRange const& local, - std::vector const& deps) { - return get_command_queue(queue_id).enqueue_kernel(kern, global, local, deps); -} - -event_impl::ptr gpu_toolkit::enqueue_marker(uint32_t queue_id, std::vector const& deps) { - return get_command_queue(queue_id).enqueue_marker(deps); -} - -event_impl::ptr gpu_toolkit::group_events(uint32_t queue_id, std::vector const& deps) { - return get_command_queue(queue_id).group_events(deps); -} - -event_impl::ptr gpu_toolkit::create_user_event(uint32_t queue_id, bool set) { - return get_command_queue(queue_id).create_user_event(set); -} - -void gpu_toolkit::reset_events(uint32_t queue_id) { get_command_queue(queue_id).reset_events(); } - -void gpu_toolkit::release_events_pool(uint32_t queue_id) { get_command_queue(queue_id).release_events_pool(); } - -void gpu_toolkit::release_all_events_pools() { - for (auto& queue : _command_queues_w) { - queue.second.release_events_pool(); - } -} - -void gpu_toolkit::flush(uint32_t queue_id) { get_command_queue(queue_id).flush(); } - -void gpu_toolkit::release_pending_memory(uint32_t queue_id) { get_command_queue(queue_id).release_pending_memory(); } - -void gpu_toolkit::wait_for_events(std::vector const& events) { - std::vector clevents; - for (auto& ev : events) { - if (auto ocl_base_ev = dynamic_cast(ev.get())) - clevents.push_back(ocl_base_ev->get()); - } - - try { - cl::WaitForEvents(clevents); - } catch (cl::Error const& err) { - throw ocl_error(err); - } -} - -void gpu_toolkit::log(uint64_t id, std::string const& msg) { - if (_configuration.log.empty()) - return; - - open_log() << "[" << id << "] " << msg << 
std::endl; -} - -void gpu_toolkit::set_output_event(uint32_t queue_id, bool out_event) { - get_command_queue(queue_id).set_output_event(out_event); -} - -std::ofstream& gpu_toolkit::open_log() { - if (!_logger->_log_file.is_open()) { - _logger->_log_file.open(_configuration.log, std::ios::out | std::ios::trunc); - if (!_logger->_log_file.good()) { - _logger->_log_file.close(); - throw std::runtime_error("Could not initialize ocl_toolkit log file"); - } - - if (!_logger->_log_file.is_open()) { - _logger->_log_file.close(); - throw std::runtime_error("Could not open ocl_toolkit log file '" + _configuration.log + "' for writing"); - } - } - - return _logger->_log_file; -} - -} // namespace gpu - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h deleted file mode 100644 index ba8a3cb8d26..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "device_info.h" -#include "device_impl.h" -#include "kernels_cache.h" -#include "event_impl.h" -#include "configuration.h" -#include "ocl_queue_wrapper.h" -#include "device_cache_reader.h" - -#include -#include -#include -#include -#include -#include - -namespace cldnn { -typedef cl::vector> kernels_binaries_vector; -typedef cl::vector kernels_binaries_container; -using queue_type = cl::CommandQueueIntel; -namespace gpu { -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)( - cl_context context, - cl_device_id device, - const cl_queue_properties_khr* properties, - cl_int* errcodeRet); - -class ocl_error : public std::runtime_error { -public: - explicit ocl_error(cl::Error const& err); -}; - -class events_pool; -class gpu_toolkit; - -class context_holder { -protected: - explicit context_holder(std::shared_ptr context) : _context(context) {} - virtual ~context_holder() = default; - - const std::shared_ptr& context() const { return _context; } - - std::shared_ptr _context; -}; - -struct gpu_program_state { - kernels_cache _kernels_cache; - - gpu_program_state(gpu_toolkit& context, uint32_t prog_id) : - _kernels_cache(context, prog_id) {} -}; - -class gpu_toolkit : public std::enable_shared_from_this { - friend class context_holder; - -protected: - explicit gpu_toolkit(const device_impl& device_impl, - const configuration& aconfiguration = configuration()); - -public: - static std::shared_ptr create(const device_impl& device_impl, - const configuration& cfg = configuration()); - const cl::Context context() const { return _device->get_context(); } - const cl::Device device() const { return _device->get_device(); } - const memory_capabilities memory_caps() const { return _device->mem_caps(); } - const queue_type& queue(uint32_t id) { return get_command_queue(id).queue(); } - - const configuration& get_configuration() const { return _configuration; } - device_info_internal get_device_info() const { return _device->get_info(); } - std::shared_ptr get_device_cache() const { return _device_cache; } - kernels_cache& get_kernels_cache(uint32_t prog_id); - bool get_serialization_flag() { return _serialize; } - void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; } - - inline bool extension_supported(const std::string ext) { 
return _extensions.find(ext) != std::string::npos; } - - gpu_toolkit(const gpu_toolkit& other) = delete; - gpu_toolkit(gpu_toolkit&& other) = delete; - gpu_toolkit& operator=(const gpu_toolkit& other) = delete; - gpu_toolkit& operator=(gpu_toolkit&& other) = delete; - std::string single_kernel_name() const { return _configuration.single_kernel_name; } - bool enabled_single_kernel() const { return single_kernel_name() == "" ? false : true; } - - void set_output_event(uint32_t queue_id, bool out_event); - - event_impl::ptr enqueue_kernel(uint32_t queue_id, - kernels_cache::kernel_type const& kern, - cl::NDRange const& global, - cl::NDRange const& local, - std::vector const& deps); - event_impl::ptr enqueue_marker(uint32_t queue_id, std::vector const& deps); - event_impl::ptr group_events(uint32_t queue_id, std::vector const& deps); - void reset_events(uint32_t queue_id); - event_impl::ptr create_user_event(uint32_t queue_id, bool set); - void release_events_pool(uint32_t queue_id); - void release_all_events_pools(); - - void flush(uint32_t queue_id); - void release_pending_memory(uint32_t queue_id); - void wait_for_events(std::vector const& events); - - void log(uint64_t id, std::string const& msg); - bool logging_enabled() const { return !_configuration.log.empty(); } - bool is_neo_driver() { return _neo_driver; } - void add_network(uint32_t net_id); - void remove_network(uint32_t net_id); - - void add_program(uint32_t prog_id); - void remove_program(uint32_t prog_id); - - std::mutex& get_cache_mutex() { return cache_mutex; } - -private: - configuration _configuration; - device_impl::cptr _device; - bool _neo_driver = false; - std::map> _program_states; - std::map _command_queues_w; - std::shared_ptr _device_cache; - bool _serialize = false; - - std::string _extensions; - - struct ocl_logger; - std::unique_ptr _logger; - - // returns whether a barrier has been added - std::ofstream& open_log(); - - std::string get_device_version() { return device().getInfo(); } - - gpu_queue& get_command_queue(uint32_t id); - gpu_program_state& get_program_state(uint32_t id); - - std::mutex toolkit_mutex; - // mutex for kernels_cache must be static to ensure that all threads run program build in a thread-safe fashion - // including the case when multiple IE cores are created. 
- static std::mutex cache_mutex; -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp index 1c2704a5b26..218e50f018d 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp @@ -9,7 +9,7 @@ #include "kernel_selector_helper.h" #include "one_hot/one_hot_kernel_selector.h" #include "one_hot/one_hot_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include namespace cldnn { @@ -19,6 +19,10 @@ struct one_hot_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const one_hot_node& arg) { auto oh_params = get_default_params(arg, 1); auto oh_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp index 31bc335b904..4c8e1f54975 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp @@ -5,7 +5,7 @@ #include "permute_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "permute/permute_kernel_selector.h" #include "permute/permute_kernel_ref.h" @@ -19,6 +19,10 @@ struct permute_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const permute_node& arg) { auto permute_params = get_default_params(arg); auto permute_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp index 138a235d6ce..a10439bb1c9 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp @@ -5,7 +5,7 @@ #include "pooling_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "pooling/pooling_kernel_selector.h" #include "pooling/pooling_kernel_base.h" @@ -68,12 +68,15 @@ struct pooling_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); if (!instance.argument.argmax.empty()) - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(1)); + args.inputs.push_back(instance.dep_memory_ptr(1)); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h index c08058d9c67..9cd55a0f9f4 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h +++ 
b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h @@ -8,9 +8,7 @@ #include #include "primitive_inst.h" #include "program_impl.h" -#include "kernel.h" -#include "events_waiter.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "network_impl.h" #include "register_gpu.hpp" @@ -31,27 +29,42 @@ For example, all gpu convolution implementations should derive from typed_primit template struct typed_primitive_gpu_impl : public typed_primitive_impl { const typed_program_node& _outer; - device_info_internal _device_info; kernel_selector::kernel_data _kernel_data; - std::vector _kernels; - std::vector _intermediates_memory; + std::vector _kernel_ids; + std::vector _kernels; + std::vector _intermediates_memory; + + typed_primitive_gpu_impl(const typed_primitive_gpu_impl& other) + : typed_primitive_impl(other._weights_reorder_params, other._kernel_name) + , _outer(other._outer) + , _kernel_data(other._kernel_data) + , _kernel_ids(other._kernel_ids) + , _kernels({}) + , _intermediates_memory({}) { + _kernels.reserve(other._kernels.size()); + for (size_t k = 0; k < other._kernels.size(); ++k) { + _kernels.emplace_back(other._kernels[k]->clone()); + } + for (auto& mem : other._intermediates_memory) { + auto& engine = _outer.get_program().get_engine(); + auto new_mem = engine.allocate_memory(mem->get_layout(), mem->get_allocation_type()); + _intermediates_memory.push_back(new_mem); + } + } typed_primitive_gpu_impl(const typed_program_node& arg, const kernel_selector::kernel_data& kd) : typed_primitive_impl(kd.weightsReorderParams, kd.kernelName), _outer(arg), - _device_info(arg.get_program().get_engine().get_context()->get_device_info()), _kernel_data(kd) { // weights reorder params got copied to parent, clear in _kernel_data to release shared ptr _kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE; _kernel_data.weightsReorderParams.cpuKernel = nullptr; _kernel_data.weightsReorderParams.clKernel = nullptr; - _kernels.reserve(kd.kernels.size()); + _kernel_ids.reserve(kd.kernels.size()); + // Add selected kernels to kernels_cache for the following compilation and save output ids for (size_t i = 0; i < kd.kernels.size(); ++i) { - gpu::kernel kernel(_outer.get_program().get_engine().get_context(), - kd.kernels[i].kernelString, - _outer.get_program().get_id()); - _kernels.emplace_back(std::move(kernel)); + _kernel_ids.emplace_back(_outer.get_program().add_kernel(kd.kernels[i].code.kernelString)); } for (auto size : kd.internalBufferSizes) { @@ -62,7 +75,7 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl { {1, 1, 1, (tensor::value_type)(size / bpp)}}; auto& eimpl = arg.get_program().get_engine(); - _intermediates_memory.push_back(eimpl.allocate_memory(expected_layout, 0)); + _intermediates_memory.push_back(eimpl.allocate_memory(expected_layout)); } } bool is_cpu() const override { return false; } @@ -70,22 +83,21 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl { protected: virtual bool optimized_out(typed_primitive_inst&) const { return false; } - virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t /*split*/) const { - kernel::kernel_arguments_data args; + virtual kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t /*split*/) const { + kernel_arguments_data args; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { - 
args.inputs.push_back((memory_impl::cptr)&instance.input_memory(i)); + args.inputs.push_back(instance.input_memory_ptr(i)); } if (instance.has_fused_primitives()) { size_t count = instance.get_fused_mem_count(); for (size_t i = 0; i < count; i++) { - args.fused_op_inputs.push_back((memory_impl::cptr) &instance.fused_memory(i)); + args.fused_op_inputs.push_back(instance.fused_memory(i)); } } - args.output = (memory_impl::cptr) &instance.output_memory(); + args.output = instance.output_memory_ptr(); return args; } @@ -94,93 +106,96 @@ protected: virtual uint32_t get_groups() const { return 1; } virtual bool get_depthwise_sep_opt() const { return false; } - event_impl::ptr aggregate_events(const std::vector& events, - uint32_t net_id, - bool group = false) const { - if (events.size() == 1) + event::ptr aggregate_events(const std::vector& events, stream& stream, bool group = false, bool is_output = false) const { + if (events.size() == 1 && !is_output) return events[0]; - if (group) - return _outer.get_program().get_engine().get_context()->group_events(net_id, events); + if (group && !is_output) + return stream.group_events(events); - return events_waiter(_outer.get_program().get_engine().get_context()).run(net_id, events); + return stream.enqueue_marker(events, is_output); + } + + void init_kernels() override { + if (is_cpu()) { + return; + } + _kernels.clear(); + + _kernels.reserve(_kernel_ids.size()); + for (size_t k = 0; k < _kernel_ids.size(); ++k) { + _kernels.emplace_back(std::move(_outer.get_program().get_kernel(_kernel_ids[k]))); + } } void set_arguments_impl(typed_primitive_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); if (optimized_out(instance) || is_cpu()) { return; } auto split = get_split(); + stream& stream = instance.get_network().get_stream(); + // we iterate over split first in order to be able parallelism with OOOQ mechanism. for (size_t k = 0; k < _kernels.size(); ++k) { for (decltype(split) i = 0; i < split; i++) { auto args = get_arguments(instance, i); - args.scalars = &_kernel_data.kernels[k].scalars; + args.scalars = &_kernel_data.kernels[k].params.scalars; args.split = i; for (const auto& m : _intermediates_memory) { args.intermediates.push_back(m); } - _kernels[k].set_arguments(net_id, _kernel_data.kernels[k], args); + + stream.set_arguments(*_kernels[k], _kernel_data.kernels[k].params, args); } } } - void cleanup_impl(typed_primitive_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); - if (optimized_out(instance) || is_cpu()) { - return; - } - - for (size_t k = 0; k < _kernels.size(); ++k) { - _kernels[k].cleanup(net_id); - } - } - - event_impl::ptr execute_impl(const std::vector& events, - typed_primitive_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); + event::ptr execute_impl(const std::vector& events, + typed_primitive_inst& instance) override { + stream& stream = instance.get_network().get_stream(); if (optimized_out(instance)) { - return aggregate_events(events, net_id); + return aggregate_events(events, stream, false, instance.is_output()); } - std::vector tmp_events(events); - std::vector all_events; + std::vector tmp_events(events); + std::vector all_events; // TODO - split should be handle in kernel selector by providing multiple kernels. auto split = get_split(); // we iterate over split first in order to be able parallelism with OOOQ mechanism. 
for (size_t k = 0; k < _kernels.size(); ++k) { - std::vector new_events; + std::vector new_events; for (decltype(split) i = 0; i < split; i++) { - // is any user of the prim's users is an detecion output, set prim as a output event (event won't be - // nullptr) + // is any user of the prim's users is an detecion output, set prim as a output event (event won't be nullptr) auto users = instance.node.get_users(); - bool next_prim_is_cpu = is_any_user_cpu(users); - if (next_prim_is_cpu) { - _kernels[k].set_output_event(net_id, true); - } else { - _kernels[k].set_output_event(net_id, instance.node.is_output()); + bool is_output_event = is_any_user_cpu(users) || instance.node.is_output(); + + auto args = get_arguments(instance, i); + args.scalars = &_kernel_data.kernels[k].params.scalars; + args.split = i; + + for (const auto& m : _intermediates_memory) { + args.intermediates.push_back(m); } - auto event = _kernels[k].run(net_id, _kernel_data.kernels[k], tmp_events); - new_events.push_back(event); - all_events.push_back(event); + auto ev = stream.enqueue_kernel(*_kernels[k], _kernel_data.kernels[k].params, args, tmp_events, is_output_event); + new_events.push_back(ev); + all_events.push_back(ev); } tmp_events = new_events; } if ((all_events.size() == 0) && (tmp_events.size() > 0)) - return aggregate_events(tmp_events, net_id); + return aggregate_events(tmp_events, stream); bool group_events = (all_events.size() > 1); - return aggregate_events(all_events, net_id, group_events); + return aggregate_events(all_events, stream, group_events); } }; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp index 88970323467..3bcc61c5ad0 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp @@ -3,12 +3,11 @@ // #include "proposal_inst.h" -#include "kernel.h" +#include "cldnn/runtime/engine.hpp" #include "implementation_map.h" #include "network_impl.h" -#include "engine_impl.h" #include "math_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "register_gpu.hpp" #include @@ -197,10 +196,14 @@ struct proposal_gpu : typed_primitive_impl { explicit proposal_gpu(const proposal_node& arg) : outer(arg) {} + std::unique_ptr clone() const override { + return make_unique(*this); + } + template - void read_image_info(proposal_inst& instance, im_info_t& im_info) { - auto& image_info = instance.dep_memory(proposal_inst::image_info_index); - mem_lock image_info_ptr{image_info}; + void read_image_info(stream& stream, proposal_inst& instance, im_info_t& im_info) { + auto image_info = instance.dep_memory_ptr(proposal_inst::image_info_index); + mem_lock image_info_ptr{image_info, stream}; const dtype* image_info_mem = image_info_ptr.data(); bool swap_xy = instance.argument.swap_xy; @@ -212,7 +215,7 @@ struct proposal_gpu : typed_primitive_impl { int min_bbox_x = 1; int min_bbox_y = 1; - auto image_info_size = image_info.get_layout().size; + auto image_info_size = image_info->get_layout().size; auto image_info_count = image_info_size.feature[0] == 1 ? 
image_info_size.batch[0] : image_info_size.feature[0]; int scaled_min_bbox_size = instance.argument.min_bbox_size; @@ -259,13 +262,13 @@ struct proposal_gpu : typed_primitive_impl { } template - void execute(proposal_inst& instance, im_info_t im_info, dtype* proposal_prob_ptr = nullptr) { + void execute(stream& stream, proposal_inst& instance, im_info_t im_info, dtype* proposal_prob_ptr = nullptr) { const std::vector& anchors = instance.get_anchors(); size_t anchors_num = anchors.size(); - auto& cls_scores = instance.dep_memory(proposal_inst::cls_scores_index); - auto& bbox_pred = instance.dep_memory(proposal_inst::bbox_pred_index); + auto cls_scores = instance.dep_memory_ptr(proposal_inst::cls_scores_index); + auto bbox_pred = instance.dep_memory_ptr(proposal_inst::bbox_pred_index); bool swap_xy = instance.argument.swap_xy; bool initial_clip = instance.argument.initial_clip; @@ -277,14 +280,14 @@ struct proposal_gpu : typed_primitive_impl { bool for_deformable = instance.argument.for_deformable; // feat map sizes - const auto& score_size = cls_scores.get_layout().size; + const auto& score_size = cls_scores->get_layout().size; int fm_h = score_size.spatial[1]; int fm_w = score_size.spatial[0]; int fm_sz = fm_w * fm_h; - mem_lock cls_scores_ptr{cls_scores}; - mem_lock bbox_pred_ptr{bbox_pred}; + mem_lock cls_scores_ptr{cls_scores, stream}; + mem_lock bbox_pred_ptr{bbox_pred, stream}; const dtype* cls_scores_mem = cls_scores_ptr.data(); const dtype* bbox_pred_mem = bbox_pred_ptr.data(); @@ -347,9 +350,9 @@ struct proposal_gpu : typed_primitive_impl { instance.argument.post_nms_topn, coordinates_offset); - auto& output = instance.output_memory(); + auto output = instance.output_memory_ptr(); - mem_lock output_ptr{output}; + mem_lock output_ptr{output, stream}; dtype* top_data = output_ptr.data() + n * instance.argument.post_nms_topn * 5; dtype* top_data_prob = proposal_prob_ptr == nullptr ? 
nullptr : proposal_prob_ptr + n * instance.argument.post_nms_topn; @@ -386,17 +389,19 @@ struct proposal_gpu : typed_primitive_impl { } } - event_impl::ptr execute_impl(const std::vector& events, proposal_inst& instance) override { + event::ptr execute_impl(const std::vector& events, proposal_inst& instance) override { for (auto& a : events) { a->wait(); } - auto ev = instance.get_network().get_engine().create_user_event(instance.get_network().get_id(), false); + auto& stream = instance.get_network().get_stream(); + + auto ev = instance.get_network().get_stream().create_user_event(false); im_info_t im_info; if (instance.dep_memory(proposal_inst::image_info_index).get_layout().data_type == data_types::f16) { - read_image_info::type>(instance, im_info); + read_image_info::type>(stream, instance, im_info); } else { - read_image_info::type>(instance, im_info); + read_image_info::type>(stream, instance, im_info); } if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type != @@ -404,19 +409,19 @@ struct proposal_gpu : typed_primitive_impl { throw std::runtime_error("clDNN: proposal primitive doesn't support mixed bbox and scores types"); if (instance.dependencies().size() == 4) { - auto &proposal_probabilities = instance.dep_memory(proposal_inst::proposal_probabilities_out); + auto proposal_probabilities = instance.dep_memory_ptr(proposal_inst::proposal_probabilities_out); if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) { - mem_lock::type> proposal_prob_ptr{proposal_probabilities}; - execute::type>(instance, im_info, proposal_prob_ptr.data()); + mem_lock::type> proposal_prob_ptr{proposal_probabilities, stream}; + execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); } else { - mem_lock::type> proposal_prob_ptr{proposal_probabilities}; - execute::type>(instance, im_info, proposal_prob_ptr.data()); + mem_lock::type> proposal_prob_ptr{proposal_probabilities, stream}; + execute::type>(stream, instance, im_info, proposal_prob_ptr.data()); } } else { if (instance.dep_memory(proposal_inst::cls_scores_index).get_layout().data_type == data_types::f16) { - execute::type>(instance, im_info); + execute::type>(stream, instance, im_info); } else { - execute::type>(instance, im_info); + execute::type>(stream, instance, im_info); } } @@ -424,6 +429,8 @@ struct proposal_gpu : typed_primitive_impl { return ev; } + void init_kernels() override {} + static primitive_impl* create(const proposal_node& arg) { const layout& l = arg.image_info().get_output_layout(); const size_t count = l.size.feature[0] == 1 ? 
static_cast(l.size.batch[0]) : static_cast(l.size.feature[0]); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/push_pop_map.h b/inference-engine/thirdparty/clDNN/src/gpu/push_pop_map.h deleted file mode 100644 index c4fc57ed14c..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/push_pop_map.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include -#include -#include -#include -#include - -namespace cldnn { -namespace gpu { - -template , - class Allocator = std::allocator>> -class push_pop_map { - std::mutex _mutex; - std::map _map; - -public: - void push(const Key& key, Type value) { - std::lock_guard lock{_mutex}; - _map.insert({key, std::move(value)}); - } - - Type pop(const Key& key) { - std::lock_guard lock{_mutex}; - auto it = _map.find(key); - if (it == _map.end()) - throw std::out_of_range("Invalud push_pop_map key"); - auto x = std::move(it->second); - _map.erase(it); - return std::move(x); - } - - bool empty() { - std::lock_guard lock{_mutex}; - return _map.empty(); - } -}; - -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp index 3b16e678c2c..8259257d92c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp @@ -7,7 +7,7 @@ #include "kernel_selector_helper.h" #include "pyramid_roi_align/pyramid_roi_align_kernel_selector.h" #include "pyramid_roi_align/pyramid_roi_align_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "pyramid_roi_align_inst.h" #include "network_impl.h" @@ -20,6 +20,10 @@ struct pyramid_roi_align_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const pyramid_roi_align_node& arg) { auto prim = arg.get_primitive(); auto params = get_default_params(arg, 1); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp index aefaa20eb8c..627a2e1799a 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "quantize/quantize_kernel_selector.h" #include "quantize/quantize_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -19,23 +19,26 @@ struct quantize_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args; + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + kernel_arguments_data args; for (size_t i = 0; i < instance.inputs_memory_count(); i++) { - args.inputs.push_back((memory_impl::cptr) &instance.input_memory(i)); + args.inputs.push_back(instance.input_memory_ptr(i)); } if (instance.node.get_scale_shift_opt()) { if 
(instance.node.get_dependencies().size() == 9) { - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(5)); - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(6)); - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(7)); - args.inputs.push_back((memory_impl::cptr) &instance.dep_memory(8)); + args.inputs.push_back(instance.dep_memory_ptr(5)); + args.inputs.push_back(instance.dep_memory_ptr(6)); + args.inputs.push_back(instance.dep_memory_ptr(7)); + args.inputs.push_back(instance.dep_memory_ptr(8)); } } - args.output = (memory_impl::cptr) &instance.output_memory(); + args.output = instance.output_memory_ptr(); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reduce_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reduce_gpu.cpp index 2bf2f7535b2..5ad63259500 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/reduce_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/reduce_gpu.cpp @@ -9,7 +9,7 @@ #include "reduce/reduce_kernel_selector.h" #include "reduce/reduce_kernel_ref.h" #include "reduce/reduce_kernel_b_fs_yx_fsv16.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "data_inst.h" using namespace cldnn; @@ -53,6 +53,10 @@ struct reduce_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const reduce_node& arg) { auto reduce_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/region_yolo_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/region_yolo_gpu.cpp index 5bbe70ce8ed..e5518f66cb8 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/region_yolo_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/region_yolo_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "region_yolo/region_yolo_kernel_selector.h" #include "region_yolo/region_yolo_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,6 +17,10 @@ struct region_yolo_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const region_yolo_node& arg) { auto ry_params = get_default_params(arg); auto ry_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp index 6bba0170822..d6d2db15d6b 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.hpp @@ -5,69 +5,69 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/activation.hpp" -#include "api/arg_max_min.hpp" -#include "api/average_unpooling.hpp" -#include "api/batch_to_space.hpp" -#include "api/binary_convolution.hpp" -#include "api/border.hpp" -#include "api/broadcast.hpp" -#include "api/concatenation.hpp" -#include "api/condition.hpp" -#include "api/convolution.hpp" -#include "api/crop.hpp" -#include "api/custom_gpu_primitive.hpp" -#include "api/data.hpp" -#include "api/deconvolution.hpp" -#include "api/depth_to_space.hpp" -#include "api/detection_output.hpp" -#include "api/eltwise.hpp" -#include "api/fully_connected.hpp" -#include "api/gather.hpp" -#include "api/gather_nd.hpp" -#include 
"api/gemm.hpp" -#include "api/input_layout.hpp" -#include "api/lrn.hpp" -#include "api/lstm.hpp" -#include "api/lstm_dynamic.hpp" -#include "api/max_unpooling.hpp" -#include "api/mutable_data.hpp" -#include "api/mvn.hpp" -#include "api/normalize.hpp" -#include "api/one_hot.hpp" -#include "api/permute.hpp" -#include "api/pooling.hpp" -#include "api/prior_box.hpp" -#include "api/proposal.hpp" -#include "api/pyramid_roi_align.hpp" -#include "api/quantize.hpp" -#include "api/reduce.hpp" -#include "api/region_yolo.hpp" -#include "api/reorder.hpp" -#include "api/reorg_yolo.hpp" -#include "api/reshape.hpp" -#include "api/reverse_sequence.hpp" -#include "api/roi_pooling.hpp" -#include "api/scale.hpp" -#include "api/scatter_update.hpp" -#include "api/scatter_elements_update.hpp" -#include "api/scatter_nd_update.hpp" -#include "api/select.hpp" -#include "api/shuffle_channels.hpp" -#include "api/softmax.hpp" -#include "api/space_to_batch.hpp" -#include "api/strided_slice.hpp" -#include "api/tile.hpp" -#include "api/resample.hpp" -#include "api/gather_tree.hpp" -#include "api_extension/fused_conv_eltwise.hpp" -#include "api_extension/lstm_dynamic_input.hpp" -#include "api_extension/lstm_dynamic_timeloop.hpp" +#include "cldnn/primitives/activation.hpp" +#include "cldnn/primitives/arg_max_min.hpp" +#include "cldnn/primitives/average_unpooling.hpp" +#include "cldnn/primitives/batch_to_space.hpp" +#include "cldnn/primitives/binary_convolution.hpp" +#include "cldnn/primitives/border.hpp" +#include "cldnn/primitives/broadcast.hpp" +#include "cldnn/primitives/concatenation.hpp" +#include "cldnn/primitives/condition.hpp" +#include "cldnn/primitives/convolution.hpp" +#include "cldnn/primitives/crop.hpp" +#include "cldnn/primitives/custom_gpu_primitive.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/deconvolution.hpp" +#include "cldnn/primitives/depth_to_space.hpp" +#include "cldnn/primitives/detection_output.hpp" +#include "cldnn/primitives/eltwise.hpp" +#include "cldnn/primitives/fully_connected.hpp" +#include "cldnn/primitives/gather.hpp" +#include "cldnn/primitives/gather_nd.hpp" +#include "cldnn/primitives/gemm.hpp" +#include "cldnn/primitives/input_layout.hpp" +#include "cldnn/primitives/lrn.hpp" +#include "cldnn/primitives/lstm.hpp" +#include "cldnn/primitives/lstm_dynamic.hpp" +#include "cldnn/primitives/max_unpooling.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/mvn.hpp" +#include "cldnn/primitives/normalize.hpp" +#include "cldnn/primitives/one_hot.hpp" +#include "cldnn/primitives/permute.hpp" +#include "cldnn/primitives/pooling.hpp" +#include "cldnn/primitives/prior_box.hpp" +#include "cldnn/primitives/proposal.hpp" +#include "cldnn/primitives/pyramid_roi_align.hpp" +#include "cldnn/primitives/quantize.hpp" +#include "cldnn/primitives/reduce.hpp" +#include "cldnn/primitives/region_yolo.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/reorg_yolo.hpp" +#include "cldnn/primitives/reshape.hpp" +#include "cldnn/primitives/reverse_sequence.hpp" +#include "cldnn/primitives/roi_pooling.hpp" +#include "cldnn/primitives/scale.hpp" +#include "cldnn/primitives/scatter_update.hpp" +#include "cldnn/primitives/scatter_elements_update.hpp" +#include "cldnn/primitives/scatter_nd_update.hpp" +#include "cldnn/primitives/select.hpp" +#include "cldnn/primitives/shuffle_channels.hpp" +#include "cldnn/primitives/softmax.hpp" +#include "cldnn/primitives/space_to_batch.hpp" +#include "cldnn/primitives/strided_slice.hpp" +#include 
"cldnn/primitives/tile.hpp" +#include "cldnn/primitives/resample.hpp" +#include "cldnn/primitives/gather_tree.hpp" +#include "cldnn/primitives/fused_conv_eltwise.hpp" +#include "cldnn/primitives/lstm_dynamic_input.hpp" +#include "cldnn/primitives/lstm_dynamic_timeloop.hpp" +#include "cldnn/primitives/non_max_suppression.hpp" +#include "cldnn/primitives/grn.hpp" +#include "cldnn/primitives/ctc_greedy_decoder.hpp" +#include "cldnn/primitives/loop.hpp" #include "generic_layer.hpp" -#include "api/non_max_suppression.hpp" -#include "api/grn.hpp" -#include "api/ctc_greedy_decoder.hpp" -#include "api/loop.hpp" namespace cldnn { namespace gpu { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reorder_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reorder_gpu.cpp index e3961faaa00..93fafb1d14c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/reorder_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/reorder_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "reorder/reorder_kernel_selector.h" #include "reorder/reorder_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,20 +17,24 @@ struct reorder_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: bool optimized_out(reorder_inst& instance) const override { return parent::optimized_out(instance) || _outer.can_be_optimized(); } - kernel::kernel_arguments_data get_arguments(reorder_inst& instance, int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); + kernel_arguments_data get_arguments(reorder_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); auto input = &instance.input_memory(); auto input_layout = input->get_layout(); if (_outer.has_mean()) { if (input_layout.format == cldnn::format::nv12) { - args.bias = (memory_impl::cptr) &instance.mean_nv12_memory(); + args.bias = instance.mean_nv12_memory(); } else { - args.bias = (memory_impl::cptr) &instance.mean_memory(); + args.bias = instance.mean_memory(); } } return args; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reorg_yolo_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reorg_yolo_gpu.cpp index b8badf3584e..5cc78810ad4 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/reorg_yolo_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/reorg_yolo_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "reorg_yolo/reorg_yolo_kernel_selector.h" #include "reorg_yolo/reorg_yolo_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,6 +17,10 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const reorg_yolo_node& arg) { auto ry_params = get_default_params(arg); auto ry_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp index 18697841ac4..1ec432673ee 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/resample_gpu.cpp @@ -5,7 +5,7 @@ #include "resample_inst.h" #include "primitive_gpu_base.h" 
#include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "kernel_selector/core/actual_kernels/resample/resample_kernel_selector.h" #include "kernel_selector/core/actual_kernels/resample/resample_kernel_base.h" @@ -100,6 +100,10 @@ struct resample_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const resample_node& arg) { auto us_params = get_default_params(arg); auto us_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reshape_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reshape_gpu.cpp index 04a89b42002..5a78f82cca1 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/reshape_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/reshape_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "reshape/reshape_kernel_ref.h" #include "reshape/reshape_kernel_selector.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,6 +17,10 @@ struct reshape_gpu : public typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(reshape_node const& arg) { if (arg.can_be_optimized()) { diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp index 01a03b9a251..01d6aaf8933 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "reverse_sequence/reverse_sequence_kernel_selector.h" #include "reverse_sequence/reverse_sequence_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -18,6 +18,10 @@ struct reverse_sequence_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const reverse_sequence_node& arg) { auto reverse_sequence_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp index c8f1fde2290..fef441b15b8 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp @@ -5,7 +5,7 @@ #include "roi_pooling_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "roi_pooling/roi_pooling_kernel_selector.h" #include "roi_pooling/roi_pooling_kernel_ref.h" @@ -37,20 +37,23 @@ struct roi_pooling_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, - int32_t) const override { - kernel::kernel_arguments_data args; + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t) const override { + 
kernel_arguments_data args; if (instance.argument.mode == pooling_mode::deformable_bilinear && !instance.argument.no_trans) args.inputs = { - (memory_impl::cptr) &instance.input_memory(), - (memory_impl::cptr) &instance.rois_memory(), - (memory_impl::cptr) &instance.trans_memory()}; + instance.input_memory_ptr(), + instance.rois_memory(), + instance.trans_memory()}; else - args.inputs = {(memory_impl::cptr) &instance.input_memory(), (memory_impl::cptr) &instance.rois_memory()}; + args.inputs = {instance.input_memory_ptr(), instance.rois_memory()}; - args.output = (memory_impl::cptr) &instance.output_memory(); + args.output = instance.output_memory_ptr(); return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp index 0f6a91b3f54..cfdf1a3085c 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "eltwise/eltwise_kernel_selector.h" #include "eltwise/eltwise_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -19,14 +19,18 @@ struct scale_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + protected: - kernel::kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel::kernel_arguments_data args = parent::get_arguments(instance, split); - args.inputs = {(memory_impl::cptr) &instance.input_memory(), (memory_impl::cptr) &instance.scale_memory()}; - args.output = (memory_impl::cptr) &instance.output_memory(); + kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { + kernel_arguments_data args = parent::get_arguments(instance, split); + args.inputs = {instance.input_memory_ptr(), instance.scale_memory()}; + args.output = instance.output_memory_ptr(); if (_outer.bias_term()) { - args.inputs.push_back((memory_impl::cptr) &instance.bias_memory()); + args.inputs.push_back(instance.bias_memory()); } return args; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp index 00d756142fd..85e482e02e0 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "scatter_update/scatter_elements_update_kernel_selector.h" #include "scatter_update/scatter_elements_update_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -38,6 +38,10 @@ struct scatter_elements_update_gpu : typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const scatter_elements_update_node& arg) { auto scatter_elements_update_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/scatter_nd_update_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/scatter_nd_update_gpu.cpp index c9014120b51..445361bf5b8 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/scatter_nd_update_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/scatter_nd_update_gpu.cpp @@ -8,7 +8,7 @@ 
#include "kernel_selector_helper.h" #include "scatter_update/scatter_nd_update_kernel_selector.h" #include "scatter_update/scatter_nd_update_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -19,6 +19,10 @@ struct scatter_nd_update_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const scatter_nd_update_node& arg) { auto scatter_nd_update_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/scatter_update_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/scatter_update_gpu.cpp index 8bf3e7122a2..10629a08090 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/scatter_update_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/scatter_update_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "scatter_update/scatter_update_kernel_selector.h" #include "scatter_update/scatter_update_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -38,6 +38,10 @@ struct scatter_update_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const scatter_update_node& arg) { auto scatter_update_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp index 88d08811305..09572da455a 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp @@ -5,7 +5,7 @@ #include "select_inst.h" #include "primitive_gpu_base.h" #include "implementation_map.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" #include "select/select_kernel_selector.h" #include "select/select_kernel_base.h" @@ -17,6 +17,10 @@ struct select_gpu : typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const select_node& arg) { auto select_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp index 95a7705aa3a..b7a29684772 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "shuffle_channels/shuffle_channels_kernel_selector.h" #include "shuffle_channels/shuffle_channels_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -19,6 +19,10 @@ struct shuffle_channels_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const shuffle_channels_node& arg) { auto shuffle_channels_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/softmax_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/softmax_gpu.cpp index 70cdf63d3cf..868ca8913dd 100644 --- 
a/inference-engine/thirdparty/clDNN/src/gpu/softmax_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/softmax_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "softmax/softmax_kernel_selector.h" #include "softmax/softmax_kernel_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" namespace cldnn { namespace gpu { @@ -17,6 +17,10 @@ struct softmax_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + static primitive_impl* create(const softmax_node& arg) { auto sm_params = get_default_params(arg); auto sm_optional_params = diff --git a/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp index 5961ee1858c..ebc97b591f2 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "space_to_batch/space_to_batch_kernel_selector.h" #include "space_to_batch/space_to_batch_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "data_inst.h" #include @@ -20,6 +20,10 @@ struct space_to_batch_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const space_to_batch_node& arg) { auto space_to_batch_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp index 0866cebd143..bf92acea8b4 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "space_to_depth/space_to_depth_kernel_selector.h" #include "space_to_depth/space_to_depth_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -18,6 +18,10 @@ struct space_to_depth_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const space_to_depth_node& arg) { auto space_to_depth_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp index d78a08eef69..d1ddf2d4d69 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "strided_slice/strided_slice_kernel_ref.h" #include "strided_slice/strided_slice_kernel_selector.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "data_inst.h" #include @@ -21,6 +21,10 @@ struct strided_slice_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const strided_slice_node& arg) { auto params = get_default_params(arg); @@ -30,21 +34,22 @@ public: // Getting data from constant inputs. 
There are 3 args: Begin, End, Stride for (size_t i = 1; i < arg.get_dependencies().size(); ++i) { auto& input = arg.get_dependency(i).as(); - auto& mem = input.get_attached_memory(); + auto mem = input.get_attached_memory_ptr(); std::vector sizes; if (input.get_output_layout().data_type == cldnn::data_types::i64) { - int64_t* data = static_cast(mem.lock()); + mem_lock lock{mem, arg.get_program().get_stream()}; + int64_t* data = lock.data(); std::vector sizes_i64 = std::vector(data, data + input.get_output_layout().count()); sizes.resize(sizes_i64.size()); for (size_t j = 0; j < sizes.size(); j++) sizes[j] = static_cast(sizes_i64[j]); } else { - int32_t* data = static_cast(mem.lock()); + mem_lock lock{mem, arg.get_program().get_stream()}; + int32_t* data = lock.data(); sizes = std::vector(data, data + input.get_output_layout().count()); } pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1 params.striding_params.push_back(sizes); - mem.unlock(); } params.end_mask = arg.get_primitive()->end_mask; diff --git a/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp index 02f95954f58..d508e2bea31 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp @@ -8,7 +8,7 @@ #include "kernel_selector_helper.h" #include "tile/tile_kernel_selector.h" #include "tile/tile_kernel_ref.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" using namespace cldnn; @@ -19,6 +19,10 @@ struct tile_gpu : typed_primitive_gpu_impl { using parent = typed_primitive_gpu_impl; using parent::parent; + std::unique_ptr clone() const override { + return make_unique(*this); + } + public: static primitive_impl* create(const tile_node& arg) { auto tile_params = get_default_params(arg); diff --git a/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp index 38a822dca52..00c74080a38 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp @@ -10,7 +10,6 @@ #include "register_gpu.hpp" #include "network_impl.h" -#include "events_waiter.h" #include namespace cldnn { @@ -20,13 +19,16 @@ class wait_for_events_gpu : public primitive_impl { public: explicit wait_for_events_gpu(const program_node& /*node*/) {} - void set_arguments(primitive_inst& /*instance*/) override {} - void cleanup(primitive_inst& /*instance*/) override {} + std::unique_ptr clone() const override { + return make_unique(*this); + } - event_impl::ptr execute(const std::vector& events, primitive_inst& instance) override { - uint32_t net_id = instance.get_network().get_id(); - events_waiter events_waiter(instance.get_network().get_engine().get_context()); - return events_waiter.run(net_id, events); + void init_kernels() override {} + void set_arguments(primitive_inst& /*instance*/) override {} + + event::ptr execute(const std::vector& events, primitive_inst& instance) override { + auto& stream = instance.get_network().get_stream(); + return stream.enqueue_marker(events); } bool validate(const primitive_inst&) const override { return true; } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp index 7d61c77158d..249325eaacc 100644 --- 
a/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp @@ -49,7 +49,7 @@ void add_required_reorders::run(program_impl& p) { auto& usr = *usr_itr++; if (usr->get_dependencies().size() == 0) continue; // only nodes with dependencies - if (usr->is_type() || usr->is_type()) + if (usr->is_type()) continue; if (usr->type()->does_an_implementation_exist(p.get_engine(), *usr)) continue; diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/basic_memory_dependencies.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/basic_memory_dependencies.cpp index 540a81707aa..1dd8b0af12a 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/basic_memory_dependencies.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/basic_memory_dependencies.cpp @@ -9,7 +9,7 @@ #include "layout_optimizer.h" #include "program_impl.h" #include "program_helpers.h" -#include "cldnn_itt.h" +#include "runtime/cldnn_itt.hpp" #include #include #include diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp index 387512cae04..00f74be8593 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp @@ -25,9 +25,8 @@ void calculate_prior_boxes::run(program_impl& p) { p.remove_connection(pb_node.input(), pb_node); auto result = pb_node.get_result_buffer(); - auto cpp_mem = memory(result.detach()); - auto& data_node = p.get_or_create(std::make_shared("_cldnn_tmp_" + pb_node.id() + "_result", cpp_mem)); + auto& data_node = p.get_or_create(std::make_shared("_cldnn_tmp_" + pb_node.id() + "_result", result)); p.replace(pb_node, data_node); } } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp index fe236c180d8..8c3335aecbb 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp @@ -5,19 +5,18 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "pass_manager.h" -#include "internal_primitive.h" #include "data_inst.h" #include "mutable_data_inst.h" #include "program_node.h" -#include "engine_impl.h" -#include "cldnn_itt.h" +#include "cldnn/runtime/engine.hpp" +#include "runtime/cldnn_itt.hpp" using namespace cldnn; void compile_graph::run(program_impl& p) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph"); for (auto& node : p.get_processing_order()) { - if (!node->is_type() && !node->is_type()) { + if (!node->is_type()) { node->get_output_layout(); if (!node->is_type() && !(node->is_type() && node->get_dependencies().empty())) { node->selected_impl = node->type()->choose_impl(p.get_engine(), *node); diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/concat_input_order.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/concat_input_order.cpp index 2e949169e3d..9acd92fc726 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/concat_input_order.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/concat_input_order.cpp @@ -9,7 +9,7 @@ #include "convolution_inst.h" #include "fully_connected_inst.h" #include 
"data_inst.h" -#include "memory_impl.h" +#include "cldnn/runtime/memory.hpp" #include "program_impl.h" #include @@ -21,7 +21,7 @@ namespace { using shuffle_range = std::pair; -bool can_shuffle_features(program_node& node) { +bool can_shuffle_features(program_node& node, stream& stream) { if (node.is_type()) { auto& conv_node = node.as(); auto& wei_node = conv_node.weights(); @@ -46,7 +46,7 @@ bool can_shuffle_features(program_node& node) { if (pass_through) { // Primitives that are feature order invariant, pass-through shuffled features to users for (auto& user : node.get_users()) { - if (!can_shuffle_features(*user)) + if (!can_shuffle_features(*user, stream)) return false; } return true; @@ -55,17 +55,19 @@ bool can_shuffle_features(program_node& node) { return false; } -void shuffle_weights(data_node& node, const std::vector& ranges) { +void shuffle_weights(data_node& node, const std::vector& ranges, stream& stream) { // Correct for shuffled features by shuffling input feature dimension in weights. // This allows to restore correct feature order on output and only changes calculation order. auto wei_layout = node.get_output_layout(); - auto& old_weights_memory = node.get_attached_memory(); + auto old_weights_memory = node.get_attached_memory_ptr(); bool need_reset = static_cast(wei_layout.data_padding) || wei_layout.format.is_blocked(); - auto new_weights_memory = old_weights_memory.get_engine()->allocate_memory(wei_layout, old_weights_memory.get_net_id(), need_reset); + auto new_weights_memory = old_weights_memory->get_engine()->allocate_memory(wei_layout, old_weights_memory->get_allocation_type(), need_reset); auto bytes_per_elem = data_type_traits::size_of(wei_layout.data_type); - auto old_ptr = static_cast(old_weights_memory.lock()); - auto new_ptr = static_cast(new_weights_memory->lock()); + mem_lock old_weights_memory_lock{old_weights_memory, stream}; + mem_lock new_weights_memory_lock{new_weights_memory, stream}; + auto old_ptr = old_weights_memory_lock.data(); + auto new_ptr = new_weights_memory_lock.data(); for (int32_t ofi = 0; ofi < wei_layout.size.batch[0]; ++ofi) { int32_t new_ifi = 0; for (auto& range : ranges) { @@ -88,23 +90,21 @@ void shuffle_weights(data_node& node, const std::vector& ranges) } } } - old_weights_memory.unlock(); - new_weights_memory->unlock(); - node.attach_memory(*new_weights_memory, false); + node.attach_memory(new_weights_memory, false); } -void shuffle_features(program_node& node, const std::vector& ranges) { +void shuffle_features(program_node& node, const std::vector& ranges, stream& stream) { if (node.is_type()) { auto& conv = node.as(); - shuffle_weights(conv.weights().as(), ranges); + shuffle_weights(conv.weights().as(), ranges, stream); } else if (node.is_type()) { auto& fc = node.as(); - shuffle_weights(fc.weights().as(), ranges); + shuffle_weights(fc.weights().as(), ranges, stream); } else { // General case for pass-through layers for (auto& user : node.get_users()) { - shuffle_features(*user, ranges); + shuffle_features(*user, ranges, stream); } } } @@ -155,7 +155,7 @@ void concat_input_order::run(program_impl& p) { // Check that we can fuse shuffling to users bool can_shuffle_users = true; for (auto user : concat_node.get_users()) { - can_shuffle_users &= can_shuffle_features(*user); + can_shuffle_users &= can_shuffle_features(*user, p.get_stream()); } if (!along_f || !no_fusing || !correct_format || !single_format || already_aligned || !can_shuffle_users) @@ -207,8 +207,7 @@ void concat_input_order::run(program_impl& p) { 
mutable_prim->input = new_input_ids; // Correct users for shuffled features for (auto& user : concat_node.get_users()) { - shuffle_features(*user, shuffled_ranges); + shuffle_features(*user, shuffled_ranges, p.get_stream()); } } } - diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp index c30a3348548..b0c28bd93a6 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/tensor.hpp" +#include "cldnn/runtime/tensor.hpp" #include "pass_manager.h" diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp index 137ced81fc2..84496690536 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp @@ -7,7 +7,7 @@ #include "pass_manager.h" #include "border_inst.h" #include "convolution_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include using namespace cldnn; @@ -113,4 +113,4 @@ void handle_input_padding::run(program_impl& p) { } } } -} \ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/oooq_memory_dependencies.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/oooq_memory_dependencies.cpp index 380b18b951f..ccf9903701a 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/oooq_memory_dependencies.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/oooq_memory_dependencies.cpp @@ -9,7 +9,7 @@ #include "layout_optimizer.h" #include "program_impl.h" #include "program_helpers.h" -#include "cldnn_itt.h" +#include "runtime/cldnn_itt.hpp" #include #include #include diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp index f98eabd2cf6..b4371112d11 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp @@ -45,9 +45,8 @@ void post_input_reorder::run(program_impl& p) { const auto impl = node->get_selected_impl(); // add a reorder if primitive's input format doesn't match implementation's input format if (node->is_type()) { - const auto& fc_impl = dynamic_cast&>(*impl); - const auto& fc_params = - *static_cast(fc_impl._kernel_data.params.get()); + const auto& fc_impl = dynamic_cast&>(*impl); + const auto& fc_params = *static_cast(fc_impl._kernel_data.params.get()); auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout()); auto& input = node->get_dependencies()[0]; diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp index 271293e4dc4..85ea94cd476 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp @@ -6,7 +6,7 @@ #include "pass_manager.h" #include "program_helpers.h" -#include 
"api_extension/fused_conv_eltwise.hpp" +#include "cldnn/primitives/fused_conv_eltwise.hpp" #include "include/fused_conv_eltwise_inst.h" #include "include/binary_convolution_inst.h" #include "include/deformable_convolution_inst.h" @@ -36,7 +36,7 @@ post_optimize_weights::weights_bias_offset post_optimize_weights::get_weights_bi template void post_optimize_weights::optimize_weights(T& node, program_impl& p) { auto offsets = get_weights_bias_offset(node); - auto* impl = node.get_selected_impl().get(); + auto impl = node.get_selected_impl(); auto output_layout = node.get_output_layout(); auto& weights_reorder_params = impl->_weights_reorder_params; diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_replace_deconv.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_replace_deconv.cpp index 874eedb91ce..ccb00d3726e 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_replace_deconv.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_replace_deconv.cpp @@ -16,10 +16,13 @@ #include #include #include -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" void pre_replace_deconv::run(program_impl& p) { bool update_processing_order = false; + + auto& stream = p.get_stream(); + auto itr = p.nodes_map.begin(); while (itr != p.nodes_map.end()) { auto node_itr = itr++; @@ -242,16 +245,16 @@ void pre_replace_deconv::run(program_impl& p) { auto target_weights_layout = layout{ weights_layout.data_type, weights_layout.format, target_weights_size }; { - memory_impl::ptr data_to_allocate = p.get_engine().allocate_memory(target_weights_layout, 0); + memory::ptr data_to_allocate = p.get_engine().allocate_memory(target_weights_layout); std::vector weights_vec_float; if (weights_data_type == data_types::f16) { - mem_lock src{ cur_weights_node_ptr->as().get_attached_memory() }; + mem_lock src{ cur_weights_node_ptr->as().get_attached_memory_ptr(), stream }; for (uint32_t i = 0; i < weights_layout.size.count(); i++) weights_vec_float.push_back(static_cast(src.data()[i])); } else { - mem_lock src{ cur_weights_node_ptr->as().get_attached_memory() }; + mem_lock src{ cur_weights_node_ptr->as().get_attached_memory_ptr(), stream }; for (uint32_t i = 0; i < weights_layout.size.count(); i++) weights_vec_float.push_back(src.data()[i]); } @@ -266,17 +269,16 @@ void pre_replace_deconv::run(program_impl& p) { subpixel_weights); if (weights_data_type == data_types::f16) { - mem_lock dst{ data_to_allocate }; + mem_lock dst{ data_to_allocate, stream}; program_helpers::set_weights_values(dst.data(), subpixel_weights); } else if (weights_data_type == data_types::f32) { - mem_lock dst{ data_to_allocate }; + mem_lock dst{ data_to_allocate, stream }; program_helpers::set_weights_values(dst.data(), subpixel_weights); } else { throw std::logic_error("Not supported data type."); } - memory api_memory = memory(data_to_allocate.detach()); - auto data_node_weights_replace = std::make_shared(weights_vec[0] + "_conv_rpl", api_memory); + auto data_node_weights_replace = std::make_shared(weights_vec[0] + "_conv_rpl", data_to_allocate); p.get_or_create(data_node_weights_replace); auto data_node_weights_replace_node_ptr = p.nodes_map.find(weights_vec[0] + "_conv_rpl")->second; auto& data_node = data_node_weights_replace_node_ptr->as(); @@ -285,10 +287,10 @@ void pre_replace_deconv::run(program_impl& p) { float bias = 0; if (bias_data_type == data_types::f16) { - mem_lock src{ bias_id_node_ptr->as().get_attached_memory() }; + mem_lock src{ 
bias_id_node_ptr->as().get_attached_memory_ptr(), stream }; bias = static_cast(src.data()[0]); } else { - mem_lock src{ bias_id_node_ptr->as().get_attached_memory() }; + mem_lock src{ bias_id_node_ptr->as().get_attached_memory_ptr(), stream }; bias = src.data()[0]; } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp index abd9985cb72..eebd65149ad 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp @@ -4,8 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/eltwise.hpp" -#include "api/pooling.hpp" +#include "pooling_inst.h" #include "fused_conv_eltwise_inst.h" #include "primitive_inst.h" #include "activation_inst.h" diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 8dc449053ce..ffabb96f2e4 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -4,13 +4,12 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/pooling.hpp" -#include "api/proposal.hpp" -#include "api/roi_pooling.hpp" - #include "program_helpers.h" #include "pass_manager.h" +#include "pooling_inst.h" +#include "proposal_inst.h" +#include "roi_pooling_inst.h" #include "quantize_inst.h" #include "binary_convolution_inst.h" #include "activation_inst.h" @@ -51,7 +50,7 @@ #include #include #include -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" void prepare_primitive_fusing::run(program_impl& p) { fuse_reorders(p); @@ -1334,24 +1333,16 @@ void prepare_conv_eltw_read_write_opt::conv_eltwise_read_write_opt(program_impl& // buffer shared between primitives, if second input is mutable data, then we can reuse this memory auto shared_buffer_mem = second_input_node->is_type() ? 
second_input_node->as().get_attached_memory_ptr() - : p.get_engine().allocate_memory(node->get_output_layout(), 0); - - float zero = 0.0f; - layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1)); + : p.get_engine().allocate_memory(node->get_output_layout()); // this one is the first one to write data to - auto rw_output_prim0 = std::make_shared(fused_conv_eltw_node->id() + "_RW_OPT_use", - memory::attach(dummy_layout, &zero, 1)); + auto rw_output_prim0 = std::make_shared(fused_conv_eltw_node->id() + "_RW_OPT_use", shared_buffer_mem); // this one already expects data to be inside - auto rw_output_prim1 = std::make_shared(fused_conv_eltw_node->id() + "_RW_OPT_reuse", - memory::attach(dummy_layout, &zero, 1)); + auto rw_output_prim1 = std::make_shared(fused_conv_eltw_node->id() + "_RW_OPT_reuse", shared_buffer_mem); auto& rw_output_node0 = p.get_or_create(rw_output_prim0); auto& rw_output_node1 = p.get_or_create(rw_output_prim1); - rw_output_node0.as().attach_memory(*shared_buffer_mem, false); - rw_output_node1.as().attach_memory(*shared_buffer_mem, false); - // add connection between second input node -> rw_output_node0 -> node p.add_intermediate(rw_output_node0, *node, 1, true); // replace other connections with rw_output_node0 diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp index 134494d2ba7..35d31d3a71d 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_quantization.cpp @@ -4,11 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/quantize.hpp" -#include "api/binary_convolution.hpp" -#include "api/scale.hpp" -#include "api/pooling.hpp" - +#include "pooling_inst.h" #include "quantize_inst.h" #include "binary_convolution_inst.h" #include "scale_inst.h" @@ -16,10 +12,10 @@ #include "data_inst.h" #include "pass_manager.h" #include "program_helpers.h" -#include #include "to_string_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" +#include #include #include #include @@ -46,19 +42,23 @@ void prepare_quantization::prepare_packed_quantize(program_impl& p) { auto &input_low = input_low_node.as(); auto &input_high = input_high_node.as(); - auto &mem_input_low = input_low.get_attached_memory(); - auto &mem_input_high = input_high.get_attached_memory(); + auto mem_input_low = input_low.get_attached_memory_ptr(); + auto mem_input_high = input_high.get_attached_memory_ptr(); auto output_dt = quantize_node.get_output_layout().data_type; + auto& stream = p.get_stream(); + if (levels == 2) { bool is_binarization = true; - switch (mem_input_high.get_layout().data_type) { + switch (mem_input_high->get_layout().data_type) { case data_types::f32: { - auto data_input_low = static_cast(mem_input_low.lock()); - auto data_input_high = static_cast(mem_input_high.lock()); + mem_lock data_input_low_lock{mem_input_low, stream}; + mem_lock data_input_high_lock{mem_input_high, stream}; + auto data_input_low = data_input_low_lock.data(); + auto data_input_high = data_input_high_lock.data(); - for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) { + for (size_t i = 0; i < mem_input_high->get_layout().count(); i++) { if (data_input_high[i] != data_input_low[i]) { is_binarization = false; break; @@ -67,10 +67,12 @@ void 
prepare_quantization::prepare_packed_quantize(program_impl& p) { break; } case data_types::f16: { - auto data_input_low = static_cast(mem_input_low.lock()); - auto data_input_high = static_cast(mem_input_high.lock()); + mem_lock data_input_low_lock{mem_input_low, stream}; + mem_lock data_input_high_lock{mem_input_high, stream}; + auto data_input_low = data_input_low_lock.data(); + auto data_input_high = data_input_high_lock.data(); - for (size_t i = 0; i < mem_input_high.get_layout().count(); i++) { + for (size_t i = 0; i < mem_input_high->get_layout().count(); i++) { if (data_input_high[i] != data_input_low[i]) { is_binarization = false; break; @@ -81,8 +83,6 @@ void prepare_quantization::prepare_packed_quantize(program_impl& p) { default: CLDNN_ERROR_MESSAGE(node->id(), "prepare_quantization: Unsupported precision of quantize inputs"); } - mem_input_low.unlock(); - mem_input_high.unlock(); if (is_binarization) { output_dt = data_types::bin; @@ -106,6 +106,8 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { if (levels == 2 || levels > 256 || quantize_node.get_scale_shift_opt() || quantize_node.is_constant()) return; + auto& stream = p.get_stream(); + program_node &input_low_node = quantize_node.get_dependency(1); program_node &input_high_node = quantize_node.get_dependency(2); program_node &output_low_node = quantize_node.get_dependency(3); @@ -121,20 +123,20 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { auto &output_low = output_low_node.as(); auto &output_high = output_high_node.as(); - auto &mem_input_low = input_low.get_attached_memory(); - auto &mem_input_high = input_high.get_attached_memory(); - auto &mem_output_low = output_low.get_attached_memory(); - auto &mem_output_high = output_high.get_attached_memory(); + auto mem_input_low = input_low.get_attached_memory_ptr(); + auto mem_input_high = input_high.get_attached_memory_ptr(); + auto mem_output_low = output_low.get_attached_memory_ptr(); + auto mem_output_high = output_high.get_attached_memory_ptr(); - auto scales_layout = mem_input_low.get_layout(); - scales_layout.size = tensor::max(scales_layout.size, mem_input_high.get_layout().size); - scales_layout.size = tensor::max(scales_layout.size, mem_output_low.get_layout().size); - scales_layout.size = tensor::max(scales_layout.size, mem_output_high.get_layout().size); + auto scales_layout = mem_input_low->get_layout(); + scales_layout.size = tensor::max(scales_layout.size, mem_input_high->get_layout().size); + scales_layout.size = tensor::max(scales_layout.size, mem_output_low->get_layout().size); + scales_layout.size = tensor::max(scales_layout.size, mem_output_high->get_layout().size); - auto mem_input_scale = p.get_engine().allocate_memory(scales_layout, mem_input_low.get_net_id(), false); - auto mem_input_shift = p.get_engine().allocate_memory(scales_layout, mem_input_high.get_net_id(), false); - auto mem_output_scale = p.get_engine().allocate_memory(scales_layout, mem_output_low.get_net_id(), false); - auto mem_output_shift = p.get_engine().allocate_memory(scales_layout, mem_output_high.get_net_id(), false); + auto mem_input_scale = p.get_engine().allocate_memory(scales_layout, false); + auto mem_input_shift = p.get_engine().allocate_memory(scales_layout, false); + auto mem_output_scale = p.get_engine().allocate_memory(scales_layout, false); + auto mem_output_shift = p.get_engine().allocate_memory(scales_layout, false); auto get_offset_safe = [](layout l, tensor idx) -> int { auto sizes = l.size; @@ -164,17 +166,26 @@ void 
prepare_quantization::prepare_scale_shift_opt(program_impl &p) { float out_shift_val = 0.0f; float in_lo_val = 0.0f; float in_hi_val = 0.0f; - switch (mem_output_high.get_layout().data_type) { + switch (mem_output_high->get_layout().data_type) { case data_types::f32: { // TODO [LOW PRECISION]: Output low/high values can be removed. - auto data_input_low = static_cast(mem_input_low.lock()); - auto data_input_high = static_cast(mem_input_high.lock()); - auto data_output_low = static_cast(mem_output_low.lock()); - auto data_output_high = static_cast(mem_output_high.lock()); - auto data_input_scale = static_cast(mem_input_scale->lock()); - auto data_input_shift = static_cast(mem_input_shift->lock()); - auto data_output_scale = static_cast(mem_output_scale->lock()); - auto data_output_shift = static_cast(mem_output_shift->lock()); + mem_lock data_input_low_lock{mem_input_low, stream}; + mem_lock data_input_high_lock{mem_input_high, stream}; + mem_lock data_output_low_lock{mem_output_low, stream}; + mem_lock data_output_high_lock{mem_output_high, stream}; + mem_lock data_input_scale_lock{mem_input_scale, stream}; + mem_lock data_input_shift_lock{mem_input_shift, stream}; + mem_lock data_output_scale_lock{mem_output_scale, stream}; + mem_lock data_output_shift_lock{mem_output_shift, stream}; + + auto data_input_low = data_input_low_lock.data(); + auto data_input_high = data_input_high_lock.data(); + auto data_output_low = data_output_low_lock.data(); + auto data_output_high = data_output_high_lock.data(); + auto data_input_scale = data_input_scale_lock.data(); + auto data_input_shift = data_input_shift_lock.data(); + auto data_output_scale = data_output_scale_lock.data(); + auto data_output_shift = data_output_shift_lock.data(); for (int b = 0; b < scales_layout.size.batch[0]; b++) { for (int f = 0; f < scales_layout.size.feature[0]; f++) { @@ -182,11 +193,11 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { for (int x = 0; x < scales_layout.size.spatial[0]; x++) { auto idx = cldnn::tensor(format::bfyx, {b, f, y, x}, 0); auto s_offset = scales_layout.get_linear_offset(idx); - auto in_lo = data_input_low[get_offset_safe(mem_input_low.get_layout(), idx)]; - auto in_hi = data_input_high[get_offset_safe(mem_input_high.get_layout(), idx)]; + auto in_lo = data_input_low[get_offset_safe(mem_input_low->get_layout(), idx)]; + auto in_hi = data_input_high[get_offset_safe(mem_input_high->get_layout(), idx)]; - auto out_lo = data_output_low[get_offset_safe(mem_output_low.get_layout(), idx)]; - auto out_hi = data_output_high[get_offset_safe(mem_output_high.get_layout(), idx)]; + auto out_lo = data_output_low[get_offset_safe(mem_output_low->get_layout(), idx)]; + auto out_hi = data_output_high[get_offset_safe(mem_output_high->get_layout(), idx)]; data_input_scale[s_offset] = (static_cast(levels) - 1) / (in_hi - in_lo); data_input_shift[s_offset] = - in_lo * (static_cast(levels) - 1) / (in_hi - in_lo); data_output_scale[s_offset] = (out_hi - out_lo) / (static_cast(levels) - 1); @@ -225,21 +236,30 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { if (data_input_shift[i] != 0.0f) need_pre_shift = true; - if (in_lo_val != data_input_low[i % mem_input_low.get_layout().count()] || - in_hi_val != data_input_high[i % mem_input_high.get_layout().count()]) + if (in_lo_val != data_input_low[i % mem_input_low->get_layout().count()] || + in_hi_val != data_input_high[i % mem_input_high->get_layout().count()]) per_tensor_in_range = false; } break; } case data_types::f16: { - auto 
data_input_low = static_cast(mem_input_low.lock()); - auto data_input_high = static_cast(mem_input_high.lock()); - auto data_output_low = static_cast(mem_output_low.lock()); - auto data_output_high = static_cast(mem_output_high.lock()); - auto data_input_scale = static_cast(mem_input_scale->lock()); - auto data_input_shift = static_cast(mem_input_shift->lock()); - auto data_output_scale = static_cast(mem_output_scale->lock()); - auto data_output_shift = static_cast(mem_output_shift->lock()); + mem_lock data_input_low_lock{mem_input_low, stream}; + mem_lock data_input_high_lock{mem_input_high, stream}; + mem_lock data_output_low_lock{mem_output_low, stream}; + mem_lock data_output_high_lock{mem_output_high, stream}; + mem_lock data_input_scale_lock{mem_input_scale, stream}; + mem_lock data_input_shift_lock{mem_input_shift, stream}; + mem_lock data_output_scale_lock{mem_output_scale, stream}; + mem_lock data_output_shift_lock{mem_output_shift, stream}; + + auto data_input_low = data_input_low_lock.data(); + auto data_input_high = data_input_high_lock.data(); + auto data_output_low = data_output_low_lock.data(); + auto data_output_high = data_output_high_lock.data(); + auto data_input_scale = data_input_scale_lock.data(); + auto data_input_shift = data_input_shift_lock.data(); + auto data_output_scale = data_output_scale_lock.data(); + auto data_output_shift = data_output_shift_lock.data(); for (int b = 0; b < scales_layout.size.batch[0]; b++) { for (int f = 0; f < scales_layout.size.feature[0]; f++) { @@ -247,11 +267,11 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { for (int x = 0; x < scales_layout.size.spatial[0]; x++) { auto idx = cldnn::tensor(format::bfyx, {b, f, y, x}, 0); auto s_offset = scales_layout.get_linear_offset(idx); - auto in_lo = half_to_float(data_input_low[get_offset_safe(mem_input_low.get_layout(), idx)]); - auto in_hi = half_to_float(data_input_high[get_offset_safe(mem_input_high.get_layout(), idx)]); + auto in_lo = half_to_float(data_input_low[get_offset_safe(mem_input_low->get_layout(), idx)]); + auto in_hi = half_to_float(data_input_high[get_offset_safe(mem_input_high->get_layout(), idx)]); - auto out_lo = half_to_float(data_output_low[get_offset_safe(mem_output_low.get_layout(), idx)]); - auto out_hi = half_to_float(data_output_high[get_offset_safe(mem_output_high.get_layout(), idx)]); + auto out_lo = half_to_float(data_output_low[get_offset_safe(mem_output_low->get_layout(), idx)]); + auto out_hi = half_to_float(data_output_high[get_offset_safe(mem_output_high->get_layout(), idx)]); data_input_scale[s_offset] = float_to_half((static_cast(levels) - 1) / (in_hi - in_lo)); data_input_shift[s_offset] = float_to_half(- in_lo * (static_cast(levels) - 1) / (in_hi - in_lo)); data_output_scale[s_offset] = float_to_half((out_hi - out_lo) / (static_cast(levels) - 1)); @@ -289,8 +309,8 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { if (half_to_float(data_input_shift[i]) != 0.0f) need_pre_shift = true; - if (in_lo_val != half_to_float(data_input_low[i % mem_input_low.get_layout().count()]) || - in_hi_val != half_to_float(data_input_high[i % mem_input_high.get_layout().count()])) + if (in_lo_val != half_to_float(data_input_low[i % mem_input_low->get_layout().count()]) || + in_hi_val != half_to_float(data_input_high[i % mem_input_high->get_layout().count()])) per_tensor_in_range = false; } break; @@ -303,22 +323,15 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { return; } - layout 
dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1)); - float zero = 0.f; - auto in_scale_prim = std::make_shared(quantize_node.id() + "_in_scale", memory::attach(dummy_layout, &zero, 1)); - auto in_shift_prim = std::make_shared(quantize_node.id() + "_in_shift", memory::attach(dummy_layout, &zero, 1)); - auto out_scale_prim = std::make_shared(quantize_node.id() + "_output_scale", memory::attach(dummy_layout, &zero, 1)); - auto out_shift_prim = std::make_shared(quantize_node.id() + "_output_shift", memory::attach(dummy_layout, &zero, 1)); + auto in_scale_prim = std::make_shared(quantize_node.id() + "_in_scale", mem_input_scale); + auto in_shift_prim = std::make_shared(quantize_node.id() + "_in_shift", mem_input_shift); + auto out_scale_prim = std::make_shared(quantize_node.id() + "_output_scale", mem_output_scale); + auto out_shift_prim = std::make_shared(quantize_node.id() + "_output_shift", mem_output_shift); auto& in_scale_node = p.get_or_create(in_scale_prim); auto& in_shift_node = p.get_or_create(in_shift_prim); auto& out_scale_node = p.get_or_create(out_scale_prim); auto& out_shift_node = p.get_or_create(out_shift_prim); - in_scale_node.as().attach_memory(*mem_input_scale); - in_shift_node.as().attach_memory(*mem_input_shift); - out_scale_node.as().attach_memory(*mem_output_scale); - out_shift_node.as().attach_memory(*mem_output_shift); - auto& inputs = p.get_inputs(); inputs.push_back(&in_scale_node); @@ -381,15 +394,6 @@ void prepare_quantization::prepare_scale_shift_opt(program_impl &p) { quantize_node.set_per_tensor_output_shift(); quantize_node.set_output_shift_val(out_shift_val); } - - mem_input_low.unlock(); - mem_input_high.unlock(); - mem_output_low.unlock(); - mem_output_high.unlock(); - mem_input_scale->unlock(); - mem_input_shift->unlock(); - mem_output_scale->unlock(); - mem_output_shift->unlock(); }); } } @@ -453,6 +457,8 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { auto node_itr = itr++; auto& node = (*node_itr); + auto& stream = p.get_stream(); + // Detects if given eltwise node performs zero point subtraction auto is_zero_point_node = [](eltwise_node& node) -> bool { auto prim = node.get_primitive(); @@ -480,8 +486,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { return true; }; - auto fill_compensation = [&](int groups, memory_impl* w, memory_impl* azp, memory_impl* wzp, - memory_impl::ptr compensation) { + auto fill_compensation = [&](int groups, const memory::ptr w, const memory::ptr azp, const memory::ptr wzp, memory::ptr compensation) { auto wl = w->get_layout(); int GS = groups; @@ -492,40 +497,40 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { auto w_dt = wl.data_type; auto azp_dt = azp->get_layout().data_type; - mem_lock comp_lock{compensation}; + mem_lock comp_lock{compensation, stream}; if (w_dt == data_types::u8 && azp_dt == data_types::u8) { - mem_lock w_lock(*w); - mem_lock azp_lock(*azp); + mem_lock w_lock(w, stream); + mem_lock azp_lock(azp, stream); if (wzp) { - mem_lock wzp_lock(*wzp); + mem_lock wzp_lock(wzp, stream); fill_compensation_typed(w_lock.data(), azp_lock.data(), wzp_lock.data(), comp_lock.data(), GS, OC, IC, KS); } else { fill_compensation_typed(w_lock.data(), azp_lock.data(), static_cast(nullptr), comp_lock.data(), GS, OC, IC, KS); } } else if (w_dt == data_types::i8 && azp_dt == data_types::u8) { - mem_lock w_lock(*w); - mem_lock azp_lock(*azp); + mem_lock w_lock(w, stream); + mem_lock azp_lock(azp, stream); if (wzp) { - 
mem_lock wzp_lock(*wzp); + mem_lock wzp_lock(wzp, stream); fill_compensation_typed(w_lock.data(), azp_lock.data(), wzp_lock.data(), comp_lock.data(), GS, OC, IC, KS); } else { fill_compensation_typed(w_lock.data(), azp_lock.data(), static_cast(nullptr), comp_lock.data(), GS, OC, IC, KS); } } else if (w_dt == data_types::i8 && azp_dt == data_types::i8) { - mem_lock w_lock(*w); - mem_lock azp_lock(*azp); + mem_lock w_lock(w, stream); + mem_lock azp_lock(azp, stream); if (wzp) { - mem_lock wzp_lock(*wzp); + mem_lock wzp_lock(wzp, stream); fill_compensation_typed(w_lock.data(), azp_lock.data(), wzp_lock.data(), comp_lock.data(), GS, OC, IC, KS); } else { fill_compensation_typed(w_lock.data(), azp_lock.data(), static_cast(nullptr), comp_lock.data(), GS, OC, IC, KS); } } else if (w_dt == data_types::u8 && azp_dt == data_types::i8) { - mem_lock w_lock(*w); - mem_lock azp_lock(*azp); + mem_lock w_lock(w, stream); + mem_lock azp_lock(azp, stream); if (wzp) { - mem_lock wzp_lock(*wzp); + mem_lock wzp_lock(wzp, stream); fill_compensation_typed(w_lock.data(), azp_lock.data(), wzp_lock.data(), comp_lock.data(), GS, OC, IC, KS); } else { fill_compensation_typed(w_lock.data(), azp_lock.data(), static_cast(nullptr), comp_lock.data(), GS, OC, IC, KS); @@ -590,13 +595,14 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { auto l = layout{new_a_zp->get_output_layout().data_type, format::bfyx, tensor{1, ifm_aligned, 1, 1}}; int s = new_a_zp->get_output_layout().size.feature[0]; - auto azp_aligned = p.get_engine().allocate_memory(l, 0, false); - mem_lock new_data{azp_aligned}; - mem_lock old_data{new_a_zp->as().get_attached_memory()}; + auto azp_aligned = p.get_engine().allocate_memory(l); + auto old_ptr = new_a_zp->as().get_attached_memory_ptr(); + mem_lock new_data{azp_aligned, stream}; + mem_lock old_data{old_ptr, stream}; for (int i = 0; i < ifm_aligned; i++) { new_data.data()[i] = old_data.data()[i % s]; } - new_a_zp->as().attach_memory(*azp_aligned); + new_a_zp->as().attach_memory(azp_aligned); input = new_input->id(); a_zero_points.push_back(new_a_zp->id()); @@ -609,13 +615,14 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { auto l = layout{new_w_zp->get_output_layout().data_type, format::bfyx, tensor{ofm_aligned, 1, 1, 1}}; int s = new_w_zp->get_output_layout().size.batch[0]; - auto wzp_aligned = p.get_engine().allocate_memory(l, 0, false); - mem_lock new_data{wzp_aligned}; - mem_lock old_data{new_w_zp->as().get_attached_memory()}; + auto wzp_aligned = p.get_engine().allocate_memory(l); + auto old_ptr = new_w_zp->as().get_attached_memory_ptr(); + mem_lock new_data{wzp_aligned, stream}; + mem_lock old_data{old_ptr, stream}; for (int i = 0; i < ofm_aligned; i++) { new_data.data()[i] = old_data.data()[i % s]; } - new_w_zp->as().attach_memory(*wzp_aligned); + new_w_zp->as().attach_memory(wzp_aligned); weights = { new_weights->id() }; w_zero_points.push_back(new_w_zp->id()); @@ -623,19 +630,16 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) { if (need_compensation) { auto l = layout{data_types::f32, format::bfyx, tensor{1, ofm_aligned, 1, 1}}; - auto data_to_allocate = p.get_engine().allocate_memory(l, 0, false); - auto w = &new_weights->as().get_attached_memory(); - auto azp = asymmetric_data ? &new_a_zp->as().get_attached_memory() : nullptr; - auto wzp = asymmetric_weights ? 
&new_w_zp->as().get_attached_memory() : nullptr; + auto data_to_allocate = p.get_engine().allocate_memory(l); + auto w = new_weights->as().get_attached_memory_ptr(); + auto azp = asymmetric_data ? new_a_zp->as().get_attached_memory_ptr() : nullptr; + auto wzp = asymmetric_weights ? new_w_zp->as().get_attached_memory_ptr() : nullptr; fill_compensation(groups, w, azp, wzp, data_to_allocate); - layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1)); - float zero = 0.f; - auto compensation_prim = std::make_shared(convolution_node.id() + "_compensation", memory::attach(dummy_layout, &zero, 1)); + auto compensation_prim = std::make_shared(convolution_node.id() + "_compensation", data_to_allocate); new_compenstation = &p.get_or_create(compensation_prim); p.get_inputs().push_back(new_compenstation); compensation.push_back(new_compenstation->id()); - new_compenstation->as().attach_memory(*data_to_allocate); } // Collect dependencies of a new convolution node @@ -745,11 +749,11 @@ void prepare_quantization::prepare_dequantize_merge(program_impl &p) { } } - auto get_scale_shift_mem = [](const eltwise_node& eltw, size_t dep_id) -> memory_impl& { + auto get_scale_shift_mem = [](const eltwise_node& eltw, size_t dep_id) -> memory::ptr { if (dep_id >= eltw.get_dependencies().size()) CLDNN_ERROR_MESSAGE(eltw.id(), "Invalid dependency id in dequantize optimization"); - return eltw.get_dependency(dep_id).as().get_attached_memory(); + return eltw.get_dependency(dep_id).as().get_attached_memory_ptr(); }; auto eltw_mode = node.get_primitive()->mode; @@ -757,6 +761,7 @@ void prepare_quantization::prepare_dequantize_merge(program_impl &p) { return; auto& input = node.input(); + auto& stream = p.get_stream(); for (auto& user : input.get_users()) { if (user == &node) @@ -781,20 +786,20 @@ void prepare_quantization::prepare_dequantize_merge(program_impl &p) { bool same_params = true; for (size_t i = 1; i < node.get_dependencies().size(); i++) { - auto& mem0 = get_scale_shift_mem(eltwise_dep, i); - auto& mem1 = get_scale_shift_mem(node, i); + auto mem0 = get_scale_shift_mem(eltwise_dep, i); + auto mem1 = get_scale_shift_mem(node, i); - auto ptr0 = static_cast(mem0.lock()); - auto ptr1 = static_cast(mem1.lock()); + mem_lock mem0_lock{mem0, stream}; + mem_lock mem1_lock{mem1, stream}; + auto ptr0 = mem0_lock.data(); + auto ptr1 = mem1_lock.data(); - for (size_t j = 0; j < mem0.get_layout().bytes_count(); j++) { + for (size_t j = 0; j < mem0->get_layout().bytes_count(); j++) { if (ptr0[j] != ptr1[j]) { same_params = false; break; } } - mem0.unlock(); - mem1.unlock(); } if (same_params) { diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp index 0229027776c..83e2742063f 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp @@ -6,11 +6,11 @@ #include "pass_manager.h" #include "program_node.h" -#include "engine_impl.h" +#include "cldnn/runtime/engine.hpp" #include "program_impl.h" #include "network_impl.h" #include "data_inst.h" -#include "cldnn_itt.h" +#include "runtime/cldnn_itt.hpp" #include #include #include @@ -71,10 +71,8 @@ void propagate_constants::run(program_impl& p) { auto& id_to_replace = cout.first; auto mem_impl = cout.second; - memory api_memory = memory(mem_impl.detach()); - auto const_data = - std::make_shared("_cldnn_const_prop_" + id_to_replace, 
api_memory /* <<< REMOVE ME WHEN POSSIBLE */); + std::make_shared("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */); auto& new_node = p.get_or_create(const_data); auto& curr_node = p.get_node(id_to_replace); @@ -109,21 +107,22 @@ bool propagate_constants::has_non_const_user(program_node& node) const { return false; } -std::list> propagate_constants::calculate(engine_impl& engine, build_options bo) { +std::list> propagate_constants::calculate(engine& engine, build_options bo) { if (!has_non_trivial_constants) return {}; bo.set_option(build_option::optimize_data(false)); bo.set_option(build_option::outputs(const_outputs)); - network_impl::ptr net = engine.build_network(nodes, bo, true); - for (auto& cin : const_inputs) net->set_input_data(cin->id(), cin->get_attached_memory()); + network_impl::ptr net = network_impl::build_network(engine, nodes, bo, true); + for (auto& cin : const_inputs) + net->set_input_data(cin->id(), cin->get_attached_memory_ptr()); net->execute({}); net->reset_execution(true); // wait for computations to complete auto outputs = net->get_outputs(); - std::list> ret; - for (auto& out : outputs) ret.push_back({out->id(), (memory_impl::ptr) &out->output_memory()}); + std::list> ret; + for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()}); return ret; } diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp index 689157262cc..15257fe0e95 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp @@ -4,13 +4,14 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/binary_convolution.hpp" #include "pass_manager.h" #include "program_node.h" #include "layout_optimizer.h" #include "program_impl.h" #include "program_helpers.h" +#include "binary_convolution_inst.h" #include "mvn_inst.h" + #include #include #include diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/skipped_branch_memory_dependencies.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/skipped_branch_memory_dependencies.cpp index bb9578142b2..a3ac6478673 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/skipped_branch_memory_dependencies.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/skipped_branch_memory_dependencies.cpp @@ -9,7 +9,7 @@ #include "layout_optimizer.h" #include "program_impl.h" #include "program_helpers.h" -#include "cldnn_itt.h" +#include "runtime/cldnn_itt.hpp" #include #include #include diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/strided_slice_optimize.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/strided_slice_optimize.cpp index 50ddea1771a..3546a7d427c 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/strided_slice_optimize.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/strided_slice_optimize.cpp @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "cldnn/runtime/error_handler.hpp" #include "pass_manager.h" #include "program_helpers.h" #include "strided_slice_inst.h" diff --git a/inference-engine/thirdparty/clDNN/src/include/activation_inst.h b/inference-engine/thirdparty/clDNN/src/include/activation_inst.h index f5c66dac45d..989b2b55724 100644 --- 
a/inference-engine/thirdparty/clDNN/src/include/activation_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/activation_inst.h @@ -4,9 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/activation.hpp" +#include "cldnn/primitives/activation.hpp" #include "primitive_inst.h" #include "kernel_selector/core/actual_kernels/activation/activation_kernel_base.h" + #include #include @@ -49,7 +50,7 @@ public: public: typed_primitive_inst(network_impl& network, activation_node const& node); - memory_impl& slope_memory() const { return dep_memory(1); } + memory::ptr slope_memory() const { return dep_memory_ptr(1); } bool is_parameterized() const { return !argument.additional_params_input.empty(); } }; diff --git a/inference-engine/thirdparty/clDNN/src/include/arg_max_min_inst.h b/inference-engine/thirdparty/clDNN/src/include/arg_max_min_inst.h index 48a899dbc1c..605a6bb7e95 100644 --- a/inference-engine/thirdparty/clDNN/src/include/arg_max_min_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/arg_max_min_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/arg_max_min.hpp" +#include "cldnn/primitives/arg_max_min.hpp" #include "primitive_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/include/average_unpooling_inst.h b/inference-engine/thirdparty/clDNN/src/include/average_unpooling_inst.h index e20df8e7597..0eef5fe75e2 100644 --- a/inference-engine/thirdparty/clDNN/src/include/average_unpooling_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/average_unpooling_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/average_unpooling.hpp" +#include "cldnn/primitives/average_unpooling.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/batch_to_space_inst.h b/inference-engine/thirdparty/clDNN/src/include/batch_to_space_inst.h index 930db1ae93f..1b0feedd2f3 100644 --- a/inference-engine/thirdparty/clDNN/src/include/batch_to_space_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/batch_to_space_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/batch_to_space.hpp" +#include "cldnn/primitives/batch_to_space.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/binary_convolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/binary_convolution_inst.h index f4ce140d4f8..ef7f7afb2a8 100644 --- a/inference-engine/thirdparty/clDNN/src/include/binary_convolution_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/binary_convolution_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/binary_convolution.hpp" +#include "cldnn/primitives/binary_convolution.hpp" #include "primitive_inst.h" #include @@ -62,11 +62,11 @@ public: public: typed_primitive_inst(network_impl& network, binary_convolution_node const& node); - memory_impl& weights_memory(size_t index) const { + memory::ptr weights_memory(size_t index) const { if (static_cast(index) >= node.get_split()) throw std::range_error("weights offset too big"); - return dep_memory(1 + index); + 
return dep_memory_ptr(1 + index); } }; diff --git a/inference-engine/thirdparty/clDNN/src/include/border_inst.h b/inference-engine/thirdparty/clDNN/src/include/border_inst.h index d724b426891..ed181ed0b16 100644 --- a/inference-engine/thirdparty/clDNN/src/include/border_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/border_inst.h @@ -4,10 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once - -#include - +#include "cldnn/primitives/border.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h b/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h index 9b8471d44db..75be67373c7 100644 --- a/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include +#include "cldnn/primitives/broadcast.hpp" #include "primitive_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h b/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h index ffdb51a63de..82d3d39676f 100644 --- a/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/concatenation.hpp" +#include "cldnn/primitives/concatenation.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/condition_inst.h b/inference-engine/thirdparty/clDNN/src/include/condition_inst.h index 8a8859b55b6..4bff69b6e49 100644 --- a/inference-engine/thirdparty/clDNN/src/include/condition_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/condition_inst.h @@ -5,10 +5,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include - +#include #include "network_impl.h" #include "primitive_inst.h" + #include #include @@ -26,9 +26,10 @@ private: void set(const program_node& node) { add_or_change_input_layout(node); - _program = node.get_program().get_engine().build_program(_topology, - node.get_program().get_options(), - true); // rebuild program + _program = program_impl::build_program(node.get_program().get_engine(), + _topology, + node.get_program().get_options(), + true); // rebuild program } program_impl::ptr get() const { return _program; } @@ -89,8 +90,10 @@ public: static std::string to_string(condition_node const& node); typed_primitive_inst(network_impl& network, condition_node const& node); - memory_impl& input_memory() const { return dep_memory(0); } - memory_impl& compare_memory() const { return dep_memory(1); } + memory::ptr input_memory_ptr() const { return dep_memory_ptr(0); } + memory::ptr compare_memory_ptr() const { return dep_memory_ptr(1); } + memory& input_memory() const { return dep_memory(0); } + memory& compare_memory() const { return dep_memory(1); } network_impl::ptr get_net_true() const { return _net_true; } network_impl::ptr get_net_false() const { return _net_false; } primitive_id result_id() const { return node.result_id(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h 
index b90d667134c..899ad44dc9b 100644 --- a/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/convolution.hpp" +#include "cldnn/primitives/convolution.hpp" #include "primitive_inst.h" #include @@ -120,48 +120,48 @@ public: public: typed_primitive_inst(network_impl& network, convolution_node const& node); - memory_impl& weights_memory(size_t index) const { + memory::ptr weights_memory(size_t index) const { if (node.get_groups() == 1) { if (static_cast(index) >= node.get_split()) throw std::range_error("weights offset too big"); - return dep_memory(1 + index + node.get_trans_dep_offset()); + return dep_memory_ptr(1 + index + node.get_trans_dep_offset()); } else { // all weights are in one buffer - return dep_memory(1 + node.get_trans_dep_offset()); + return dep_memory_ptr(1 + node.get_trans_dep_offset()); } } - memory_impl& bias_memory(size_t index) const { + memory::ptr bias_memory(size_t index) const { if (node.get_groups() == 1) { if (static_cast(index) >= node.get_split()) throw std::range_error("bias offset too big"); - return dep_memory(1 + node.get_split() + index + node.get_trans_dep_offset()); + return dep_memory_ptr(1 + node.get_split() + index + node.get_trans_dep_offset()); } else { // all bias are in one buffer - return dep_memory(2 + node.get_trans_dep_offset()); + return dep_memory_ptr(2 + node.get_trans_dep_offset()); } } - memory_impl& weights_zero_points_memory(size_t) const { + memory::ptr weights_zero_points_memory(size_t) const { if (node.get_split() > 1) throw std::range_error("Split is unsupported for quantized convolutions"); - return dep_memory(2 + 1 * bias_term() + node.get_trans_dep_offset()); + return dep_memory_ptr(2 + 1 * bias_term() + node.get_trans_dep_offset()); } - memory_impl& trans_memory() const { + memory::ptr trans_memory() const { if (!node.get_trans_dep_offset()) throw std::range_error("trans input exists only in deformable mode"); - return dep_memory(1); + return dep_memory_ptr(1); } - memory_impl& activations_zero_points_memory(size_t) const { + memory::ptr activations_zero_points_memory(size_t) const { if (node.get_split() > 1) throw std::range_error("Split is unsupported for quantized convolutions"); - return dep_memory(2 + 1 * bias_term() + 1 * weights_zero_points_term() + node.get_trans_dep_offset()); + return dep_memory_ptr(2 + 1 * bias_term() + 1 * weights_zero_points_term() + node.get_trans_dep_offset()); } - memory_impl& compensation_memory(size_t) const { + memory::ptr compensation_memory(size_t) const { if (node.get_split() > 1) throw std::range_error("Split is unsupported for quantized convolutions"); - return dep_memory(2 + 1 * bias_term() + 1 * weights_zero_points_term() + 1*activations_zero_points_term() + node.get_trans_dep_offset()); + return dep_memory_ptr(2 + 1 * bias_term() + 1 * weights_zero_points_term() + 1*activations_zero_points_term() + node.get_trans_dep_offset()); } bool bias_term() const { return node.bias_term(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/crop_inst.h b/inference-engine/thirdparty/clDNN/src/include/crop_inst.h index 4ae7a095f0b..e8842e5bed9 100644 --- a/inference-engine/thirdparty/clDNN/src/include/crop_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/crop_inst.h @@ -4,8 +4,9 @@ 
/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/crop.hpp" +#include "cldnn/primitives/crop.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/ctc_greedy_decoder_inst.h b/inference-engine/thirdparty/clDNN/src/include/ctc_greedy_decoder_inst.h index 7e856ff2e0c..ea5d2bcda58 100644 --- a/inference-engine/thirdparty/clDNN/src/include/ctc_greedy_decoder_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/ctc_greedy_decoder_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/ctc_greedy_decoder.hpp" +#include "cldnn/primitives/ctc_greedy_decoder.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/cum_sum_inst.h b/inference-engine/thirdparty/clDNN/src/include/cum_sum_inst.h index 452076299e8..fb9cecf7fc5 100644 --- a/inference-engine/thirdparty/clDNN/src/include/cum_sum_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/cum_sum_inst.h @@ -4,8 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/cum_sum.hpp" - +#include "cldnn/primitives/cum_sum.hpp" #include "primitive_inst.h" namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h b/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h index 125dbcfc2b6..20c40ecdf74 100644 --- a/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/custom_gpu_primitive.hpp" +#include "cldnn/primitives/custom_gpu_primitive.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/data_inst.h b/inference-engine/thirdparty/clDNN/src/include/data_inst.h index 311257992c9..e1cafad9c72 100644 --- a/inference-engine/thirdparty/clDNN/src/include/data_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/data_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/data.hpp" +#include "cldnn/primitives/data.hpp" #include "primitive_inst.h" + #include #include @@ -17,11 +18,12 @@ struct typed_program_node : public typed_program_node_base { typed_program_node(const std::shared_ptr prim, program_impl& prog); - memory_impl& get_attached_memory() const { return *mem; } - void attach_memory(memory_impl& new_mem, bool invalidate_users_if_changed = true); + memory& get_attached_memory() const { return *mem; } + memory::ptr get_attached_memory_ptr() const { return mem; } + void attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed = true); private: - memory_impl::ptr mem; + memory::ptr mem; }; using data_node = typed_program_node; diff --git a/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h index c96d179aa98..f8d93300244 100644 --- a/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h @@ -4,8 +4,10 @@ 
/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/deconvolution.hpp" + +#include "cldnn/primitives/deconvolution.hpp" #include "primitive_inst.h" + #include #include @@ -90,25 +92,25 @@ public: public: typed_primitive_inst(network_impl& network, deconvolution_node const& node); - memory_impl& weights_memory(size_t index) const { + memory::ptr weights_memory(size_t index) const { if (node.get_groups() == 1) { if (static_cast(index) >= node.get_split()) throw std::range_error("weights offset too big"); - return dep_memory(1 + index); + return dep_memory_ptr(1 + index); } else { // all weights are in one buffer - return dep_memory(1); + return dep_memory_ptr(1); } } - memory_impl& bias_memory(size_t index) const { + memory::ptr bias_memory(size_t index) const { if (node.get_groups() == 1) { if (argument.bias.size() == 0 && static_cast(index) >= node.get_split()) throw std::range_error("no bias data"); if (static_cast(index) > node.get_split()) throw std::range_error("bias offset too big"); - return dep_memory(1 + node.get_split() + index); + return dep_memory_ptr(1 + node.get_split() + index); } else { // all bias are in one buffer - return dep_memory(2); + return dep_memory_ptr(2); } } diff --git a/inference-engine/thirdparty/clDNN/src/include/deformable_convolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/deformable_convolution_inst.h index 63c4556b65f..c7889f78600 100644 --- a/inference-engine/thirdparty/clDNN/src/include/deformable_convolution_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/deformable_convolution_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/convolution.hpp" +#include "cldnn/primitives/convolution.hpp" #include "primitive_inst.h" #include @@ -75,23 +75,23 @@ public: public: typed_primitive_inst(network_impl& network, deformable_conv_node const& node); - memory_impl& weights_memory(size_t index) const { + memory::ptr weights_memory(size_t index) const { if (node.get_groups() == 1) { if (static_cast(index) >= node.get_split()) throw std::range_error("weights offset too big"); - return dep_memory(1 + index); + return dep_memory_ptr(1 + index); } else { // all weights are in one buffer - return dep_memory(1); + return dep_memory_ptr(1); } } - memory_impl& bias_memory(size_t index) const { + memory::ptr bias_memory(size_t index) const { if (node.get_groups() == 1) { if (static_cast(index) >= node.get_split()) throw std::range_error("bias offset too big"); - return dep_memory(1 + node.get_split()); + return dep_memory_ptr(1 + node.get_split()); } else { // all bias are in one buffer - return dep_memory(2); + return dep_memory_ptr(2); } } @@ -154,7 +154,7 @@ public: public: typed_primitive_inst(network_impl& network, deformable_interp_node const& node); - memory_impl& trans_memory() const { return dep_memory(1); } + memory& trans_memory() const { return dep_memory(1); } }; using deformable_interp_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h b/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h index 0d85e72663b..f551ee7ae34 100644 --- a/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h @@ -4,9 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// 
#pragma once -#include "api/depth_to_space.hpp" +#include "cldnn/primitives/depth_to_space.hpp" #include "primitive_inst.h" #include "kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_base.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h index 398767ed581..6ce0a82eb4b 100644 --- a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h @@ -4,9 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/detection_output.hpp" +#include "cldnn/primitives/detection_output.hpp" #include "primitive_inst.h" #include "topology_impl.h" + #include #define PRIOR_BOX_SIZE 4 // Each prior-box consists of [xmin, ymin, xmax, ymax]. @@ -41,9 +42,9 @@ public: public: typed_primitive_inst(network_impl& network, detection_output_node const& node); - memory_impl& location_memory() const { return dep_memory(0); } - memory_impl& confidence_memory() const { return dep_memory(1); } - memory_impl& prior_box_memory() const { return dep_memory(2); } + memory::ptr location_memory() const { return dep_memory_ptr(0); } + memory::ptr confidence_memory() const { return dep_memory_ptr(1); } + memory::ptr prior_box_memory() const { return dep_memory_ptr(2); } }; using detection_output_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/device_impl.h b/inference-engine/thirdparty/clDNN/src/include/device_impl.h deleted file mode 100644 index 9c692ae49bc..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/device_impl.h +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include -#include "gpu/device_info.h" -#include "api/device.hpp" -#include "refcounted_obj.h" -#include "gpu/configuration.h" - -#include -#include -#include -#include - -namespace cldnn { -enum class allocation_type { - unknown, // Not specified (i.e simple_attached_memory class). - cl_mem, // Use standard OpenCL cl_mem allocations. - usm_host, // Accessible only by host. Not Migratable - usm_shared, // Accessible by host and device. Migrtable. - usm_device, // Accessible only by device. Not migratable. 
-}; - -struct device_impl; - -class memory_capabilities { -public: - memory_capabilities(bool support_usm, const cl::Device& cl_dev) : _caps({ allocation_type::cl_mem }) { - if (support_usm) { - if (does_device_support(CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, cl_dev)) { - _caps.push_back(allocation_type::usm_host); - } - if (does_device_support(CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, cl_dev)) { - _caps.push_back(allocation_type::usm_shared); - } - if (does_device_support(CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, cl_dev)) { - _caps.push_back(allocation_type::usm_device); - } - } - } - - bool supports_usm() const { return find_in_caps(allocation_type::cl_mem) && _caps.size() > 1; } - bool support_allocation_type(allocation_type type) const { return find_in_caps(type); } - - static bool is_usm_type(allocation_type type) { - if (type == allocation_type::usm_host || - type == allocation_type::usm_shared || - type == allocation_type::usm_device) - return true; - return false; - } - -private: - std::vector _caps; - - bool does_device_support(int32_t param, const cl::Device& device) { - cl_device_unified_shared_memory_capabilities_intel capabilities; - auto err = clGetDeviceInfo(device.get(), param, sizeof(cl_device_unified_shared_memory_capabilities_intel), &capabilities, NULL); - if (err) throw std::runtime_error("[CLDNN ERROR]. clGetDeviceInfo error " + std::to_string(err)); - return !((capabilities & CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL) == 0u); - } - - bool find_in_caps(const allocation_type& type) const { - return std::find_if(_caps.begin(), _caps.end(), [&](const allocation_type& t) { return t == type; }) != _caps.end(); - } -}; - - -struct device_impl : public refcounted_obj { -public: - explicit device_impl(const cl::Device dev, const cl::Context& ctx, const cl_platform_id platform, const gpu::device_info_internal& info) - : _context(ctx), _device(dev), _platform(platform), _info(info), _mem_caps(_info.supports_usm, _device) { } - - gpu::device_info_internal get_info() const { return _info; } - cl::Device get_device() const { return _device; } - cl::Context get_context() const { return _context; } - cl_platform_id get_platform() const { return _platform; } - memory_capabilities mem_caps() const { return _mem_caps; } - - ~device_impl() = default; - -private: - cl::Context _context; - cl::Device _device; - cl_platform_id _platform; - gpu::device_info_internal _info; - memory_capabilities _mem_caps; -}; - -struct device_query_impl : public refcounted_obj { -public: - explicit device_query_impl(void* user_context = nullptr, void* user_device = nullptr); - - std::map get_available_devices() const { - return _available_devices; - } - - ~device_query_impl() = default; -private: - std::map _available_devices; -}; -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h index f1c72138fad..9b62e0b73b2 100644 --- a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h @@ -4,11 +4,12 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/eltwise.hpp" +#include "cldnn/primitives/eltwise.hpp" #include "primitive_inst.h" -#include #include "topology_impl.h" #include "kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h" + +#include #include namespace cldnn { diff --git 
a/inference-engine/thirdparty/clDNN/src/include/embedding_bag_inst.h b/inference-engine/thirdparty/clDNN/src/include/embedding_bag_inst.h index 1b5982083ff..79151251ce9 100644 --- a/inference-engine/thirdparty/clDNN/src/include/embedding_bag_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/embedding_bag_inst.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/embedding_bag.hpp" - +#include "cldnn/primitives/embedding_bag.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/engine_impl.h b/inference-engine/thirdparty/clDNN/src/include/engine_impl.h deleted file mode 100644 index addc1e1daf0..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/engine_impl.h +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "api/memory.hpp" -#include "event_impl.h" -#include "refcounted_obj.h" -#include "implementation_map.h" -#include "memory_pool.h" -#include "device_impl.h" - -#include -#include -#include -#include -#include - -namespace cldnn { -namespace gpu { -class gpu_toolkit; -} - -class build_options; -using gpu_toolkit = gpu::gpu_toolkit; - -struct memory_impl; -struct event_impl; -struct topology_impl; -struct program_impl; -struct network_impl; -struct program_node; - -template -struct typed_program_node; - -struct engine_impl : public refcounted_obj { -public: - explicit engine_impl(const device_impl& dev, const engine_configuration& conf); - ~engine_impl(); - engine_types type() const { return engine_types::ocl; } - refcounted_obj_ptr allocate_memory(const layout& layout, uint32_t net_id, bool reset = true); - refcounted_obj_ptr allocate_memory(const layout& layout, allocation_type type, uint32_t net_id = 0, bool reset = true); - refcounted_obj_ptr allocate_memory(const layout& layout, - primitive_id, - uint32_t network_id, - std::set, - allocation_type type, - bool reusable = true); - refcounted_obj_ptr reinterpret_buffer(const memory_impl& memory, const layout& new_layout); - refcounted_obj_ptr reinterpret_handle(const layout& new_layout, - const shared_mem_params* params, - uint32_t net_id); - bool is_the_same_buffer(const memory_impl& mem1, const memory_impl& mem2); - - refcounted_obj_ptr create_user_event(uint32_t net_id, bool set = false); - void wait_for_events(std::vector const& events); - - refcounted_obj_ptr build_program(const topology_impl& topology, - const build_options& options, - bool is_internal = false, - bool no_optimizations = false); - refcounted_obj_ptr build_program(const std::set>& nodes, - const build_options& options, - bool is_internal); - void compile_program(program_impl& prog); - - refcounted_obj_ptr allocate_network(const program_impl& program, - uint16_t stream_id, - bool is_internal = false); - refcounted_obj_ptr build_network(const topology_impl& topology, - const build_options& options, - uint16_t stream_id, - bool is_internal = false); - refcounted_obj_ptr build_network(const std::set>& nodes, - const build_options& options, - bool is_internal); - void flush_network(uint32_t net_id); - void release_pending_memory(uint32_t net_id); - - template - std::unique_ptr create_primitive_impl(typed_program_node const& node) { - if (&node.get_program().get_engine() != this) - throw 
std::invalid_argument( - "engine_impl::create_primitive_impl: program's engine does not match called engine"); - - auto factory = implementation_map::get(type(), node); - return std::move(std::unique_ptr(factory(node))); - } - - template - bool does_an_implementation_exist(typed_program_node const& node) { - if (&node.get_program().get_engine() != this) - throw std::invalid_argument( - "engine_impl::create_primitive_impl: program's engine does not match called engine"); - return implementation_map::check(type(), node); - } - - template - bool does_possible_implementation_exist(typed_program_node const& node) { - if (&node.get_program().get_engine() != this) - throw std::invalid_argument( - "engine_impl::create_primitive_impl: program's engine does not match called engine"); - return implementation_map::check_io_eq(type(), node); - } - - const engine_configuration& configuration() const { return _configuration; } - void set_mem_pool(bool flag) { _configuration.enable_memory_pool = flag; } - std::shared_ptr get_context() const { return _context; } - gpu::device_info_internal get_device_info() const; - void* get_user_context() const; - memory_pool& get_memory_pool() { return _memory_pool; } - - uint64_t get_max_used_device_memory() const { return _memory_pool.get_max_peak_device_memory_used(); } - uint64_t get_used_device_memory() const { return _memory_pool.get_temp_memory_used(); } - - void dump_memory_pool(const program_impl& program, std::string& path, std::string& dependencies) { - _memory_pool.dump_memory_pool(program, path, dependencies); - } - bool use_memory_pool() const; - bool use_unified_shared_memory() const; - bool supports_allocation(allocation_type type) const; - allocation_type get_lockable_preffered_memory_allocation_type(bool is_image_layout = false) const; - -private: - engine_configuration _configuration; - std::shared_ptr _context; - memory_pool _memory_pool; -}; -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h b/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h index 00ec93d57db..2eb4dd7f792 100644 --- a/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/extract_image_patches_inst.h @@ -4,8 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once - -#include "api/extract_image_patches.hpp" +#include "cldnn/primitives/extract_image_patches.hpp" #include "primitive_inst.h" namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/fully_connected_inst.h b/inference-engine/thirdparty/clDNN/src/include/fully_connected_inst.h index 75e00f7dd92..db3a156dec5 100644 --- a/inference-engine/thirdparty/clDNN/src/include/fully_connected_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/fully_connected_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/fully_connected.hpp" +#include "cldnn/primitives/fully_connected.hpp" #include "primitive_inst.h" + #include #include @@ -37,8 +38,8 @@ public: public: typed_primitive_inst(network_impl& network, fully_connected_node const& node); - memory_impl& weights_memory() const { return dep_memory(1); } - memory_impl& bias_memory() const { return dep_memory(2); } + memory::ptr weights_memory() const { return dep_memory_ptr(1); } + memory::ptr bias_memory() const { return dep_memory_ptr(2); } bool 
bias_term() const { return !argument.bias.empty(); } }; diff --git a/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h b/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h index 6f4aa5a47a0..65b23e3d8ec 100644 --- a/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api_extension/fused_conv_eltwise.hpp" +#include "cldnn/primitives/fused_conv_eltwise.hpp" #include "primitive_inst.h" #include @@ -83,18 +83,18 @@ public: public: typed_primitive_inst(network_impl& network, fused_conv_eltwise_node const& node); - memory_impl& weights_memory(size_t index) const { + memory::ptr weights_memory(size_t index) const { if (static_cast(index) >= node.get_split()) throw std::range_error("weights offset too big"); - return dep_memory(2 + index); + return dep_memory_ptr(2 + index); } - memory_impl& bias_memory(size_t index) const { + memory::ptr bias_memory(size_t index) const { if (static_cast(index) >= node.get_split()) throw std::range_error("bias offset too big"); - return dep_memory(2 + node.get_split() + index); + return dep_memory_ptr(2 + node.get_split() + index); } bool bias_term() const { return node.bias_term(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/gather_inst.h b/inference-engine/thirdparty/clDNN/src/include/gather_inst.h index f8e2c924cea..9acd82a499c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/gather_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/gather_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/gather.hpp" +#include "cldnn/primitives/gather.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/gather_nd_inst.h b/inference-engine/thirdparty/clDNN/src/include/gather_nd_inst.h index b8732f7171c..86a3b0b4cec 100644 --- a/inference-engine/thirdparty/clDNN/src/include/gather_nd_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/gather_nd_inst.h @@ -1,22 +1,9 @@ -/* -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-*/ -/////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/gather_nd.hpp" +#include "cldnn/primitives/gather_nd.hpp" #include "primitive_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h b/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h index cb18b6e7133..589c004af1a 100644 --- a/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/gather_tree_inst.h @@ -3,10 +3,9 @@ // #pragma once - -#include - +#include "cldnn/primitives/gather_tree.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/gemm_inst.h b/inference-engine/thirdparty/clDNN/src/include/gemm_inst.h index 501f3029c56..670edf13063 100644 --- a/inference-engine/thirdparty/clDNN/src/include/gemm_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/gemm_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/gemm.hpp" +#include "cldnn/primitives/gemm.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/generic_layer.hpp b/inference-engine/thirdparty/clDNN/src/include/generic_layer.hpp index 7809c2d58f3..79f0616b916 100644 --- a/inference-engine/thirdparty/clDNN/src/include/generic_layer.hpp +++ b/inference-engine/thirdparty/clDNN/src/include/generic_layer.hpp @@ -4,9 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/primitive.hpp" -#include "api/memory.hpp" +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/runtime/memory.hpp" #include "kernel_selector_helper.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h b/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h index e04fa801f3e..32c90371abb 100644 --- a/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h @@ -6,6 +6,7 @@ #pragma once #include "generic_layer.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/grn_inst.h b/inference-engine/thirdparty/clDNN/src/include/grn_inst.h index 1023d3ba4fa..c470a44d9dd 100644 --- a/inference-engine/thirdparty/clDNN/src/include/grn_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/grn_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/grn.hpp" +#include "cldnn/primitives/grn.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/implementation_map.h b/inference-engine/thirdparty/clDNN/src/include/implementation_map.h index 44b36bc9478..1a6bb516006 100644 --- a/inference-engine/thirdparty/clDNN/src/include/implementation_map.h +++ b/inference-engine/thirdparty/clDNN/src/include/implementation_map.h @@ -132,6 +132,8 @@ public: using factory_type = std::function&)>; using map_type = singleton_map; + // TODO: Replace engine_type here with impl_type + // And add a check that the engine supports the specific impl_type static factory_type get(engine_types engine_type, const typed_program_node& primitive) { // lookup in database; throw if not found auto
key = key_builder()(engine_type, primitive); diff --git a/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h b/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h index 3db3e7db12a..c6199634634 100644 --- a/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h @@ -4,13 +4,14 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/input_layout.hpp" +#include "cldnn/primitives/input_layout.hpp" #include "primitive_inst.h" + #include #include namespace cldnn { -struct memory_impl; +struct memory; template <> struct typed_program_node : public typed_program_node_base { @@ -33,7 +34,7 @@ public: public: typed_primitive_inst(network_impl& network, input_layout_node const& node); - void set_data(memory_impl& mem); + void set_data(memory::ptr mem); }; using input_layout_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/internal_primitive.h b/inference-engine/thirdparty/clDNN/src/include/internal_primitive.h deleted file mode 100644 index 927871b63d6..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/internal_primitive.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "api/primitive.hpp" - -#include "primitive_type.h" - -namespace cldnn { - -struct internal_primitive : public primitive { -public: - // a helper structure which returns true when compared with any primitive_type which is internal - struct internal_primitive_generic_type { - friend bool operator==(internal_primitive_generic_type, primitive_type_id type) { - return type->is_internal_type(); - } - - friend bool operator==(primitive_type_id type, internal_primitive_generic_type) { - return type->is_internal_type(); - } - - friend bool operator==(internal_primitive_generic_type, internal_primitive_generic_type) { return true; } - }; - - static internal_primitive_generic_type type_id() { return {}; } - -private: - internal_primitive() = delete; - internal_primitive(internal_primitive const&) = delete; - internal_primitive(internal_primitive&&) = delete; -}; - -} // namespace cldnn \ No newline at end of file diff --git a/inference-engine/thirdparty/clDNN/src/include/internal_primitive_type_base.h b/inference-engine/thirdparty/clDNN/src/include/internal_primitive_type_base.h deleted file mode 100644 index 64aa1342a1f..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/internal_primitive_type_base.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "meta_utils.h" -#include "primitive_type.h" -#include "internal_primitive.h" -#include "program_node.h" -#include "primitive_inst.h" -#include -#include - -namespace cldnn { - -template -struct internal_primitive_type_base : public primitive_type { - static_assert(meta::is_internal_primitive::value, - "Primitive type passed to internal_primitive_type_base should derive from internal_primitive"); - - [[noreturn]] std::shared_ptr create_node(program_impl&, - const std::shared_ptr) const override { - throw std::runtime_error( - "Trying to create generic program_node for an internal primitive - internal primitives' nodes should be " - "created manually"); - } - - std::shared_ptr create_instance(network_impl& network, - const cldnn::program_node& node) const 
override { - if (node.type() != this) - throw std::invalid_argument("internal_primitive_type_base::create_instance: primitive type mismatch"); - - return std::make_shared>(network, node); - } - - [[noreturn]] std::unique_ptr choose_impl(cldnn::engine_impl&, - const cldnn::program_node&) const override { - throw std::runtime_error( - "primitive_type_id::choose_impl called for internal primitive - internal primitives should have manually " - "attached executable"); - } - - [[noreturn]] cldnn::layout calc_output_layout(const cldnn::program_node&) const override { - throw std::runtime_error( - "primitive_type_id::calc_output_layout called for internal primitive - internal primitives should have " - "output layouts precalculated"); - } - - std::string to_string(const cldnn::program_node& node) const override { - if (node.type() != this) - throw std::invalid_argument("primitive_type_base::to_string: primitive type mismatch"); - - return typed_primitive_inst::to_string(node); - } - - bool is_internal_type() const override { return true; } -}; - -#define CLDNN_DEFINE_INTERNAL_PRIM(PType) \ - struct PType : public internal_primitive { \ - static primitive_type_id type_id() { \ - static internal_primitive_type_base instance; \ - return &instance; \ - } \ - }; \ - using PType##_node = typed_program_node; - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h index 46926eecbca..f97f74ebbbc 100644 --- a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h +++ b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h @@ -4,17 +4,17 @@ #pragma once -#include "api/cldnn.hpp" -#include "api/tensor.hpp" -#include "api/eltwise.hpp" -#include "api/scale.hpp" -#include "api/quantize.hpp" -#include "api/activation.hpp" +#include "cldnn/runtime/utils.hpp" +#include "cldnn/runtime/tensor.hpp" +#include "cldnn/runtime/error_handler.hpp" +#include "cldnn/primitives/eltwise.hpp" +#include "cldnn/primitives/scale.hpp" +#include "cldnn/primitives/quantize.hpp" +#include "cldnn/primitives/activation.hpp" #include "kernel_selector_params.h" #include "kernel_selector_common.h" #include "tensor_type.h" -#include "error_handler.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/layout_optimizer.h b/inference-engine/thirdparty/clDNN/src/include/layout_optimizer.h index 8bd0da81f09..ab15eed29be 100644 --- a/inference-engine/thirdparty/clDNN/src/include/layout_optimizer.h +++ b/inference-engine/thirdparty/clDNN/src/include/layout_optimizer.h @@ -4,9 +4,9 @@ #pragma once -#include "memory_impl.h" -#include "engine_impl.h" -#include "meta_utils.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/utils.hpp" #include "data_inst.h" #include "reorder_inst.h" diff --git a/inference-engine/thirdparty/clDNN/src/include/loop_inst.h b/inference-engine/thirdparty/clDNN/src/include/loop_inst.h index b555f37e716..e39bb90f431 100644 --- a/inference-engine/thirdparty/clDNN/src/include/loop_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/loop_inst.h @@ -5,17 +5,17 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/loop.hpp" -#include "api/mutable_data.hpp" -#include "api/input_layout.hpp" -#include "api/memory.hpp" +#include "cldnn/primitives/loop.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include 
"cldnn/primitives/input_layout.hpp" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "network_impl.h" #include "primitive_inst.h" #include #include #include -#include "error_handler.h" namespace cldnn { template<> @@ -31,7 +31,7 @@ private: bool use_current_iteration; bool use_execution_condition; mutable program_impl::ptr body_program; - mutable std::map backedge_mem_impls; + mutable std::map backedge_mem_impls; mutable std::map> backedge_layers; mutable std::map> backedge_mem; @@ -40,8 +40,8 @@ private: void setup_internal_mutabledata_node(primitive_id md_id, layout md_layout, std::vector md_inputs_id = {}, uint32_t net_id = 0) const { if (body.get_primitives().count(md_id) == 0) { backedge_mem_impls[md_id] = get_program().get_engine().allocate_memory(md_layout, net_id); - backedge_mem[md_id] = std::make_shared(backedge_mem_impls[md_id].get()); - backedge_layers[md_id] = std::make_shared(md_id, md_inputs_id, *backedge_mem[md_id]); + backedge_mem[md_id] = backedge_mem_impls[md_id]; + backedge_layers[md_id] = std::make_shared(md_id, md_inputs_id, backedge_mem[md_id]); body.add(backedge_layers[md_id]); } } @@ -266,7 +266,7 @@ public: auto opts = get_program().get_options(); std::vector output_names_vec(output_names.begin(), output_names.end()); opts.set_option(build_option::outputs(output_names_vec)); - body_program = get_program().get_engine().build_program(body, opts, false); + body_program = program_impl::build_program(get_program().get_engine(), body, opts, false); } const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } @@ -298,17 +298,19 @@ public: }; std::shared_ptr from_primitive; std::shared_ptr to_primitive; - std::vector from_mems; - memory_impl::ptr initial_mem; + std::vector from_mems; + memory::ptr initial_mem; + cldnn::stream& stream; backedge_type type; size_t total_bytes; backedge_memory_mapping( std::shared_ptr from_primitive, std::shared_ptr to_primitive, - std::vector from_mems, memory_impl::ptr initial_mem, backedge_type type = CONCAT_OUTPUT): + std::vector from_mems, memory::ptr initial_mem, cldnn::stream& stream, backedge_type type = CONCAT_OUTPUT): from_primitive(from_primitive), to_primitive(to_primitive), from_mems(from_mems), + stream(stream), type(type), total_bytes(initial_mem->get_layout().bytes_count()) { validate_backedge_memory(); @@ -316,11 +318,12 @@ public: backedge_memory_mapping( std::shared_ptr from_primitive, std::shared_ptr to_primitive, - memory_impl::ptr from_mem, memory_impl::ptr initial_mem, backedge_type type = SINGLE_SHARED): + memory::ptr from_mem, memory::ptr initial_mem, cldnn::stream& stream, backedge_type type = SINGLE_SHARED): from_primitive(from_primitive), to_primitive(to_primitive), from_mems{from_mem}, initial_mem(initial_mem), + stream(stream), type(type), total_bytes(initial_mem->get_layout().bytes_count()) { validate_backedge_memory(); @@ -328,10 +331,11 @@ public: backedge_memory_mapping( std::shared_ptr from_primitive, std::shared_ptr to_primitive, - memory_impl::ptr initial_mem, backedge_type type = SINGLE): + memory::ptr initial_mem, cldnn::stream& stream, backedge_type type = SINGLE): from_primitive(from_primitive), to_primitive(to_primitive), initial_mem(initial_mem), + stream(stream), type(type), total_bytes(initial_mem->get_layout().bytes_count()) { validate_backedge_memory(); @@ -340,22 +344,22 @@ public: void setup_iteration(int64_t iter) const { if (type == CONCAT_OUTPUT) { if (iter == 0) { - to_primitive->set_output_memory(*initial_mem); + 
to_primitive->set_output_memory(*initial_mem); + to_primitive->set_output_memory(initial_mem); } else if (iter > 0) { - to_primitive->set_output_memory(*from_mems.at(iter - 1)); + to_primitive->set_output_memory(from_mems.at(iter - 1)); } else { throw std::runtime_error("Invalid iteration count" + std::to_string(iter)); } } else if (type == SINGLE_SHARED && iter == 0) { - copy_data(initial_mem, from_mems.front()); + from_mems.front()->copy_from(stream, *initial_mem); } else if (type == SINGLE) { - memory_impl::ptr mem1 = (memory_impl::ptr)&to_primitive->output_memory(); + memory::ptr mem1 = to_primitive->output_memory_ptr(); if (iter == 0) { - copy_data(initial_mem, mem1); + mem1->copy_from(stream, *initial_mem); } else { - memory_impl::ptr mem2 = (memory_impl::ptr)&from_primitive->output_memory(); - to_primitive->set_output_memory(*mem2); - from_primitive->set_output_memory(*mem1); + memory::ptr mem2 = from_primitive->output_memory_ptr(); + to_primitive->set_output_memory(mem2); + from_primitive->set_output_memory(mem1); } } } @@ -370,26 +374,20 @@ private: } } } - - void copy_data(cldnn::memory_impl::ptr src_mem, cldnn::memory_impl::ptr dst_mem) const { - mem_lock from_lock {src_mem}; - mem_lock to_lock {dst_mem}; - const auto src = from_lock.begin(); - const auto dst = to_lock.begin(); - std::copy(src, src + total_bytes, dst); - } }; struct concatenated_memory_mapping { concatenated_memory_mapping(int64_t axis, - memory_impl::ptr concatenated_mem, - std::vector sliced_mems, - int64_t iteration_elements = 0, - int64_t stride = 0, - int64_t initial_offset = 0) : + memory::ptr concatenated_mem, + std::vector sliced_mems, + stream& stream, + int64_t iteration_elements = 0, + int64_t stride = 0, + int64_t initial_offset = 0) : axis(axis), concatenated_mem(concatenated_mem), sliced_mems(sliced_mems), + stream(stream), bytes_per_element(data_type_traits::size_of(concatenated_mem->get_layout().data_type)), batch_size(get_batch_size(concatenated_mem->get_layout(), axis)), bytes_batch_stride((static_cast(concatenated_mem->get_layout().count()) / batch_size) * bytes_per_element), @@ -410,13 +408,13 @@ private: } void restore_concatenated_mem() const { - mem_lock concat_mem_lock{ concatenated_mem }; + mem_lock concat_mem_lock{ concatenated_mem, stream }; int64_t iteration_offset = bytes_iteration_initial_offset; for (const auto& sliced_mem : sliced_mems) { for (int64_t batch = 0; batch < batch_size; ++batch) { const int64_t src_offset = batch * bytes_iteration; const int64_t dst_offset = batch * bytes_batch_stride + iteration_offset; - mem_lock sliced_mem_lock{ sliced_mem }; + mem_lock sliced_mem_lock{ sliced_mem, stream }; uint8_t* src = sliced_mem_lock.data() + src_offset; uint8_t* dst = concat_mem_lock.data() + dst_offset; std::copy(src, src + bytes_iteration, dst); @@ -427,18 +425,18 @@ private: void setup_concatenated_output_memory(uint64_t iteration) const { const auto& sliced_output_mem = sliced_mems.at(iteration); - concat_data_prim->set_output_memory(*sliced_output_mem); + concat_data_prim->set_output_memory(sliced_output_mem); } - memory_impl::ptr get_sliced_mem(int64_t iteration) const { - mem_lock from_lock{ concatenated_mem }; + memory::ptr get_sliced_mem(int64_t iteration) const { + mem_lock from_lock{ concatenated_mem, stream }; int64_t batch_offset = 0; const int64_t iteration_offset = bytes_iteration_initial_offset + bytes_iteration_stride * iteration; for (int64_t batch = 0; batch < batch_size; ++batch) { const int64_t src_offset = batch_offset + iteration_offset; const int64_t dst_offset = batch * bytes_iteration; - mem_lock
to_lock{ sliced_mems.at(iteration) }; + mem_lock to_lock{ sliced_mems.at(iteration), stream }; const auto src = from_lock.begin() + src_offset; const auto dst = to_lock.begin() + dst_offset; std::copy(src, src + bytes_iteration, dst); @@ -450,8 +448,9 @@ private: const int64_t axis; std::shared_ptr concat_data_prim; std::shared_ptr sliced_data_prim; - memory_impl::ptr concatenated_mem; - std::vector sliced_mems; + memory::ptr concatenated_mem; + std::vector sliced_mems; + cldnn::stream& stream; // element size const int64_t bytes_per_element; // number of higher level of dimension of slicing axis @@ -483,8 +482,8 @@ public: private: network_impl::ptr body_network; - memory_impl::ptr get_external_memory(const primitive_id& external_id) const; - std::vector get_sliced_mem(const primitive_id& internal_id) const; + memory::ptr get_external_memory(const primitive_id& external_id) const; + std::vector get_sliced_mem(const primitive_id& internal_id) const; }; using loop_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/lrn_inst.h b/inference-engine/thirdparty/clDNN/src/include/lrn_inst.h index 47df087be8f..a7bcd3f33c0 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lrn_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lrn_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/lrn.hpp" +#include "cldnn/primitives/lrn.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_input_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_input_inst.h index 5676fe49914..9ad70dffef0 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_input_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_input_inst.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api_extension/lstm_dynamic_input.hpp" +#include "cldnn/primitives/lstm_dynamic_input.hpp" #include "primitive_inst.h" -#include "error_handler.h" + #include #include @@ -47,11 +47,11 @@ public: public: typed_primitive_inst(network_impl& network, lstm_dynamic_input_node const& node); - memory_impl& dyn_length_memory() const { return dep_memory(1); } - memory_impl& weights_memory() const { return dep_memory(2); } - memory_impl& bias_memory() const { + memory::ptr dyn_length_memory() const { return dep_memory_ptr(1); } + memory::ptr weights_memory() const { return dep_memory_ptr(2); } + memory::ptr bias_memory() const { CLDNN_ERROR_BOOL(id(), "Bias term", !bias_term(), "Trying to get non existing bias memory."); - return dep_memory(3); + return dep_memory_ptr(3); } int32_t direction() const { return node.direction(); } bool bias_term() const { return node.bias_term(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_inst.h index c64530ffae9..fe281d35ff4 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_inst.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/lstm_dynamic.hpp" +#include "cldnn/primitives/lstm_dynamic.hpp" #include "primitive_inst.h" -#include "error_handler.h" + #include #include diff --git 
a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_timeloop_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_timeloop_inst.h index 6330abb5efc..b7ded70f51b 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_timeloop_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_dynamic_timeloop_inst.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api_extension/lstm_dynamic_timeloop.hpp" +#include "cldnn/primitives/lstm_dynamic_timeloop.hpp" #include "primitive_inst.h" -#include "error_handler.h" + #include #include #include @@ -68,12 +68,12 @@ public: public: typed_primitive_inst(network_impl& network, lstm_dynamic_timeloop_node const& node); - memory_impl& dyn_length_memory() const { return get_dependency_memory("dyn_length"); } - memory_impl& recurrent_memory() const { return get_dependency_memory("recurrent"); } - memory_impl& last_hidden_output_memory() const { return get_dependency_memory("last_hidden_output"); } - memory_impl& last_cell_output_memory() const { return get_dependency_memory("last_cell_output"); } - memory_impl& initial_hidden_memory() const { return get_dependency_memory("initial_hidden"); } - memory_impl& initial_cell_memory() const { return get_dependency_memory("initial_cell"); } + memory::ptr dyn_length_memory() const { return get_dependency_memory("dyn_length"); } + memory::ptr recurrent_memory() const { return get_dependency_memory("recurrent"); } + memory::ptr last_hidden_output_memory() const { return get_dependency_memory("last_hidden_output"); } + memory::ptr last_cell_output_memory() const { return get_dependency_memory("last_cell_output"); } + memory::ptr initial_hidden_memory() const { return get_dependency_memory("initial_hidden"); } + memory::ptr initial_cell_memory() const { return get_dependency_memory("initial_cell"); } bool dyn_length_term() const { return node.dyn_length_term(); } bool initial_hidden_term() const { return node.initial_hidden_term(); } @@ -82,7 +82,7 @@ public: bool last_cell_output_term() const { return node.last_cell_output_term(); } private: - memory_impl& get_dependency_memory(std::string val) const { return dep_memory(node.get_dependency_idx(val)); } + memory::ptr get_dependency_memory(std::string val) const { return dep_memory_ptr(node.get_dependency_idx(val)); } }; using lstm_dynamic_timeloop_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h index 7cdb1483f4d..e87d75dba08 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/lstm.hpp" +#include "cldnn/primitives/lstm.hpp" #include "primitive_inst.h" + #include namespace cldnn { @@ -43,7 +44,7 @@ public: public: typed_primitive_inst(network_impl& network, lstm_elt_node const& node); - memory_impl& cell_memory() const { return dep_memory(1); } + memory::ptr cell_memory() const { return dep_memory_ptr(1); } bool cell_term() const { return !argument.cell.empty(); } lstm_weights_order offset_order() const { return argument.offset_order; } float clip() const { diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_gemm_inst.h 
b/inference-engine/thirdparty/clDNN/src/include/lstm_gemm_inst.h index 7d88493fccb..bc2ae664afe 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_gemm_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_gemm_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/lstm.hpp" +#include "cldnn/primitives/lstm.hpp" #include "primitive_inst.h" + #include namespace cldnn { @@ -39,10 +40,10 @@ public: public: typed_primitive_inst(network_impl& network, lstm_gemm_node const& node); - memory_impl& weights_memory() const { return dep_memory(1); } - memory_impl& recurrent_memory() const { return dep_memory(2); } - memory_impl& bias_memory() const { return dep_memory(3); } - memory_impl& hidden_memory() const { return bias_term() ? dep_memory(4) : dep_memory(3); } + memory::ptr weights_memory() const { return dep_memory_ptr(1); } + memory::ptr recurrent_memory() const { return dep_memory_ptr(2); } + memory::ptr bias_memory() const { return dep_memory_ptr(3); } + memory::ptr hidden_memory() const { return bias_term() ? dep_memory_ptr(4) : dep_memory_ptr(3); } bool bias_term() const { return !argument.bias.empty(); } bool hidden_term() const { return !argument.hidden.empty(); } uint32_t direction() const { return argument.direction; } diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_inst.h index 6f42da675f0..2f94e236283 100644 --- a/inference-engine/thirdparty/clDNN/src/include/lstm_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/lstm_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/lstm.hpp" +#include "cldnn/primitives/lstm.hpp" #include "primitive_inst.h" + #include #include @@ -51,14 +52,14 @@ public: public: typed_primitive_inst(network_impl& network, lstm_node const& node); - memory_impl& weights_memory() const { return dep_memory(1); } - memory_impl& recurrent_memory() const { return dep_memory(2); } - memory_impl& bias_memory() const { return dep_memory(3); } - memory_impl& initial_hidden_memory() const { return dep_memory(bias_term() ? 4 : 3); } - memory_impl& initial_cell_memory() const { + memory& weights_memory() const { return dep_memory(1); } + memory& recurrent_memory() const { return dep_memory(2); } + memory& bias_memory() const { return dep_memory(3); } + memory& initial_hidden_memory() const { return dep_memory(bias_term() ? 4 : 3); } + memory& initial_cell_memory() const { return dep_memory(bias_term() ? (initial_hidden_term() ? 5 : 4) : (initial_hidden_term() ? 
4 : 2)); } - memory_impl& peepholes_memory() const { return dep_memory(6); } + memory& peepholes_memory() const { return dep_memory(6); } bool bias_term() const { return !argument.bias.empty(); } bool peepholes_term() const { return !argument.peepholes.empty(); } bool initial_hidden_term() const { return !argument.initial_hidden.empty(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h b/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h index 85079daf5b0..543344011de 100644 --- a/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/max_unpooling.hpp" +#include "cldnn/primitives/max_unpooling.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h deleted file mode 100644 index a0353637b33..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include "api/memory.hpp" - -#include "engine_impl.h" -#include "refcounted_obj.h" - -namespace cldnn { - -struct memory_impl : refcounted_obj { - memory_impl(const engine_impl::ptr& engine, const layout& layout, uint32_t net_id, allocation_type type, bool reused = false) - : _engine(engine.get()), _layout(layout), _net_id(net_id), _bytes_count(_layout.bytes_count()), _type(type), _reused(reused) {} - - virtual ~memory_impl() { - if (_engine != nullptr && !_reused) { - _engine->get_memory_pool().subtract_memory_used(_bytes_count); - } - } - virtual void* lock() = 0; - virtual void unlock() = 0; - virtual void fill(unsigned char pattern, event_impl::ptr ev) = 0; - size_t size() const { return _bytes_count; } - virtual shared_mem_params get_internal_params() const = 0; - virtual bool is_allocated_by(const engine_impl& engine) const { return &engine == _engine; } - refcounted_obj_ptr get_engine() const { return engine_impl::ptr(_engine); } - const layout& get_layout() const { return _layout; } - uint32_t get_net_id() const { return _net_id; } - void set_net(uint32_t id) { _net_id = id; } - allocation_type get_allocation_type() const { return _type; } - virtual bool is_memory_reset_needed(layout l) { - // To avoid memory reset, output memory must meet the following requirements: - // - To be Weights format (Data memory can be reused by memory_pool, which can lead to errors) - // - To have zero paddings - // - To be completely filled with data - if ((!format::is_weights_format(l.format) && !format::is_simple_data_format(l.format)) || - format::is_winograd(l.format) || format::is_image_2d(l.format)) { - return true; - } - - if (l.data_padding.lower_size() != tensor(0) || l.data_padding.upper_size() != tensor(0)) { - return true; - } - - if (_bytes_count == (l.data_type == data_types::bin ? 
ceil_div(l.count(), 32) : l.count()) * data_type_traits::size_of(l.data_type)) { - return false; - } - - return true; - } - -protected: - engine_impl *const _engine; - const layout _layout; - uint32_t _net_id; - size_t _bytes_count; - -private: - // layout bytes count, needed because of traits static map destruction - // before run of memory_impl destructor, when engine is static - allocation_type _type; - bool _reused; -}; - -struct simple_attached_memory : memory_impl { - simple_attached_memory(const layout& layout, void* pointer, uint32_t net_id) - : memory_impl((engine_impl::ptr) nullptr, layout, net_id, allocation_type::unknown), _pointer(pointer) {} - - void* lock() override { return _pointer; } - void unlock() override {} - void fill(unsigned char, event_impl::ptr) override {} - shared_mem_params get_internal_params() const override { return { shared_mem_type::shared_mem_empty, nullptr, nullptr, nullptr, -#ifdef _WIN32 - nullptr, -#else - 0, -#endif - 0}; }; - -private: - void* _pointer; -}; - -template -struct mem_lock { - explicit mem_lock(memory_impl::ptr mem) : mem(mem), ptr(reinterpret_cast(mem->lock())) {} - - explicit mem_lock(memory_impl& mem) : mem_lock((memory_impl::ptr) &mem) {} - - ~mem_lock() { - ptr = nullptr; - mem->unlock(); - } - - size_t size() const { return mem->size() / sizeof(T); } - -#if defined(_SECURE_SCL) && (_SECURE_SCL > 0) - auto begin() & { return stdext::make_checked_array_iterator(ptr, size()); } - auto end() & { return stdext::make_checked_array_iterator(ptr, size(), size()); } -#else - T* begin() & { return ptr; } - T* end() & { return ptr + size(); } -#endif - - T* data() const { return ptr; } - -private: - memory_impl::ptr mem; - T* ptr; -}; - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/meta_utils.h b/inference-engine/thirdparty/clDNN/src/include/meta_utils.h index ac412a5ac0e..7c523ef2359 100644 --- a/inference-engine/thirdparty/clDNN/src/include/meta_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/meta_utils.h @@ -4,9 +4,9 @@ #pragma once +#include "cldnn/runtime/utils.hpp" + #include -#include "api/meta_utils.hpp" -#include "internal_primitive.h" namespace cldnn { @@ -14,22 +14,6 @@ struct primitive; namespace meta { -template -struct pack {}; - -// helper type for deducing return type from member function pointer -// doesn't require passing arguments like std::result_of -template -struct deduce_ret_type; - -template -struct deduce_ret_type { - using type = Ret; -}; - -template -using deduce_ret_type_t = typename deduce_ret_type::type; - template struct is_primitive : public std::integral_constant::type>::value && std::is_same::type>::value> {}; -template -struct is_api_primitive - : public std::integral_constant::value && !std::is_base_of::value> {}; - -template -struct is_internal_primitive - : public std::integral_constant::value && - !std::is_same::type>::value && - std::is_same::type>::value> {}; } // namespace meta -} // namespace cldnn \ No newline at end of file +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/mutable_data_inst.h b/inference-engine/thirdparty/clDNN/src/include/mutable_data_inst.h index 02ed1f1ff3b..311873fb015 100644 --- a/inference-engine/thirdparty/clDNN/src/include/mutable_data_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/mutable_data_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/mutable_data.hpp" +#include 
"cldnn/primitives/mutable_data.hpp" #include "primitive_inst.h" + #include #include @@ -17,18 +18,14 @@ struct typed_program_node : public typed_program_node_base prim, program_impl& prog); - memory_impl& get_attached_memory() const { return *mem; } - memory_impl::ptr get_attached_memory_ptr() const { return mem; } - void attach_memory(memory_impl& new_mem, bool invalidate_users_if_changed = true); + memory& get_attached_memory() const { return *mem; } + memory::ptr get_attached_memory_ptr() const { return mem; } + void attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed = true); program_node& input(size_t idx = 0) const { return get_dependency(idx); } private: - memory_impl::ptr mem; - - void fill_memory(); - void fill_memory_xavier(); - void fill_memory_constant(float value); + memory::ptr mem; }; using mutable_data_node = typed_program_node; @@ -41,7 +38,6 @@ public: static layout calc_output_layout(mutable_data_node const& node) { return node.get_attached_memory().get_layout(); } static std::string to_string(mutable_data_node const& node); -public: typed_primitive_inst(network_impl& network, mutable_data_node const& node); }; diff --git a/inference-engine/thirdparty/clDNN/src/include/mvn_inst.h b/inference-engine/thirdparty/clDNN/src/include/mvn_inst.h index 31b2537746e..d4787d67608 100644 --- a/inference-engine/thirdparty/clDNN/src/include/mvn_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/mvn_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/mvn.hpp" +#include "cldnn/primitives/mvn.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/network_impl.h b/inference-engine/thirdparty/clDNN/src/include/network_impl.h index a4bfcf1b26d..8c5c570cbd4 100644 --- a/inference-engine/thirdparty/clDNN/src/include/network_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/network_impl.h @@ -5,12 +5,12 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/network.hpp" - -#include "engine_impl.h" -#include "event_impl.h" +#include "cldnn/graph/network.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/stream.hpp" #include "program_impl.h" -#include "refcounted_obj.h" +#include "implementation_map.h" #include #include @@ -24,30 +24,49 @@ namespace cldnn { class primitive_inst; -struct network_impl : public refcounted_obj { +struct network_impl { public: - explicit network_impl(const program_impl& program, uint16_t stream_id, bool is_internal = false); - network_impl(engine_impl& engine, + using ptr = std::shared_ptr; + explicit network_impl(program_impl::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = false); + network_impl(engine& engine, const topology_impl& topo, const build_options& options = build_options(), - uint16_t stream_id = 0, bool is_internal = false); - network_impl(engine_impl& engine, + network_impl(engine& engine, const std::set>& nodes, const build_options& options, bool is_internal); ~network_impl(); - const program_impl& get_program() const { return *_program; } - engine_impl& get_engine() const { return _program->get_engine(); } + + static ptr build_network(engine& engine, + const topology_impl& topology, + const build_options& options, + bool is_internal = false); + static ptr build_network(engine& engine, + const 
std::set>& nodes, + const build_options& options, + bool is_internal); + + static ptr allocate_network(stream::ptr stream, + program_impl::ptr program, + bool is_internal = false, + bool is_primary_stream = false); + + static ptr allocate_network(engine& engine, + program_impl::ptr program, + bool is_internal = false, + bool is_primary_stream = false); + program_impl::cptr get_program() const { return _program; } + program_impl::ptr get_program() { return _program; } + engine& get_engine() const { return _program->get_engine(); } void reset_execution(bool wait = true); - void set_input_data(const primitive_id& id, memory_impl& data); - void set_output_memory(const primitive_id& id, memory_impl& mem); + void set_input_data(const primitive_id& id, memory::ptr data); + void set_output_memory(const primitive_id& id, memory::ptr mem); void set_learning_rate(const float lr); float get_learning_rate(); - uint16_t get_stream_id() const { return _stream_id; } std::vector> const& get_outputs() { return _outputs; } @@ -62,31 +81,33 @@ public: std::vector get_all_primitive_org_ids() const; const program_impl::primitives_info& get_primitives_info() const; const program_impl::graph_optimizer_info& get_optimizer_passes_info() const; - void execute(const std::vector& events); + void execute(const std::vector& events); void validate_primitives(); void set_arguments(); // Implementation specific calls std::shared_ptr get_primitive(const primitive_id& id); std::string get_primitive_info(const primitive_id& id) const; - const event_impl::ptr& get_primitive_event(const primitive_id& id) const { return _events.at(id); } + const event::ptr& get_primitive_event(const primitive_id& id) const { return _events.at(id); } bool has_event(const primitive_id& id) const { return _events.count(id); } std::vector> get_primitives(const std::vector& ids); std::vector> get_primitives(const std::vector& nodes); void execute_primitive(const std::shared_ptr& primitive, - const std::vector& events); + const std::vector& events); void allocate_primitives(); void build_insts_deps(); uint32_t get_id() const { return net_id; } + stream& get_stream() const { return *_stream; } + stream::ptr get_stream_ptr() const { return _stream; } void build_exec_order(); bool is_internal() const { return _internal; } - bool is_primary_stream(); - bool is_secondary_stream(); + bool is_primary_stream() { return _is_primary_stream; } private: uint32_t net_id = 0; - const program_impl::cptr _program; - uint16_t _stream_id; + program_impl::ptr _program; + stream::ptr _stream; bool _internal; + bool _is_primary_stream; bool _reset_arguments; float _learning_rate = static_cast(0.00001); @@ -96,11 +117,10 @@ private: std::list> _exec_order; std::list> _data_outputs; - std::unordered_map _events; + std::unordered_map _events; void allocate_primitive_instance(program_node const& node); void transfer_memory_to_device(std::shared_ptr instance, program_node const& node); - void allocate_mutable_data_for_streams(std::vector>& mutable_data_nodes); void add_to_exec_order(const primitive_id& id); std::shared_ptr find_in_internal_networks(const primitive_id& id); std::shared_ptr find_primitive(const primitive_id& id); diff --git a/inference-engine/thirdparty/clDNN/src/include/non_max_suppression_inst.h b/inference-engine/thirdparty/clDNN/src/include/non_max_suppression_inst.h index b4ff4c3e481..11e3151ec6b 100644 --- a/inference-engine/thirdparty/clDNN/src/include/non_max_suppression_inst.h +++ 
b/inference-engine/thirdparty/clDNN/src/include/non_max_suppression_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/non_max_suppression.hpp" +#include "cldnn/primitives/non_max_suppression.hpp" #include "primitive_inst.h" #include @@ -94,62 +94,62 @@ public: static layout calc_output_layout(non_max_suppression_node const& node); static std::string to_string(non_max_suppression_node const& node); - memory_impl& input_boxes_mem() const { - return dep_memory(0); + memory::ptr input_boxes_mem() const { + return dep_memory_ptr(0); } - memory_impl& input_scores_mem() const { - return dep_memory(1); + memory::ptr input_scores_mem() const { + return dep_memory_ptr(1); } bool has_num_select_per_class() const { return node.has_num_select_per_class(); } - memory_impl& num_select_per_class_mem() const { - return dep_memory(2); + memory::ptr num_select_per_class_mem() const { + return dep_memory_ptr(2); } bool has_iou_threshold() const { return node.has_iou_threshold(); } - memory_impl& iou_threshold_mem() const { + memory::ptr iou_threshold_mem() const { size_t offset = 2; offset += has_num_select_per_class(); - return dep_memory(offset); + return dep_memory_ptr(offset); } bool has_score_threshold() const { return node.has_score_threshold(); } - memory_impl& score_threshold_mem() const { + memory::ptr score_threshold_mem() const { size_t offset = 2; offset += has_num_select_per_class(); offset += has_iou_threshold(); - return dep_memory(offset); + return dep_memory_ptr(offset); } bool has_soft_nms_sigma() const { return node.has_soft_nms_sigma(); } - memory_impl& soft_nms_sigma_mem() const { + memory::ptr soft_nms_sigma_mem() const { size_t offset = 2; offset += has_num_select_per_class(); offset += has_iou_threshold(); offset += has_score_threshold(); - return dep_memory(offset); + return dep_memory_ptr(offset); } bool has_second_output() const { return node.has_second_output(); } - memory_impl& second_output_mem() const { + memory::ptr second_output_mem() const { size_t offset = 2; offset += has_num_select_per_class(); offset += has_iou_threshold(); offset += has_score_threshold(); offset += has_soft_nms_sigma(); - return dep_memory(offset); + return dep_memory_ptr(offset); } bool has_third_output() const { return node.has_third_output(); } - memory_impl& third_output_mem() const { + memory::ptr third_output_mem() const { size_t offset = 2; offset += has_num_select_per_class(); offset += has_iou_threshold(); offset += has_score_threshold(); offset += has_soft_nms_sigma(); offset += has_second_output(); - return dep_memory(offset); + return dep_memory_ptr(offset); } }; diff --git a/inference-engine/thirdparty/clDNN/src/include/normalize_inst.h b/inference-engine/thirdparty/clDNN/src/include/normalize_inst.h index 7d93b24cd78..4f14fe2f3e8 100644 --- a/inference-engine/thirdparty/clDNN/src/include/normalize_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/normalize_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/normalize.hpp" +#include "cldnn/primitives/normalize.hpp" #include "primitive_inst.h" + #include namespace cldnn { @@ -34,7 +35,7 @@ public: public: typed_primitive_inst(network_impl& network, normalize_node const& node); - memory_impl& scale_memory() const { return dep_memory(1); } + memory::ptr scale_memory() const { return dep_memory_ptr(1); } }; using normalize_inst = 
typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h b/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h index 13477f9f396..0db8759d476 100644 --- a/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h @@ -4,10 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once - -#include - +#include "cldnn/primitives/one_hot.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h index 3faf9d996ce..0047c7bec52 100644 --- a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h +++ b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h @@ -265,7 +265,7 @@ public: private: void run(program_impl& p) override; - std::list> calculate(engine_impl& engine, build_options bo); + std::list> calculate(engine& engine, build_options bo); bool has_non_const_user(program_node& node) const; void handle_constant(program_impl& prog, program_node& node); void add_constant(program_impl& prog, program_node& node); diff --git a/inference-engine/thirdparty/clDNN/src/include/permute_inst.h b/inference-engine/thirdparty/clDNN/src/include/permute_inst.h index be8b8d8049c..b6250a2b54d 100644 --- a/inference-engine/thirdparty/clDNN/src/include/permute_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/permute_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/permute.hpp" +#include "cldnn/primitives/permute.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h b/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h index 5fc2f05b4bd..6bcc208cefb 100644 --- a/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/pooling.hpp" +#include "cldnn/primitives/pooling.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h b/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h index 57966207224..80987966343 100644 --- a/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h @@ -4,15 +4,15 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once - -#include "api/primitive.hpp" -#include "api/concatenation.hpp" - -#include "event_impl.h" -#include "memory_impl.h" -#include "meta_utils.h" +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/primitives/concatenation.hpp" +#include "cldnn/runtime/error_handler.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/memory.hpp" #include "kernel_selector_helper.h" +#include "meta_utils.h" #include "program_node.h" +#include "primitive_type.h" #include #include @@ -44,16 +44,17 @@ struct primitive_impl { virtual ~primitive_impl() = default; virtual void set_arguments(primitive_inst& instance) = 0; - virtual void cleanup(primitive_inst& instance) = 0; - virtual event_impl::ptr execute(const std::vector& events, primitive_inst& 
instance) = 0; + virtual event::ptr execute(const std::vector& events, primitive_inst& instance) = 0; virtual bool validate(const primitive_inst& instance) const = 0; std::string get_kernel_name() const { return _kernel_name; } // TODO: added a derived class for weights reordering (maybe for all static data reordering) kernel_selector::weights_reorder_params _weights_reorder_params; // class typed_primitive_gpu_impl override this with return false; virtual bool is_cpu() const { return true; } + virtual void init_kernels() = 0; + virtual std::unique_ptr clone() const = 0; -private: +protected: std::string _kernel_name; }; @@ -74,8 +75,10 @@ public: return reinterpret_cast> const&>(_deps); } - memory_impl& dep_memory(size_t index) const { return dependencies().at(index)->output_memory(); } - memory_impl& output_memory() const { return *_output; } + memory& dep_memory(size_t index) const { return dependencies().at(index)->output_memory(); } + memory::ptr dep_memory_ptr(size_t index) const { return dependencies().at(index)->output_memory_ptr(); } + memory& output_memory() const { return *_output; } + memory::ptr output_memory_ptr() const { return _output; } size_t inputs_memory_count() const { return _node.get_primitive()->input_size(); } primitive_type_id type() const { return _node.type(); } primitive_id id() const { return _node.id(); } @@ -84,22 +87,28 @@ public: std::shared_ptr desc() const { return _node.get_primitive(); } network_impl& get_network() const { return _network; } uint32_t get_network_id() const; - void set_output_memory(memory_impl& mem); - void check_memory_to_set(const memory_impl& mem, const layout& layout) const; + void set_output_memory(memory::ptr mem); + void check_memory_to_set(const memory& mem, const layout& layout) const; const std::list& get_users() const { return _node.get_users(); } // return pointer to const to prevent arbitrary 'execute' call -> use primitive_inst.execute() instead - primitive_impl* get_impl() const { return _impl.get(); } + const primitive_impl* get_impl() const { return _impl.get(); } - memory_impl& input_memory(size_t index = 0) const { + memory& input_memory(size_t index = 0) const { if (index >= inputs_memory_count()) throw std::range_error("input offset too big"); return dep_memory(index); } - event_impl::ptr execute(const std::vector& events); + memory::ptr input_memory_ptr(size_t index = 0) const { + if (index >= inputs_memory_count()) + throw std::range_error("input offset too big"); + return dep_memory_ptr(index); + } + + event::ptr execute(const std::vector& events); + void init_kernels(); void set_arguments(); - void cleanup(); bool validate() const { if (_impl == nullptr) throw std::invalid_argument("[Internal cldnn error]. 
Validation method for nullptr impl is not allowed."); @@ -110,8 +119,8 @@ public: void build_deps(); - memory_impl& fused_memory(size_t dep_id) const { - return dep_memory(get_fused_mem_offset() + dep_id); + memory::ptr fused_memory(size_t dep_id) const { + return dep_memory_ptr(get_fused_mem_offset() + dep_id); } bool has_fused_primitives() const { return !_node.get_fused_primitives().empty(); } @@ -136,7 +145,7 @@ protected: network_impl& _network; program_node const& _node; - std::shared_ptr _impl; + std::unique_ptr _impl; // this is a set of dependencies in terms of memory, if execution of this primitive requires data from another one, // it should be added to this set @@ -154,14 +163,14 @@ protected: // _output is optional because its initialization might be postponed (reshape_inst may either allocate it's own // buffer or attach input as output // depending on reshape_node.is_in_place()) - memory_impl::ptr _output; + memory::ptr _output; bool _output_changed; // todo: implement output reuse if neither of inputs has changed bool _has_valid_input = true; // by default all primitives has valid inputs, exception is input_layout (see input_layout_inst) bool _has_mutable_input = false; - memory_impl::ptr allocate_output(); + memory::ptr allocate_output(); static std::vector> build_exec_deps( std::vector> const& mem_deps); @@ -185,7 +194,7 @@ struct typed_primitive_impl : public primitive_impl { using primitive_impl::primitive_impl; private: - event_impl::ptr execute(const std::vector>& event, + event::ptr execute(const std::vector& event, primitive_inst& instance) override { if (instance.type() != PType::type_id()) throw std::invalid_argument("Implementation type does not match primitive type"); @@ -206,19 +215,9 @@ private: return set_arguments_impl(reinterpret_cast&>(instance)); } - void cleanup(primitive_inst& instance) override { - if (instance.type() != PType::type_id()) - throw std::invalid_argument("Implementation type does not match primitive type"); - if (instance.get_impl() != this) - throw std::invalid_argument( - "Trying to cleanup primitive implementation with mismatching primitive instance"); - - return cleanup_impl(reinterpret_cast&>(instance)); - } virtual void set_arguments_impl(typed_primitive_inst& /*instance*/) {} - virtual void cleanup_impl(typed_primitive_inst& /*instance*/) {} - virtual event_impl::ptr execute_impl(const std::vector& event, + virtual event::ptr execute_impl(const std::vector& event, typed_primitive_inst& instance) = 0; bool validate(const primitive_inst& instance) const override { @@ -233,13 +232,8 @@ private: virtual bool validate_impl(const typed_primitive_inst&) const { return true; } }; -namespace details { template -class api_typed_primitive_inst_base : public primitive_inst { - static_assert(meta::is_api_primitive::value, - "PType should name a non-const, non-volatile type derived from cldnn::primitive but not from " - "cldnn::internal_primitive"); - +class typed_primitive_inst_base : public primitive_inst { public: using typed_node = typed_program_node; using typed_impl = typed_primitive_impl; @@ -247,16 +241,16 @@ public: const typed_node& node; const PType& argument; - api_typed_primitive_inst_base(network_impl& network, typed_node const& node) - : api_typed_primitive_inst_base(network, node, do_allocate_memory(node)) {} + typed_primitive_inst_base(network_impl& network, typed_node const& node) + : typed_primitive_inst_base(network, node, do_allocate_memory(node)) {} protected: - api_typed_primitive_inst_base(network_impl& network, 
typed_node const& node, bool allocate_memory) + typed_primitive_inst_base(network_impl& network, typed_node const& node, bool allocate_memory) : primitive_inst(network, node, allocate_memory), node(_node), argument(*node.get_primitive()) {} - api_typed_primitive_inst_base(network_impl& network, typed_node const& node, memory_impl& buffer) - : api_typed_primitive_inst_base(network, node, false) { - _output = (memory_impl::ptr) &buffer; + typed_primitive_inst_base(network_impl& network, typed_node const& node, memory::ptr buffer) + : typed_primitive_inst_base(network, node, false) { + _output = buffer; } private: @@ -269,48 +263,6 @@ private: } }; -template -class internal_typed_primitive_inst_base : public primitive_inst { - static_assert(meta::is_internal_primitive::value, - "PType should name a non-const, non-volatile type derived from cldnn::internal_primitive"); - -public: - using typed_node = typed_program_node; - using typed_impl = typed_primitive_impl; - - const typed_node& node; - - internal_typed_primitive_inst_base(network_impl& network, typed_node const& node) - : internal_typed_primitive_inst_base( - network, - node, - false) // by default, do not allocate output buffer automatically for internal primitives - {} - - template - [[noreturn]] void desc(Guard&&...) const { - static_assert(meta::always_false>::value, "Trying to get primitive from internal node"); - } - -protected: - internal_typed_primitive_inst_base(network_impl& network, typed_node const& node, bool allocate_memory) - : primitive_inst(network, node, allocate_memory), node(_node) {} - - internal_typed_primitive_inst_base(network_impl& network, typed_node const& node, memory_impl::ptr buffer) - : internal_typed_primitive_inst_base(network, node, false) { - _output = buffer; - } -}; -} // namespace details - -/* - Base class for all concrete primitive instances. -*/ -template -using typed_primitive_inst_base = typename std::conditional::value, - details::api_typed_primitive_inst_base, - details::internal_typed_primitive_inst_base>::type; - /* Template class which represents instance of primitive 'PType'. Each new primitive should explicitly specialize this class. 
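// Illustrative usage sketch, not part of the patch: with the memory_impl -> memory and
// event_impl -> event renames in the primitive_inst interface above, running a single
// instance and reading its output back on the host looks roughly like this. `inst` and
// `strm` are assumed to come from an already allocated network; header paths follow the
// ones used elsewhere in this patch.
#include <cstdint>
#include <vector>

#include "cldnn/runtime/stream.hpp"
#include "primitive_inst.h"

static std::vector<uint8_t> read_output(cldnn::primitive_inst& inst, cldnn::stream& strm) {
    cldnn::event::ptr done = inst.execute({});         // no dependency events in this sketch
    done->wait();                                       // block until the primitive has finished
    cldnn::memory::ptr out = inst.output_memory_ptr();  // pointer-based accessor added above
    cldnn::mem_lock<uint8_t> lock{ out, strm };         // host-visible view now needs the stream
    return std::vector<uint8_t>(lock.data(), lock.data() + lock.size());
}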
@@ -330,14 +282,4 @@ class typed_primitive_inst : public typed_primitive_inst_base { static_assert(meta::always_false::value, "Missing typed_primitive_inst specialization"); }; -#define CLDNN_DEFINE_SIMPLE_PRIM_INST(PType) \ - template <> \ - struct typed_primitive_inst : public typed_primitive_inst_base { \ - using typed_primitive_inst_base::typed_primitive_inst_base; \ - static std::string to_string(PType##_node const& arg) { \ - return primitive_inst::generic_to_string(arg, #PType); \ - } \ - }; \ - using PType##_inst = typed_primitive_inst; - } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_type.h b/inference-engine/thirdparty/clDNN/src/include/primitive_type.h index b608dff8536..8ccd776183c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/primitive_type.h +++ b/inference-engine/thirdparty/clDNN/src/include/primitive_type.h @@ -4,10 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/memory.hpp" -#include "api/primitive.hpp" -#include "api/program.hpp" - +#include "cldnn/runtime/memory.hpp" +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/graph/program.hpp" #include "topology_impl.h" #include @@ -15,7 +14,7 @@ namespace cldnn { struct network_impl; -struct engine_impl; +class engine; struct program_node; struct primitive_impl; class primitive_inst; @@ -28,14 +27,12 @@ struct primitive_type { const std::shared_ptr prim) const = 0; virtual std::shared_ptr create_instance(network_impl& network, const program_node& node) const = 0; - virtual std::unique_ptr choose_impl(engine_impl& engine, + virtual std::unique_ptr choose_impl(const engine& engine, const program_node& node) const = 0; - virtual bool does_an_implementation_exist(engine_impl& engine, const program_node& node) const = 0; - virtual bool does_possible_implementation_exist(engine_impl& engine, + virtual bool does_an_implementation_exist(const engine& engine, const program_node& node) const = 0; + virtual bool does_possible_implementation_exist(const engine& engine, const program_node& node) const = 0; virtual layout calc_output_layout(const program_node& node) const = 0; virtual std::string to_string(const program_node& node) const = 0; - - virtual bool is_internal_type() const { return false; } }; } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h b/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h index faffe78c4ea..3dc14aa77d2 100644 --- a/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h +++ b/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h @@ -4,21 +4,22 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once + +#include "cldnn/runtime/engine.hpp" + #include "meta_utils.h" #include "primitive_type.h" #include "program_node.h" #include "primitive_inst.h" #include "network_impl.h" -#include "engine_impl.h" +#include "implementation_map.h" + #include #include namespace cldnn { template struct primitive_type_base : primitive_type { - static_assert(meta::is_api_primitive::value, - "Primitive type passed to primitive_type_base should derive from cldnn::primitive"); - std::shared_ptr create_node(program_impl& program, const std::shared_ptr prim) const override { if (prim->type != this) @@ -35,23 +36,26 @@ struct primitive_type_base : primitive_type { return std::make_shared>(network, node); } - std::unique_ptr 
choose_impl(engine_impl& engine, const cldnn::program_node& node) const override { + // TODO: Should we get rid of engine type in impl map? Or we must pass internal build engine to get real ocl type? + std::unique_ptr choose_impl(const engine& /* engine */, const cldnn::program_node& node) const override { if (node.type() != this) throw std::invalid_argument("primitive_type_base::choose_impl: primitive type mismatch"); - return engine.create_primitive_impl(node.as()); + auto factory = implementation_map::get(engine_types::ocl, node); + return std::move(std::unique_ptr(factory(node))); } - bool does_an_implementation_exist(engine_impl& engine, const cldnn::program_node& node) const override { + bool does_an_implementation_exist(const engine& /* engine */, const cldnn::program_node& node) const override { if (node.type() != this) throw std::invalid_argument("primitive_type_base::choose_impl: primitive type mismatch"); - return engine.does_an_implementation_exist(node.as()); + + return implementation_map::check(engine_types::ocl, node); } - bool does_possible_implementation_exist(engine_impl& engine, const cldnn::program_node& node) const override { + bool does_possible_implementation_exist(const engine& /* engine */, const cldnn::program_node& node) const override { if (node.type() != this) throw std::invalid_argument("primitive_type_base::choose_impl: primitive type mismatch"); - return engine.does_possible_implementation_exist(node.as()); + return implementation_map::check_io_eq(engine_types::ocl, node); } cldnn::layout calc_output_layout(const cldnn::program_node& node) const override { diff --git a/inference-engine/thirdparty/clDNN/src/include/prior_box_inst.h b/inference-engine/thirdparty/clDNN/src/include/prior_box_inst.h index 8aba188276d..16c6681509f 100644 --- a/inference-engine/thirdparty/clDNN/src/include/prior_box_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/prior_box_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/prior_box.hpp" +#include "cldnn/primitives/prior_box.hpp" #include "primitive_inst.h" + #include #include @@ -21,10 +22,10 @@ struct typed_program_node : typed_program_node_base { bool is_clustered() const { return get_primitive()->is_clustered(); } void calc_result(); - memory_impl::ptr get_result_buffer() const { return result; } + memory::ptr get_result_buffer() const { return result; } private: - memory_impl::ptr result; + memory::ptr result; }; using prior_box_node = typed_program_node; @@ -40,7 +41,7 @@ public: public: typed_primitive_inst(network_impl& network, prior_box_node const& node); - memory_impl& input_memory() const { return dep_memory(0); } + memory& input_memory() const { return dep_memory(0); } }; using prior_box_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h b/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h index 5f8e687cd90..381b1ea9cdf 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h @@ -6,7 +6,6 @@ #include "program_impl.h" #include "program_node.h" -#include "gpu/ocl_toolkit.h" #include #include @@ -18,4 +17,4 @@ void dump_graph_optimized(std::ofstream&, const program_impl&); void dump_graph_processing_order(std::ofstream&, const program_impl&); void dump_graph_init(std::ofstream&, const program_impl&, std::function const&); void 
dump_graph_info(std::ofstream&, const program_impl&, std::function const&); -} // namespace cldnn \ No newline at end of file +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/program_helpers.h b/inference-engine/thirdparty/clDNN/src/include/program_helpers.h index cdef2a3b18d..60291deb1e7 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_helpers.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_helpers.h @@ -7,8 +7,10 @@ #pragma once #include "program_node.h" -#include "engine_impl.h" +#include "cldnn/runtime/engine.hpp" #include "program_impl.h" +#include "data_inst.h" + #include #include #include @@ -95,7 +97,7 @@ struct program_helpers { else do_for_types(node, rest...); } - static void merge_buffers(engine_impl& engine, + static void merge_buffers(engine& engine, program_node& node, const layout& target_layout, size_t begin_offset, diff --git a/inference-engine/thirdparty/clDNN/src/include/program_impl.h b/inference-engine/thirdparty/clDNN/src/include/program_impl.h index 4588ec4ec80..fe8c8cb55f2 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_impl.h @@ -6,10 +6,10 @@ #pragma once -#include "api/program.hpp" - -#include "refcounted_obj.h" -#include "engine_impl.h" +#include "cldnn/graph/program.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/stream.hpp" +#include "runtime/kernels_cache.hpp" #include #include @@ -19,6 +19,10 @@ #include #include +namespace kernel_selector { +class TuningCache; +} // namespace kernel_selector + namespace cldnn { struct topology_impl; @@ -28,10 +32,19 @@ class layout_optimizer; class pass_manager; class base_pass; class program_impl_wrapper; + +struct gpu_program_state { + kernels_cache _kernels_cache; + + gpu_program_state(engine& engine) : _kernels_cache(engine) {} +}; + /* cldnn_program implementation */ -struct program_impl : public refcounted_obj { +struct program_impl { + using ptr = std::shared_ptr; + using cptr = std::shared_ptr; friend class calculate_prior_boxes; // to be removed when possible friend class graph_initializations; // to be removed when possible friend class prepare_padding; // to be removed when possible @@ -121,18 +134,18 @@ public: typedef std::vector> graph_optimizer_info; typedef std::pair> optimized_info; - program_impl(engine_impl& engine_ref, + program_impl(engine& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal, bool no_optimizations = false); /* constructor used to build a program from subset of nodes of other program (used in propagate_constants) */ - program_impl(engine_impl& engine_ref, + program_impl(engine& engine_ref, std::set> const& nodes, build_options const& options, bool is_internal); ~program_impl(); - engine_impl& get_engine() const { return *engine; } + engine& get_engine() const { return _engine; } const build_options& get_options() const { return options; } std::list& get_inputs() { return inputs; @@ -144,6 +157,7 @@ public: const nodes_ordering& get_processing_order() const; nodes_ordering& get_processing_order(); uint32_t get_prog_id() { return prog_id; } + stream& get_stream() { return *_stream; } const std::list& get_optimized_out() const { return optimized_out; } const std::list& get_optimized() const { return optimized; } bool has_node(const primitive_id& prim) const { return nodes_map.count(prim) > 0; } @@ -151,7 +165,6 @@ public: program_node const& get_node(primitive_id const& id) const; 
std::shared_ptr get_node_ptr(const primitive_id& prim) { return nodes_map.at(prim); } std::shared_ptr get_node_ptr(const primitive_id& prim) const { return nodes_map.at(prim); } - void dump_memory_pool() const; // returns already existing program_node for given primitive 'prim' (lookup in 'nodes_map') // if it was previously created, otherwise creates and then returns program_node @@ -211,14 +224,36 @@ public: void reset_program(); uint32_t get_id() const { return prog_id; } + static ptr build_program(engine& engine, + const topology_impl& topology, + const build_options& options, + bool is_internal = false, + bool no_optimizations = false); + static ptr build_program(engine& engine, + const std::set>& nodes, + const build_options& options, + bool is_internal); + static void init_primitives(); + void compile(); + void init_kernels(); + kernel_id add_kernel(const std::shared_ptr kernel_sring); + kernel::ptr get_kernel(kernel_id id); + + void load_tuning_cache(); + std::shared_ptr get_tuning_cache() const { return tuning_cache; } + private: uint32_t prog_id = 0; - engine_impl::ptr engine; + engine& _engine; + stream::ptr _stream; + gpu_program_state program_state; build_options options; std::list inputs; std::vector outputs; nodes_ordering processing_order; std::unique_ptr pm; + std::shared_ptr tuning_cache; + std::map> nodes_map; std::list optimized_out; diff --git a/inference-engine/thirdparty/clDNN/src/include/program_node.h b/inference-engine/thirdparty/clDNN/src/include/program_node.h index 3e8711d8625..d0e904d3c56 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_node.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_node.h @@ -4,15 +4,14 @@ #pragma once +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/primitives/activation.hpp" + +#include "kernel_selector_helper.h" +#include "meta_utils.h" + #include #include - -#include "api/primitive.hpp" -#include "api/activation.hpp" -#include "internal_primitive.h" -#include "kernel_selector_helper.h" - -#include "meta_utils.h" #include #include #include @@ -21,6 +20,7 @@ namespace cldnn { struct program_impl; +struct primitive_impl; class reorder_inputs; class graph_initializations; class prepare_quantization; @@ -29,9 +29,6 @@ class pre_replace_deconv; template struct typed_program_node; -template -struct internal_primitive_type_base; - class json_composite; class xml_composite; @@ -92,10 +89,10 @@ public: } program_impl& get_program() { return myprog; } - program_impl const& get_program() const { return myprog; } + program_impl& get_program() const { return myprog; } - std::shared_ptr get_selected_impl() const { return selected_impl; } - void set_selected_impl(std::shared_ptr impl) { selected_impl = impl; } + primitive_impl* get_selected_impl() const { return selected_impl.get(); } + void set_selected_impl(std::unique_ptr impl); std::vector const& get_dependencies() const { return dependencies; } program_node& get_dependency(size_t idx) const { return *dependencies.at(idx); } @@ -312,7 +309,7 @@ protected: std::shared_ptr desc; program_impl& myprog; - std::shared_ptr selected_impl; + std::unique_ptr selected_impl; bool valid_output_layout = false; layout output_layout = layout(data_types::f32, format::bfyx, tensor()); @@ -353,12 +350,16 @@ protected: void invalidate_users() const; }; -namespace details { +/* +Template class used to indicate that usage context requires 'program_node' to wrap primitive +of type 'PType'. 
Successful conversion from 'program_node' to 'typed_program_node' means +that this restriction in fact holds and functions/method/etc. may saftly use uderlaying primitive. + +This class shadows 'get_primitive' method from base class which now returns pointer to more specific +type. +*/ template -struct api_typed_program_node_base : public program_node { - static_assert(meta::is_api_primitive::value, - "PType should name a non-const, non-volatile type derived from cldnn::primitive but not from " - "cldnn::internal_primitive"); +struct typed_program_node_base : public program_node { friend class cldnn::graph_initializations; friend class cldnn::pre_replace_deconv; friend class cldnn::prepare_quantization; @@ -376,57 +377,6 @@ protected: std::shared_ptr typed_desc() const { return std::static_pointer_cast(desc); } }; -struct internal_program_node_base : public program_node { - friend struct cldnn::program_impl; - - explicit internal_program_node_base(program_impl& prog); - - const primitive_id& id() const override { return internal_id; } - - void set_implementation(std::unique_ptr&& impl); - -private: - primitive_id internal_id; - - static primitive_id get_next_internal_id(); -}; - -template -struct internal_typed_program_node_base : public internal_program_node_base { - static_assert(meta::is_internal_primitive::value, - "PType should name a non-const, non-volatile type derived from cldnn::internal_primitive"); - -public: - using internal_program_node_base::internal_program_node_base; - - primitive_type_id type() const override { return PType::type_id(); } - - template - [[noreturn]] void get_primitive(Guard&&...) { - static_assert(meta::always_false>::value, "Trying to get primitive from internal node"); - } - -protected: - template - [[noreturn]] void typed_desc(Guard&&...) { - static_assert(meta::always_false>::value, "Trying to get primitive from internal node"); - } -}; -} // namespace details - -/* -Template class used to indicate that usage context requires 'program_node' to wrap primitive -of type 'PType'. Successful conversion from 'program_node' to 'typed_program_node' means -that this restriction in fact holds and functions/method/etc. may saftly use uderlaying primitive. - -This class shadows 'get_primitive' method from base class which now returns pointer to more specific -type. -*/ -template -using typed_program_node_base = typename std::conditional::value, - details::api_typed_program_node_base, - details::internal_typed_program_node_base>::type; - /* Actual template class used in context which requires 'program_node' to wrap primitive of type 'PType'. This class is introduced to provide possibility of explicit specialization. 
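The hunks above, together with the removal of memory_impl.h and refcounted_obj.h, replace the intrusively ref-counted memory_impl handles with shared-pointer based memory::ptr, and route host access through a stream-aware mem_lock plus memory::copy_from(). A minimal, hypothetical sketch of host-side buffer handling after this change (the helper name and the single-argument allocate_memory overload are assumptions, not part of the patch):

#include <algorithm>
#include <cstdint>

#include "cldnn/runtime/engine.hpp"
#include "cldnn/runtime/memory.hpp"
#include "cldnn/runtime/stream.hpp"

// Hypothetical helper, not taken from the patch: fill one buffer on the host and
// copy it into a second one, using the stream-aware lock and copy_from() that the
// loop_inst.h hunks above switch to.
static cldnn::memory::ptr fill_and_copy(cldnn::engine& engine,
                                        cldnn::stream& stream,
                                        const cldnn::layout& l) {
    cldnn::memory::ptr src = engine.allocate_memory(l);  // assumed overload with default allocation type
    cldnn::memory::ptr dst = engine.allocate_memory(l);
    {
        // mem_lock now takes the stream owning the queue, not just the memory object.
        cldnn::mem_lock<uint8_t> lock{ src, stream };
        std::fill(lock.data(), lock.data() + lock.size(), uint8_t{0});
    }
    dst->copy_from(stream, *src);  // replaces the removed element-wise copy_data() helper
    return dst;
}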
diff --git a/inference-engine/thirdparty/clDNN/src/include/proposal_inst.h b/inference-engine/thirdparty/clDNN/src/include/proposal_inst.h index d1fa03f8d9e..3c67afe4003 100644 --- a/inference-engine/thirdparty/clDNN/src/include/proposal_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/proposal_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/proposal.hpp" +#include "cldnn/primitives/proposal.hpp" #include "primitive_inst.h" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h b/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h index 45cd6638f74..829f5228f61 100644 --- a/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h @@ -3,7 +3,7 @@ // #pragma once -#include "api/pyramid_roi_align.hpp" +#include "cldnn/primitives/pyramid_roi_align.hpp" #include "primitive_inst.h" #include @@ -36,11 +36,11 @@ public: static std::string to_string(pyramid_roi_align_node const& node); typed_primitive_inst(network_impl& network, pyramid_roi_align_node const& node); - memory_impl& input() const { return dep_memory(0); } - memory_impl& P2() const { return dep_memory(1); } - memory_impl& P3() const { return dep_memory(2); } - memory_impl& P4() const { return dep_memory(3); } - memory_impl& P5() const { return dep_memory(4); } + memory& input() const { return dep_memory(0); } + memory& P2() const { return dep_memory(1); } + memory& P3() const { return dep_memory(2); } + memory& P4() const { return dep_memory(3); } + memory& P5() const { return dep_memory(4); } }; using pyramid_roi_align_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h index 0cf0b30e81c..1d680ecb3cc 100644 --- a/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/quantize_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/quantize.hpp" +#include "cldnn/primitives/quantize.hpp" #include "primitive_inst.h" #include "data_inst.h" #include "kernel_selector/core/actual_kernels/quantize/quantize_kernel_params.h" diff --git a/inference-engine/thirdparty/clDNN/src/include/reduce_inst.h b/inference-engine/thirdparty/clDNN/src/include/reduce_inst.h index 6574ce8de21..678f861a2a8 100644 --- a/inference-engine/thirdparty/clDNN/src/include/reduce_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/reduce_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/reduce.hpp" +#include "cldnn/primitives/reduce.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/refcounted_obj.h b/inference-engine/thirdparty/clDNN/src/include/refcounted_obj.h deleted file mode 100644 index 82b931f65da..00000000000 --- a/inference-engine/thirdparty/clDNN/src/include/refcounted_obj.h +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma once -#include -#include - -namespace cldnn { - 
-template -struct refcounted_obj_ptr; - -/** - * \brief Base class for all reference counted pointers aka PIMPL implementations - */ -// TODO refine this code for multithreading support -template -class refcounted_obj { -public: - using ptr = refcounted_obj_ptr::type>; - using cptr = refcounted_obj_ptr::type>; - - refcounted_obj() : _ref_count(1) {} - - virtual ~refcounted_obj() = default; - - void add_ref() const { ++_ref_count; } - - void release() const { - if ((--_ref_count) == 0) - delete static_cast(this); - } - - int get_ref_count() const { return _ref_count; } - -private: - mutable std::atomic_int _ref_count; -}; - -template -struct refcounted_obj_ptr { - template - explicit refcounted_obj_ptr(T* ptr, bool add_ref = true) : _ptr(ptr) { - static_assert(std::is_base_of::type>, U>::value, - "Object handled with refcounted_obj_ptr should derive from refcounted_obj"); - if (add_ref) - ptr_add_ref(); - } - - // for refcounted_obj_ptr, allow contruction from T* - template ::value>::type> - explicit refcounted_obj_ptr(typename std::remove_const::type* ptr, bool add_ref = true) : _ptr(ptr) { - static_assert(std::is_base_of::type>, U>::value, - "Object handled with refcounted_obj_ptr should derive from refcounted_obj"); - if (add_ref) - ptr_add_ref(); - } - - constexpr refcounted_obj_ptr() : _ptr(nullptr) {} - - refcounted_obj_ptr(const refcounted_obj_ptr& other) : _ptr(other._ptr) { ptr_add_ref(); } - - refcounted_obj_ptr& operator=(const refcounted_obj_ptr& other) { - if (this == &other) - return *this; - ptr_release(); - _ptr = other._ptr; - ptr_add_ref(); - return *this; - } - - refcounted_obj_ptr(refcounted_obj_ptr&& other) noexcept { - _ptr = other._ptr; - other._ptr = nullptr; - } - - refcounted_obj_ptr& operator=(refcounted_obj_ptr&& other) { - if (this == &other) - return *this; - ptr_release(); - _ptr = other._ptr; - other._ptr = nullptr; - return *this; - } - - ~refcounted_obj_ptr() { - ptr_release(); - _ptr = nullptr; - } - - T* detach() { - T* result = _ptr; - _ptr = nullptr; - return result; - } - - void reset(T* ptr, bool add_ref = true) { - ptr_release(); - _ptr = ptr; - if (add_ref) - ptr_add_ref(); - } - - operator bool() const { return _ptr != nullptr; } - T* get() const { return _ptr; } - T& operator*() const { return *get(); } - T* operator->() const { return get(); } - - friend bool operator==(const refcounted_obj_ptr& lhs, const refcounted_obj_ptr& rhs) { - return lhs._ptr == rhs._ptr; - } - - friend bool operator!=(const refcounted_obj_ptr& lhs, const refcounted_obj_ptr& rhs) { return !(lhs == rhs); } - - // for refcounted_obj_ptr, allow conversion to refcounted_obj_ptr - template - operator refcounted_obj_ptr() const { - return refcounted_obj_ptr(_ptr); - } - -private: - T* _ptr; - void ptr_add_ref() { - if (_ptr) - _ptr->add_ref(); - } - void ptr_release() { - if (_ptr) - _ptr->release(); - } -}; - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/include/region_yolo_inst.h b/inference-engine/thirdparty/clDNN/src/include/region_yolo_inst.h index 26553e215ce..0b6a24068cc 100644 --- a/inference-engine/thirdparty/clDNN/src/include/region_yolo_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/region_yolo_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/region_yolo.hpp" +#include "cldnn/primitives/region_yolo.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git 
a/inference-engine/thirdparty/clDNN/src/include/reorder_inst.h b/inference-engine/thirdparty/clDNN/src/include/reorder_inst.h index c9fccb0d7c7..ce851e482ae 100644 --- a/inference-engine/thirdparty/clDNN/src/include/reorder_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/reorder_inst.h @@ -4,8 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/reorder.hpp" + +#include "cldnn/primitives/reorder.hpp" #include "primitive_inst.h" + #include #include @@ -50,8 +52,8 @@ public: public: typed_primitive_inst(network_impl& network, reorder_node const& node); - memory_impl& mean_nv12_memory() const { return dep_memory(2); } - memory_impl& mean_memory() const { return dep_memory(1); } + memory::ptr mean_nv12_memory() const { return dep_memory_ptr(2); } + memory::ptr mean_memory() const { return dep_memory_ptr(1); } bool has_mean() const { return !argument.mean.empty(); } diff --git a/inference-engine/thirdparty/clDNN/src/include/reorg_yolo_inst.h b/inference-engine/thirdparty/clDNN/src/include/reorg_yolo_inst.h index 43361d31760..43efd38a9b5 100644 --- a/inference-engine/thirdparty/clDNN/src/include/reorg_yolo_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/reorg_yolo_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/reorg_yolo.hpp" +#include "cldnn/primitives/reorg_yolo.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/resample_inst.h b/inference-engine/thirdparty/clDNN/src/include/resample_inst.h index 33bf98f2dda..6bb8de03b27 100644 --- a/inference-engine/thirdparty/clDNN/src/include/resample_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/resample_inst.h @@ -4,10 +4,11 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/resample.hpp" +#include "cldnn/primitives/resample.hpp" #include "primitive_inst.h" -#include #include "topology_impl.h" + +#include #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h b/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h index 3c640441bad..32adce55186 100644 --- a/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h @@ -4,9 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/reshape.hpp" +#include "cldnn/primitives/reshape.hpp" #include "primitive_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h b/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h index 51f4a24a8cb..60ba5b3dd4e 100644 --- a/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/reverse_sequence.hpp" +#include "cldnn/primitives/reverse_sequence.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/roi_pooling_inst.h 
b/inference-engine/thirdparty/clDNN/src/include/roi_pooling_inst.h index f9c3dc3b069..6d3cfcf9f38 100644 --- a/inference-engine/thirdparty/clDNN/src/include/roi_pooling_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/roi_pooling_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/roi_pooling.hpp" +#include "cldnn/primitives/roi_pooling.hpp" #include "primitive_inst.h" + #include namespace cldnn { @@ -34,8 +35,8 @@ public: public: using parent::parent; - memory_impl& rois_memory() const { return dep_memory(1); } - memory_impl& trans_memory() const { return dep_memory(2); } + memory::ptr rois_memory() const { return dep_memory_ptr(1); } + memory::ptr trans_memory() const { return dep_memory_ptr(2); } }; using roi_pooling_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h index 018daad2a8c..a25fb49be99 100644 --- a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h @@ -4,11 +4,12 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/scale.hpp" +#include "cldnn/primitives/scale.hpp" #include "primitive_inst.h" +#include "kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h" + #include #include -#include "kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h" namespace cldnn { @@ -47,8 +48,8 @@ public: public: typed_primitive_inst(network_impl& network, scale_node const& desc); - memory_impl& scale_memory() const { return dep_memory(1); } - memory_impl& bias_memory() const { return dep_memory(2); } + memory::ptr scale_memory() const { return dep_memory_ptr(1); } + memory::ptr bias_memory() const { return dep_memory_ptr(2); } bool bias_term() const { return _node.as().bias_term(); } }; diff --git a/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h b/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h index dd87a7facce..f70bed68bee 100644 --- a/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/scatter_elements_update_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/scatter_elements_update.hpp" +#include "cldnn/primitives/scatter_elements_update.hpp" #include "primitive_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/include/scatter_nd_update_inst.h b/inference-engine/thirdparty/clDNN/src/include/scatter_nd_update_inst.h index 1da6e21445c..acf3ee9c6c0 100644 --- a/inference-engine/thirdparty/clDNN/src/include/scatter_nd_update_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/scatter_nd_update_inst.h @@ -4,7 +4,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/scatter_nd_update.hpp" +#include "cldnn/primitives/scatter_nd_update.hpp" #include "primitive_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/include/scatter_update_inst.h b/inference-engine/thirdparty/clDNN/src/include/scatter_update_inst.h index 54fa0fae09d..1c9b7934f25 100644 --- a/inference-engine/thirdparty/clDNN/src/include/scatter_update_inst.h +++ 
b/inference-engine/thirdparty/clDNN/src/include/scatter_update_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/scatter_update.hpp" +#include "cldnn/primitives/scatter_update.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/select_inst.h b/inference-engine/thirdparty/clDNN/src/include/select_inst.h index bd59f29062d..4adde4e8c60 100644 --- a/inference-engine/thirdparty/clDNN/src/include/select_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/select_inst.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include - +#include "cldnn/primitives/select.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h b/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h index 585d7440ba7..278f9c56fab 100644 --- a/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/shuffle_channels.hpp" +#include "cldnn/primitives/shuffle_channels.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/sliding_window_utils.h b/inference-engine/thirdparty/clDNN/src/include/sliding_window_utils.h index b832c49979f..369b78f4a8b 100644 --- a/inference-engine/thirdparty/clDNN/src/include/sliding_window_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/sliding_window_utils.h @@ -4,15 +4,15 @@ #pragma once -#include -#include +#include "cldnn/runtime/layout.hpp" +#include "cldnn/runtime/tensor.hpp" + +#include "meta_utils.h" #include #include #include -#include "meta_utils.h" - namespace cldnn { /// @brief Sliding window output range computation mode. 
diff --git a/inference-engine/thirdparty/clDNN/src/include/softmax_inst.h b/inference-engine/thirdparty/clDNN/src/include/softmax_inst.h index 1fc97c3288c..8bf45e73924 100644 --- a/inference-engine/thirdparty/clDNN/src/include/softmax_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/softmax_inst.h @@ -4,8 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/softmax.hpp" +#include "cldnn/primitives/softmax.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/space_to_batch_inst.h b/inference-engine/thirdparty/clDNN/src/include/space_to_batch_inst.h index 3fed298e57f..db2f69719d9 100644 --- a/inference-engine/thirdparty/clDNN/src/include/space_to_batch_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/space_to_batch_inst.h @@ -4,8 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/space_to_batch.hpp" + +#include "cldnn/primitives/space_to_batch.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/space_to_depth_inst.h b/inference-engine/thirdparty/clDNN/src/include/space_to_depth_inst.h index 527c17e9b85..2cebeb7cdf8 100644 --- a/inference-engine/thirdparty/clDNN/src/include/space_to_depth_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/space_to_depth_inst.h @@ -4,8 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/space_to_depth.hpp" + +#include "cldnn/primitives/space_to_depth.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/split_inst.h b/inference-engine/thirdparty/clDNN/src/include/split_inst.h index 16f74d64e4a..af9a8e402eb 100644 --- a/inference-engine/thirdparty/clDNN/src/include/split_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/split_inst.h @@ -4,8 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/split.hpp" + +#include "cldnn/primitives/split.hpp" #include "primitive_inst.h" + #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h b/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h index 812cd213761..406d46b44ef 100644 --- a/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h @@ -4,9 +4,11 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/strided_slice.hpp" + +#include "cldnn/primitives/strided_slice.hpp" #include "primitive_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" + #include #include diff --git a/inference-engine/thirdparty/clDNN/src/include/tile_inst.h b/inference-engine/thirdparty/clDNN/src/include/tile_inst.h index ea3966c5ee5..4569201f79c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/tile_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/tile_inst.h @@ -4,8 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/tile.hpp" + +#include "cldnn/primitives/tile.hpp" #include "primitive_inst.h" + #include namespace cldnn { 
diff --git a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h index ae48ecd853a..29967531887 100644 --- a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h @@ -3,12 +3,15 @@ // #pragma once + +#include "cldnn/runtime/tensor.hpp" +#include "cldnn/runtime/layout.hpp" +#include "cldnn/runtime/device.hpp" +#include "cldnn/primitives/primitive.hpp" + + #include #include -#include "api/tensor.hpp" -#include "api/layout.hpp" -#include "api/primitive.hpp" -#include "device_impl.h" #include namespace cldnn { diff --git a/inference-engine/thirdparty/clDNN/src/include/topology_impl.h b/inference-engine/thirdparty/clDNN/src/include/topology_impl.h index 2e9b943ca14..bb1df4fc4b7 100644 --- a/inference-engine/thirdparty/clDNN/src/include/topology_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/topology_impl.h @@ -4,9 +4,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once -#include "api/primitive.hpp" -#include "api/input_layout.hpp" -#include "refcounted_obj.h" + +#include "cldnn/primitives/primitive.hpp" +#include "cldnn/primitives/input_layout.hpp" #include #include @@ -16,8 +16,9 @@ namespace cldnn { typedef std::map> topology_map; -struct topology_impl : public refcounted_obj { +struct topology_impl { public: + using ptr = std::shared_ptr; explicit topology_impl(const topology_map& map = topology_map()) : _primitives(map) {} void add(std::shared_ptr desc) { diff --git a/inference-engine/thirdparty/clDNN/src/input_layout.cpp b/inference-engine/thirdparty/clDNN/src/input_layout.cpp index b4df10dd23a..6aaefe16e65 100644 --- a/inference-engine/thirdparty/clDNN/src/input_layout.cpp +++ b/inference-engine/thirdparty/clDNN/src/input_layout.cpp @@ -5,8 +5,8 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "input_layout_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include @@ -27,16 +27,16 @@ input_layout_inst::typed_primitive_inst(network_impl& network, input_layout_node _has_valid_input = false; // by default input for 'input_layout' is invalid as long as user doesn't call set_data } -void input_layout_inst::set_data(memory_impl& mem) { +void input_layout_inst::set_data(memory::ptr mem) { auto ol = node.get_output_layout(); - check_memory_to_set(mem, ol); + check_memory_to_set(*mem, ol); - if (mem.is_allocated_by(get_network().get_engine())) { - _output = (memory_impl::ptr) &mem; + if (mem->is_allocated_by(get_network().get_engine())) { + _output = mem; } else { - mem_lock src((memory_impl::ptr) &mem); - mem_lock dst(_output); + mem_lock src(mem, get_network().get_stream()); + mem_lock dst(_output, get_network().get_stream()); std::copy(src.begin(), src.end(), dst.begin()); } diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index 7ba2a515120..716b5fa49eb 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -6,7 +6,6 @@ #include "kernel_selector_params.h" #include "to_string_utils.h" -#include "gpu/ocl_toolkit.h" #include "program_node.h" #include 
"program_impl.h" @@ -733,28 +732,27 @@ kernel_selector::activation_function get_kernel_selector_activation_param(activa void set_params(const program_node& node, kernel_selector::params& params) { const auto& program = node.get_program(); - const auto& context = program.get_engine().get_context(); - const auto& device_info = context->get_device_info(); + const auto& device_info = program.get_engine().get_device_info(); - params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups"); - params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short"); - params.engineInfo.bSubGroupCharSupport = context->extension_supported("cl_intel_subgroups_char"); - params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16"); - params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64"); + params.engineInfo.bSubGroupSupport = device_info.supports_subgroups; + params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short; + params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char; + params.engineInfo.bFP16Support = device_info.supports_fp16; + params.engineInfo.bFP64Support = device_info.supports_fp64; params.engineInfo.bIMADSupport = device_info.supports_imad != 0; params.engineInfo.bIMMADSupport = device_info.supports_immad != 0; params.engineInfo.bImageSupport = device_info.supports_image != 0; - params.engineInfo.bOptHintsSupport = device_info.supports_optimization_hints; + params.engineInfo.bOptHintsSupport = false; params.engineInfo.bLocalBlockIOSupport = device_info.supports_local_block_io; params.engineInfo.deviceType = get_device_type(device_info.dev_type); params.engineInfo.maxWorkGroupSize = device_info.max_work_group_size; params.engineInfo.maxLocalMemSize = device_info.max_local_mem_size; params.engineInfo.maxImage2dWidth = device_info.max_image2d_width; params.engineInfo.maxImage2dHeight = device_info.max_image2d_height; - params.engineInfo.computeUnitsCount = device_info.compute_units_count; + params.engineInfo.computeUnitsCount = device_info.execution_units_count; params.engineInfo.maxThreadsPerExecutionUnit = device_info.max_threads_per_execution_unit; params.engineInfo.maxThreadsPerDevice = device_info.max_threads_per_device; - params.engineInfo.deviceCache = context->get_device_cache(); + params.engineInfo.deviceCache = program.get_tuning_cache(); params.engineInfo.driverVersion = device_info.driver_version; auto impl_forcing_bo = program.get_options().get(); @@ -766,9 +764,7 @@ void set_params(const program_node& node, kernel_selector::params& params) { } void set_optional_params(const program_impl& program, kernel_selector::optional_params& params) { - const auto& context = program.get_engine().get_context(); - - params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names; + params.meaningfulKernelsNames = false; params.allowStaticInputReordering = program.get_options().get()->enabled() || program.get_options().get()->enabled(); params.allowInputReordering = false; diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 6540c4f7962..9c26b09cf27 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -6,7 +6,7 @@ #include "topology_impl.h" #include "network_impl.h" #include "primitive_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" 
#include "data_inst.h" #include "reorder_inst.h" diff --git a/inference-engine/thirdparty/clDNN/src/loop.cpp b/inference-engine/thirdparty/clDNN/src/loop.cpp index da79292fdaa..9399dfdfa6a 100644 --- a/inference-engine/thirdparty/clDNN/src/loop.cpp +++ b/inference-engine/thirdparty/clDNN/src/loop.cpp @@ -5,11 +5,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "loop_inst.h" -#include "error_handler.h" #include "json_object.h" #include "primitive_type_base.h" -#include "api/data.hpp" -#include "api/mutable_data.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/mutable_data.hpp" #include #include #include @@ -199,18 +198,18 @@ void loop_inst::preprocess_output_memory() { const primitive_id& external_id = output_mapping.external_id; const primitive_id& internal_id = output_mapping.internal_id; if (output_mapping.axis < 0) { - memory_impl::ptr memory = get_external_memory(external_id); - body_network->get_primitive(internal_id)->set_output_memory(*memory); + memory::ptr memory = get_external_memory(external_id); + body_network->get_primitive(internal_id)->set_output_memory(memory); } else { - memory_impl::ptr to_mem = get_external_memory(external_id); + memory::ptr to_mem = get_external_memory(external_id); auto output_prim = body_network->get_primitive(internal_id); layout sliced_layout = output_prim->output_memory().get_layout(); const int64_t max_iteration = node.get_max_iteration(); - std::vector sliced_mems; + std::vector sliced_mems; sliced_mems.reserve(max_iteration); for (int j=0; j < max_iteration; ++j) { - memory_impl::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); + memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); sliced_mems.push_back(sliced_mem); } @@ -219,7 +218,7 @@ void loop_inst::preprocess_output_memory() { const int64_t num_elements_iteration = sliced_layout.count() / num_elements_batch; const int64_t start = output_mapping.start < 0? 
node.get_max_iteration() - 1: output_mapping.start; concatenated_memory_mapping memory_mapping_info( - output_mapping.axis, to_mem, sliced_mems, + output_mapping.axis, to_mem, sliced_mems, _network.get_stream(), num_elements_iteration, output_mapping.stride, start); memory_mapping_info.concat_data_prim = body_network->get_primitive(internal_id); concatenated_output_mem_mappings.push_back(memory_mapping_info); @@ -241,7 +240,7 @@ void loop_inst::preprocess_input_memory() { CLDNN_ERROR_MESSAGE(id(), "loop primitive_map is incomplete"); } - memory_impl& memory = input_memory(memory_num); + auto memory = input_memory_ptr(memory_num); for (size_t i = 0; i < input_map_ptrs.size(); ++i) { const auto input_map = input_map_ptrs.at(i); bool is_concatenated_input = (input_map->axis >= 0); @@ -249,10 +248,10 @@ void loop_inst::preprocess_input_memory() { layout sliced_layout = body_network->get_primitive(input_map->internal_id)->output_memory().get_layout(); const int64_t max_iteration = node.get_max_iteration(); - std::vector sliced_mems; + std::vector sliced_mems; sliced_mems.reserve(max_iteration); for (int j=0; j < max_iteration; ++j) { - memory_impl::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); + memory::ptr sliced_mem = engine.allocate_memory(sliced_layout, 0); sliced_mems.push_back(sliced_mem); } const int64_t num_elements_batch = concatenated_memory_mapping::get_batch_size( @@ -260,12 +259,12 @@ void loop_inst::preprocess_input_memory() { const int64_t num_elements_iteration = sliced_layout.count() / num_elements_batch; const int64_t start = input_map->start < 0? node.get_max_iteration() - 1: input_map->start; concatenated_memory_mapping concatenated_input_mem_mapping_info( - input_map->axis, (memory_impl::ptr)&memory, sliced_mems, + input_map->axis, memory, sliced_mems, _network.get_stream(), num_elements_iteration, input_map->stride, start); concatenated_input_mem_mapping_info.sliced_data_prim = body_network->get_primitive(input_map->internal_id); iteration_mem.push_back(concatenated_input_mem_mapping_info); } else { - if (memory.get_layout().data_type != body_network->get_primitive(input_map->internal_id)->output_memory().get_layout().data_type) { + if (memory->get_layout().data_type != body_network->get_primitive(input_map->internal_id)->output_memory().get_layout().data_type) { CLDNN_ERROR_MESSAGE(id(), "incompatible datatypes"); } body_network->set_input_data(input_map->internal_id, memory); @@ -285,17 +284,17 @@ void loop_inst::preprocess_backedge_memory() { auto backedged_sliced_output_mems = get_sliced_mem(back_edge.from); const auto backedge_to_prim = body_network->get_primitive(back_edge.to); const auto backedge_from_prim = body_network->get_primitive(back_edge.from); - memory_impl::ptr initial_mem = get_external_memory(input_map->external_id); + memory::ptr initial_mem = get_external_memory(input_map->external_id); if (backedged_sliced_output_mems.empty()) { // backedge output which does not need concatenation // input memory = output memory = loop output memory const auto output_mapping = node.find_io_primitive_maps(back_edge.from, false); - memory_impl::ptr backedge_mem; + memory::ptr backedge_mem; if (output_mapping.empty()) { // from and to primitives in backedge are connected directly if (backedge_to_prim == backedge_from_prim->dependencies().front()) { backedge_memory_mappings.emplace_back( - backedge_from_prim, backedge_to_prim, initial_mem); + backedge_from_prim, backedge_to_prim, initial_mem, body_network->get_stream()); continue; } else { auto output_prim = 
body_network->get_primitive(back_edge.from); @@ -305,19 +304,19 @@ void loop_inst::preprocess_backedge_memory() { } else { backedge_mem = get_external_memory(output_mapping.front()->external_id); } - body_network->set_input_data(back_edge.to, *backedge_mem); - body_network->set_output_memory(back_edge.from, *backedge_mem); + body_network->set_input_data(back_edge.to, backedge_mem); + body_network->set_output_memory(back_edge.from, backedge_mem); backedge_memory_mappings.emplace_back( - backedge_from_prim, backedge_to_prim, backedge_mem, initial_mem); + backedge_from_prim, backedge_to_prim, backedge_mem, initial_mem, body_network->get_stream()); } else { // backedge output which needs concatenation backedge_memory_mappings.emplace_back( - backedge_from_prim, backedge_to_prim, backedged_sliced_output_mems, initial_mem); + backedge_from_prim, backedge_to_prim, backedged_sliced_output_mems, initial_mem, body_network->get_stream()); } } } -std::vector loop_inst::get_sliced_mem(const primitive_id& internal_id) const { +std::vector loop_inst::get_sliced_mem(const primitive_id& internal_id) const { for (const auto& mem_mapping : concatenated_input_mem_mappings) { if (mem_mapping.sliced_data_prim->id() == internal_id) { return mem_mapping.sliced_mems; @@ -331,20 +330,18 @@ std::vector loop_inst::get_sliced_mem(const primitive_id& inte return {}; // not found } -memory_impl::ptr loop_inst::get_external_memory(const primitive_id& external_id) const { +memory::ptr loop_inst::get_external_memory(const primitive_id& external_id) const { const auto outputPrim = _network.get_primitive(external_id); - memory_impl& memory = outputPrim->output_memory(); - return (memory_impl::ptr) &memory; + return outputPrim->output_memory_ptr(); } loop_inst::typed_primitive_inst(network_impl & network, loop_node const & node) : parent(network, node), preproc_memories_done(false), - body_network(node.get_program() - .get_engine() - .allocate_network(*node.get_body_program(), - network.get_stream_id(), - false)) { + body_network(network_impl::allocate_network(network.get_stream_ptr(), + node.get_body_program(), + false, + network.is_primary_stream())) { if (!check_if_axis_is_set_properly(node)) CLDNN_ERROR_MESSAGE(node.id(), "axis is not set properly"); diff --git a/inference-engine/thirdparty/clDNN/src/lrn.cpp b/inference-engine/thirdparty/clDNN/src/lrn.cpp index c1f0c1b3e1b..2f84b0fad13 100644 --- a/inference-engine/thirdparty/clDNN/src/lrn.cpp +++ b/inference-engine/thirdparty/clDNN/src/lrn.cpp @@ -4,7 +4,7 @@ #include "lrn_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm.cpp b/inference-engine/thirdparty/clDNN/src/lstm.cpp index 5c0d4c25ee3..c04b686e297 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "lstm_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm_dynamic.cpp b/inference-engine/thirdparty/clDNN/src/lstm_dynamic.cpp index 2e60fff1285..4d26546e995 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_dynamic.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_dynamic.cpp @@ -5,7 +5,7 @@ 
/////////////////////////////////////////////////////////////////////////////////////////////////// #include "lstm_dynamic_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp index 72e09670072..36b346c2f19 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_input.cpp @@ -6,7 +6,7 @@ #include "lstm_dynamic_input_inst.h" #include "lstm_dynamic_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_timeloop.cpp b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_timeloop.cpp index 6db06934be1..44db5f6ff28 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_dynamic_timeloop.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_dynamic_timeloop.cpp @@ -6,7 +6,7 @@ #include "lstm_dynamic_timeloop_inst.h" #include "lstm_dynamic_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp b/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp index 2c8346accbc..3e66d985f6d 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "lstm_elt_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp b/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp index 4456e29675a..77f2b624529 100644 --- a/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp +++ b/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "lstm_gemm_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp b/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp index 3e95bd95eb4..b43b72ca084 100644 --- a/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp +++ b/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp @@ -5,7 +5,7 @@ #include "max_unpooling_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/memory.cpp b/inference-engine/thirdparty/clDNN/src/memory.cpp deleted file mode 100644 index 9cf892500f0..00000000000 --- a/inference-engine/thirdparty/clDNN/src/memory.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/memory.hpp" -#include "memory_impl.h" -#include "engine_impl.h" - -namespace cldnn { - -memory 
memory::allocate(const engine& engine, const layout& layout, uint32_t net_id, bool reset) { - size_t size = layout.bytes_count(); - if (size == 0) - throw std::invalid_argument("size should be more than 0"); - - allocation_type type = engine.get()->get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d()); - return memory(engine.get()->allocate_memory(layout, type, net_id, reset).detach()); -} - -memory memory::share_buffer(const engine& engine, const layout& layout, shared_handle buf, uint32_t net_id) { - shared_mem_params params = { shared_mem_type::shared_mem_buffer, nullptr, nullptr, buf, -#ifdef _WIN32 - nullptr, -#else - 0, -#endif - 0 }; - return memory(engine.get()->reinterpret_handle(layout, ¶ms, net_id).detach()); -} - -memory memory::share_image(const engine& engine, const layout& layout, shared_handle img, uint32_t net_id) { - shared_mem_params params = { shared_mem_type::shared_mem_image, nullptr, nullptr, img, -#ifdef _WIN32 - nullptr, -#else - 0, -#endif - 0 }; - return memory(engine.get()->reinterpret_handle(layout, ¶ms, net_id).detach()); -} - -#ifdef _WIN32 -memory memory::share_surface(const engine& engine, const layout& layout, shared_handle surf, uint32_t plane, - uint32_t net_id) { - shared_mem_params params = { shared_mem_type::shared_mem_vasurface, nullptr, nullptr, nullptr, surf, plane }; - return memory(engine.get()->reinterpret_handle(layout, ¶ms, net_id).detach()); -} - -memory memory::share_dx_buffer(const engine& engine, const layout& layout, shared_handle res, uint32_t net_id) { - shared_mem_params params = { shared_mem_type::shared_mem_dxbuffer, nullptr, nullptr, res, nullptr, 0 }; - return memory(engine.get()->reinterpret_handle(layout, ¶ms, net_id).detach()); -} -#else -memory memory::share_surface(const engine& engine, const layout& layout, shared_surface surf, uint32_t plane, - uint32_t net_id) { - shared_mem_params params = { shared_mem_type::shared_mem_vasurface, nullptr, nullptr, nullptr, surf, plane }; - return memory(engine.get()->reinterpret_handle(layout, ¶ms, net_id).detach()); -} -#endif - -size_t memory::count() const { - if (_impl) - return get_layout().count(); - else - return 0; -} - -size_t memory::size() const { - if (_impl) - return _impl->size(); - else - return 0; -} - -const layout& memory::get_layout() const { - if (_impl) - return _impl->get_layout(); - else - throw std::runtime_error("empty memory object"); -} - -int memory::get_net_id() const { - if (_impl) - return _impl->get_net_id(); - else - throw std::runtime_error("empty memory object"); -} - -bool memory::is_allocated_by(const engine& engine) const { - if (_impl) - return _impl->is_allocated_by(*engine.get()); - else - return false; -} - -bool memory::is_the_same_buffer(const memory& other) const { - if (_impl == nullptr) - return false; - - if (_impl == other.get()) - return true; - - if (_impl->get_engine() != other.get()->get_engine()) - return false; - - // User memory, check te pointers - if (!_impl->get_engine()) - return lock_impl() == other.lock_impl(); - - // Engine memory, let it decide - return _impl->get_engine()->is_the_same_buffer(*_impl, *other.get()); -} - -shared_mem_params memory::get_internal_params() const { - if (_impl) - return _impl->get_internal_params(); - else - throw std::runtime_error("empty memory object"); -} - -memory memory::attach_impl(const cldnn::layout& layout, void* ptr, uint32_t net_id) { - return memory(new simple_attached_memory(layout, ptr, net_id)); -} - -void* memory::lock_impl() const { - if (_impl) - return 
_impl->lock(); - else - return nullptr; -} - -void memory::unlock() const { - if (_impl) _impl->unlock(); -} - -void memory::retain() { - if (_impl) _impl->add_ref(); -} -void memory::release() { - if (_impl) _impl->release(); -} - -void memory::reset() { - release(); - _impl = nullptr; -} - -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/mutable_data.cpp b/inference-engine/thirdparty/clDNN/src/mutable_data.cpp index 85f73d89c8a..7ba0640d9c1 100644 --- a/inference-engine/thirdparty/clDNN/src/mutable_data.cpp +++ b/inference-engine/thirdparty/clDNN/src/mutable_data.cpp @@ -5,9 +5,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "mutable_data_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" +#include "cldnn/runtime/memory.hpp" #include -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include @@ -20,19 +20,17 @@ primitive_type_id mutable_data::type_id() { } namespace { -memory_impl::ptr attach_or_copy_data(network_impl& network, memory_impl& mem) { +memory::ptr attach_or_copy_data(network_impl& network, memory::ptr mem, bool reuse) { auto& engine = network.get_engine(); - auto own_id = network.get_id(); + auto& stream = network.get_stream(); - if (mem.is_allocated_by(engine) && - (own_id == mem.get_net_id() || network.is_primary_stream())) { - mem.set_net(own_id); - return (memory_impl::ptr) & mem; + if (mem->is_allocated_by(engine) && reuse) { + return mem; } - memory_impl::ptr result = engine.allocate_memory(mem.get_layout(), network.get_id(), false); - mem_lock src(mem); - mem_lock dst(result); + memory::ptr result = engine.allocate_memory(mem->get_layout(), false); + mem_lock src(mem, stream); + mem_lock dst(result, stream); std::copy(src.begin(), src.end(), dst.begin()); return result; @@ -40,65 +38,16 @@ memory_impl::ptr attach_or_copy_data(network_impl& network, memory_impl& mem) { } // namespace mutable_data_node::typed_program_node(const std::shared_ptr dprim, program_impl& prog) - : parent(dprim, prog), mem(dprim->mem.get()) { + : parent(dprim, prog), mem(dprim->mem) { recalc_output_layout(false); can_share_buffer(false); - fill_memory(); } -void mutable_data_node::attach_memory(memory_impl& new_mem, bool invalidate_users_if_changed) { - mem = (memory_impl::ptr) &new_mem; +void mutable_data_node::attach_memory(memory::ptr new_mem, bool invalidate_users_if_changed) { + mem = new_mem; recalc_output_layout(invalidate_users_if_changed); } -void mutable_data_node::fill_memory() { - auto prim = get_primitive(); - - if (prim->fill_type == mutable_data::filler_type::no_fill) - return; - - auto memory = mem.get(); - auto layout = memory->get_layout(); - if (layout.data_type != data_types::f32) - CLDNN_ERROR_MESSAGE(id(), "only f32 data types can be filled"); - - switch (prim->fill_type) { - case mutable_data::filler_type::zero: - fill_memory_constant(0.f); - break; - case mutable_data::filler_type::one: - fill_memory_constant(1.f); - break; - case mutable_data::filler_type::xavier: - fill_memory_xavier(); - break; - default: - break; - } -} - -void mutable_data_node::fill_memory_xavier() { - auto memory = mem.get(); - auto layout = memory->get_layout(); - auto n = layout.count() / layout.size.batch[0]; - float scale = static_cast(sqrt(3.0f / static_cast(n))); - std::default_random_engine generator(0); - - mem_lock lock(mem); - auto out_ptr = lock.begin(); - std::uniform_real_distribution distribution(-scale, scale); - for 
(uint32_t i = 0; i < (uint32_t)layout.count(); i++) out_ptr[i] = distribution(generator); -} - -void mutable_data_node::fill_memory_constant(float value) { - auto memory = mem.get(); - auto layout = memory->get_layout(); - mem_lock lock(mem); - auto out_ptr = lock.begin(); - - for (uint32_t i = 0; i < (uint32_t)layout.count(); i++) out_ptr[i] = value; -} - std::string mutable_data_inst::to_string(mutable_data_node const& node) { auto node_info = node.desc_to_json(); @@ -109,6 +58,6 @@ std::string mutable_data_inst::to_string(mutable_data_node const& node) { } mutable_data_inst::typed_primitive_inst(network_impl& network, mutable_data_node const& node) - : parent(network, node, *attach_or_copy_data(network, node.get_attached_memory())) {} + : parent(network, node, attach_or_copy_data(network, node.get_attached_memory_ptr(), network.is_primary_stream())) {} } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp index 2ce1c58a1ff..14cfd2791a5 100644 --- a/inference-engine/thirdparty/clDNN/src/network.cpp +++ b/inference-engine/thirdparty/clDNN/src/network.cpp @@ -3,26 +3,28 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "network_impl.h" -#include "engine_impl.h" -#include "event_impl.h" -#include "program_impl.h" -#include "api/data.hpp" -#include "api/mutable_data.hpp" -#include "api/input_layout.hpp" -#include -#include "cldnn_itt.h" -#include "error_handler.h" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/input_layout.hpp" + +#include "cldnn/runtime/error_handler.hpp" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/event.hpp" +#include "cldnn/runtime/stream.hpp" + +#include "network_impl.h" +#include "program_impl.h" +#include "to_string_utils.h" #include "primitive_inst.h" #include "input_layout_inst.h" #include "mutable_data_inst.h" #include "condition_inst.h" #include "kernel_selector_helper.h" -#include "gpu/memory_gpu.h" -#include +#include "runtime/cldnn_itt.hpp" -#include "gpu/ocl_toolkit.h" +#include #include #include #include @@ -44,24 +46,27 @@ namespace cldnn { network::network(program const& program, uint16_t stream_id) - : _impl(program.get()->get_engine().allocate_network(*program.get(), stream_id).detach()) {} + : _impl(network_impl::allocate_network(program.get()->get_engine(), program.get(), false, stream_id == 0)) {} -engine network::get_engine() const { - auto impl = engine_impl::ptr(&_impl->get_engine()); - return engine(impl.detach()); +engine& network::get_engine() const { + return _impl->get_engine(); } program network::get_program() const { - auto impl = program_impl::cptr(&_impl->get_program()); - return program(const_cast(impl.detach())); + auto impl = std::const_pointer_cast(_impl->get_program()); + return program(impl); } -void network::set_input_data(const primitive_id& id, const memory& mem) const { - _impl->set_input_data(id, *mem.get()); +void network::set_input_data(const primitive_id& id, memory::ptr mem) const { + _impl->set_input_data(id, mem); } -void network::set_output_memory(const primitive_id& id, const memory& mem) const { - _impl->set_output_memory(id, *mem.get()); +void network::set_output_memory(const primitive_id& id, memory::ptr mem) const { + _impl->set_output_memory(id, mem); +} + +stream& network::get_stream() const { + return _impl->get_stream(); } uint32_t network::get_id() { 
@@ -100,28 +105,18 @@ std::vector network::get_output_ids() const { return _impl->get_output_ids(); } -memory network::get_output_memory(const primitive_id& output_id) const { - auto out_mem = memory_impl::ptr(&_impl->get_primitive(output_id)->output_memory()); - return memory(out_mem.detach()); +memory::ptr network::get_output_memory(const primitive_id& output_id) const { + return _impl->get_primitive(output_id)->output_memory_ptr(); } -event network::get_primitive_event(const primitive_id& output_id) const { - auto out_event = _impl->get_primitive_event(output_id); - return event(out_event.detach()); +event::ptr network::get_primitive_event(const primitive_id& output_id) const { + return _impl->get_primitive_event(output_id); } -std::map network::execute(const std::vector& dependencies) const { - std::vector> dep_impls(dependencies.size()); +std::map network::execute(const std::vector& dependencies) const { + std::vector dep_impls(dependencies.size()); - std::transform( - dependencies.begin(), - dependencies.end(), - dep_impls.begin(), - [](const event& ev) { - return event_impl::ptr(ev.get()); - }); - - _impl->execute(dep_impls); + _impl->execute(dependencies); auto output_ids = get_output_ids(); std::map result; @@ -131,14 +126,6 @@ std::map network::execute(const std::vector return result; } -void network::retain() { - _impl->add_ref(); -} - -void network::release() { - _impl->release(); -} - #ifdef DEBUG_DUMP_PATH static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) { #if defined HALF_HALF_HPP @@ -189,13 +176,14 @@ float convert_element(float f) { return f; } float convert_element(half_t h) { return convert_half_to_float(h); } template -static void dump(memory_impl& mem, std::ofstream& file_stream) { - auto&& size = mem.get_layout().size; +static void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) { + auto&& size = mem->get_layout().size; file_stream << "shape: " << size.to_string() << " "; - file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem.get_layout().format) << ")" << std::endl; + file_stream << "(count: " << size.count() << ", original format: " << cldnn::fmt_to_str(mem->get_layout().format) << ")" << std::endl; - auto mem_ptr = static_cast(mem.lock()); + mem_lock lock(mem, stream); + auto mem_ptr = lock.data(); for (cldnn::tensor::value_type g = 0; g < size.group[0]; ++g) { for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) { @@ -205,7 +193,7 @@ static void dump(memory_impl& mem, std::ofstream& file_stream) { for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) { cldnn::tensor t(cldnn::group(g), cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, w)); - size_t input_it = mem.get_layout().get_linear_offset(t); + size_t input_it = mem->get_layout().get_linear_offset(t); file_stream << std::fixed << std::setprecision(6) << convert_element(mem_ptr[input_it]) << std::endl; } } @@ -214,12 +202,10 @@ static void dump(memory_impl& mem, std::ofstream& file_stream) { } } } - - mem.unlock(); } template <> -void dump(memory_impl& mem, std::ofstream& file_stream) { - auto&& size = mem.get_layout().size; +void dump(memory::ptr mem, stream& stream, std::ofstream& file_stream) { + auto&& size = mem->get_layout().size; file_stream << "shape: "; file_stream << size.batch[0] << " "; @@ -228,7 +214,8 @@ void dump(memory_impl& mem, std::ofstream& file_stream) { file_stream << size.spatial[0] << " "; file_stream << 
"(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - auto mem_ptr = static_cast(mem.lock()); + mem_lock lock(mem, stream); + auto mem_ptr = lock.data(); for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) { for (cldnn::tensor::value_type f = 0; f < (cldnn::tensor::value_type)ceil_div(size.feature[0], 32); ++f) { @@ -236,87 +223,95 @@ void dump(memory_impl& mem, std::ofstream& file_stream) { for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) { cldnn::tensor t(cldnn::batch(b), cldnn::feature(f), cldnn::spatial(x, y, z, 0)); - size_t input_it = mem.get_layout().get_linear_offset(t); + size_t input_it = mem->get_layout().get_linear_offset(t); file_stream << mem_ptr[input_it] << std::endl; } } } } } - - mem.unlock(); } -static void log_memory_to_file(memory_impl& mem, std::string layerName) { +static void log_memory_to_file(memory::ptr mem, stream& stream, std::string layerName) { std::string filename = layerName; std::replace(filename.begin(), filename.end(), '\\', '_'); std::replace(filename.begin(), filename.end(), '/', '_'); std::replace(filename.begin(), filename.end(), ' ', '_'); std::replace(filename.begin(), filename.end(), ':', '_'); - filename = DEBUG_DUMP_PATH + filename + ".txt"; + filename = DEBUG_DUMP_PATH + filename + ".txt"; std::ofstream file_stream(filename); - if (mem.get_layout().data_type == cldnn::data_types::f32) - dump(mem, file_stream); - else if (mem.get_layout().data_type == cldnn::data_types::f16) - dump(mem, file_stream); - else if (mem.get_layout().data_type == cldnn::data_types::bin) - dump(mem, file_stream); - else if (mem.get_layout().data_type == cldnn::data_types::i32) - dump(mem, file_stream); - else if (mem.get_layout().data_type == cldnn::data_types::i8) - dump(mem, file_stream); - else if (mem.get_layout().data_type == cldnn::data_types::u8) - dump(mem, file_stream); + auto mem_dt = mem->get_layout().data_type; + if (mem_dt == cldnn::data_types::f32) + dump(mem, stream, file_stream); + else if (mem_dt == cldnn::data_types::f16) + dump(mem, stream, file_stream); + else if (mem_dt == cldnn::data_types::bin) + dump(mem, stream, file_stream); + else if (mem_dt == cldnn::data_types::i32) + dump(mem, stream, file_stream); + else if (mem_dt == cldnn::data_types::i8) + dump(mem, stream, file_stream); + else if (mem_dt == cldnn::data_types::u8) + dump(mem, stream, file_stream); } #endif /* Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants opt pass). 
*/ -network_impl::network_impl(const program_impl& program, uint16_t stream_id, bool is_internal) - : _program(&program), _stream_id(stream_id), _internal(is_internal), _reset_arguments(true) { +network_impl::network_impl(program_impl::ptr program, stream::ptr stream, bool is_internal, bool is_primary_stream) + : _program(program), _stream(stream), _internal(is_internal), _is_primary_stream(is_primary_stream), _reset_arguments(true) { static std::atomic id_gen{0}; if (!_internal) { net_id = ++id_gen; } - if (net_id) { - get_engine().get_context()->add_network(net_id); - } allocate_primitives(); check_names(); build_insts_deps(); build_exec_order(); validate_primitives(); - _program->dump_memory_pool(); } network_impl::~network_impl() { - for (auto const& prim : _exec_order) { - prim->cleanup(); - } - - auto toolkit = get_engine().get_context(); get_engine().get_memory_pool().clear_pool_for_network(net_id); - toolkit->release_pending_memory(net_id); - if (net_id) { - toolkit->remove_network(net_id); - } } -network_impl::network_impl(engine_impl& engine, +network_impl::ptr network_impl::allocate_network(stream::ptr stream, program_impl::ptr program, bool is_internal, bool is_primary_stream) { + return std::make_shared(program, stream, is_internal, is_primary_stream); +} + +network_impl::ptr network_impl::allocate_network(engine& engine, program_impl::ptr program, bool is_internal, bool is_primary_stream) { + auto stream = engine.create_stream(); + return std::make_shared(program, stream, is_internal, is_primary_stream); +} + +network_impl::ptr network_impl::build_network(engine& engine, + const topology_impl& topology, + const build_options& options, + bool is_internal) { + return std::make_shared(engine, topology, options, is_internal); +} + +network_impl::ptr network_impl::build_network(engine& engine, + const std::set>& nodes, + const build_options& options, + bool is_internal) { + return std::make_shared(engine, nodes, options, is_internal); +} + +network_impl::network_impl(engine& engine, const topology_impl& topo, const build_options& options, - uint16_t stream_id, bool is_internal) - : network_impl(*engine.build_program(topo, options, is_internal), stream_id, is_internal) {} + : network_impl(program_impl::build_program(engine, topo, options, is_internal), engine.create_stream(), is_internal) {} -network_impl::network_impl(engine_impl& engine, +network_impl::network_impl(engine& engine, const std::set>& nodes, const build_options& options, bool is_internal) - : network_impl(*engine.build_program(nodes, options, is_internal), 0, is_internal) {} + : network_impl(program_impl::build_program(engine, nodes, options, is_internal), engine.create_stream(), is_internal) {} void network_impl::validate_primitives() { for (auto const& prim : _exec_order) { @@ -337,7 +332,7 @@ void network_impl::set_arguments() { void network_impl::reset_execution(bool wait) { if (wait && _events.size() > 0) { - std::vector events; + std::vector events; for (auto& pair : _events) { auto& ev = pair.second; if (ev->is_set()) @@ -346,12 +341,12 @@ void network_impl::reset_execution(bool wait) { events.push_back(ev); } - get_engine().wait_for_events(events); + get_stream().wait_for_events(events); } _events.clear(); } -void network_impl::set_input_data(const primitive_id& id, memory_impl& data) { +void network_impl::set_input_data(const primitive_id& id, memory::ptr data) { std::shared_ptr primitive_inst; primitive_inst = find_primitive(id); @@ -370,7 +365,7 @@ void network_impl::set_input_data(const primitive_id& id, 
memory_impl& data) { input->set_data(data); } -void network_impl::set_output_memory(const primitive_id& id, memory_impl& mem) { +void network_impl::set_output_memory(const primitive_id& id, memory::ptr mem) { std::shared_ptr primitive_inst; primitive_inst = find_primitive(id); @@ -426,16 +421,6 @@ void network_impl::set_learning_rate(const float lr) { _learning_rate = lr; } float network_impl::get_learning_rate() { return _learning_rate; } -bool network_impl::is_primary_stream() { - auto _nstreams = get_engine().configuration().n_streams; - return _nstreams == 1 || (_nstreams > 1 && _stream_id > 0); -} - -bool network_impl::is_secondary_stream() { - auto _nstreams = get_engine().configuration().n_streams; - return _nstreams > 1 && _stream_id > 0; -} - std::string network_impl::get_primitive_info(const primitive_id& id) const { const auto& node = _program->get_node(id); return node.type()->to_string(node); @@ -452,14 +437,6 @@ void network_impl::allocate_primitives() { return (lhs->get_output_layout().bytes_count() > rhs->get_output_layout().bytes_count()); }); - std::vector> mutable_data_nodes; - for (auto const& node : nodes_to_allocate) { - if (node->is_type()) - mutable_data_nodes.push_back(node); - } - - allocate_mutable_data_for_streams(mutable_data_nodes); - for (auto const& node : nodes_to_allocate) { allocate_primitive_instance(*node); } @@ -483,30 +460,21 @@ void network_impl::add_to_exec_order(const primitive_id& id) { _exec_order.push_back(inst); } -void network_impl::execute(const std::vector>& events) { +void network_impl::execute(const std::vector& events) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "NetworkImpl::Execute"); // Wait for previous execution completion reset_execution(false); - // collect all shared media surfaces and enqueue acquire/relese - auto check_and_add_to_return_vec = [](std::shared_ptr prim, std::vector& return_vec) { - const auto& mem = prim->output_memory().get_internal_params(); - if (mem.mem_type == shared_mem_type::shared_mem_vasurface || - mem.mem_type == shared_mem_type::shared_mem_dxbuffer) { - return_vec.push_back(static_cast(mem.mem)); - } - }; - std::vector surfaces; - + std::vector in_out_mem; for (auto& inst : _inputs) { - check_and_add_to_return_vec(inst, surfaces); + in_out_mem.push_back(inst->output_memory_ptr()); } for (auto& inst : _outputs) { - check_and_add_to_return_vec(inst, surfaces); + in_out_mem.push_back(inst->output_memory_ptr()); } - cl_int err; - cl::SharedSurfLock lock(get_engine().get_context()->queue(get_id()).get(), surfaces, &err); + + auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream()); set_arguments(); @@ -523,7 +491,7 @@ void network_impl::execute(const std::vector>& ev #endif std::cerr << "Dump " << layer_name << " layer" << std::endl; for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) { - log_memory_to_file(get_primitive(inst->id())->dep_memory(i), + log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i), get_stream(), layer_name + "_src_" + std::to_string(i)); } #if DUMP_SINGLE_LAYER @@ -537,15 +505,16 @@ void network_impl::execute(const std::vector>& ev inst->set_arguments(); } execute_primitive(inst, events); + #ifdef DEBUG_DUMP_PATH + get_stream().finish(); #if DUMP_SINGLE_LAYER if (layer_name == DUMP_LAYER_NAME) #endif { - log_memory_to_file(get_primitive(inst->id())->output_memory(), layer_name + "_dst_0"); + log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(), get_stream(), layer_name + "_dst_0"); } - 
get_engine().flush_network(get_id()); #endif } @@ -576,19 +545,19 @@ void network_impl::execute(const std::vector>& ev for (auto& dout : _data_outputs) { // data primitives are not executed so if they are marked as output we need to add // them valid events manually - _events[dout->id()] = get_engine().create_user_event(get_id(), true); + _events[dout->id()] = get_stream().create_user_event(true); } for (auto& prim : _primitives) { prim.second->reset_output_change(); } - get_engine().get_context()->reset_events(get_id()); + get_stream().reset_events(); - // Using output of previouse network as input to another one may cause hazard (in OOOQ mode) if user would not + // Using output of previous network as input to another one may cause hazard (in OOOQ mode) if user would not // provide proper event to execution. Flushing pipeline should prevent this kind of issues. // In scenarios with a big number of very small networks it can provide performance drop. - get_engine().flush_network(get_id()); + get_stream().flush(); } std::vector network_impl::get_input_ids() const { @@ -664,7 +633,7 @@ std::vector> network_impl::get_primitives(const } void network_impl::execute_primitive(const std::shared_ptr& primitive, - const std::vector>& events) { + const std::vector& events) { auto id = primitive->id(); auto it = _events.find(id); bool found = (it != _events.end()); @@ -673,45 +642,10 @@ void network_impl::execute_primitive(const std::shared_ptr& prim found, "Primitive " + id + " is tried to be executed for the second time"); - event_impl::ptr ev; - if (!get_engine().get_context()->enabled_single_kernel() || get_engine().get_context()->single_kernel_name() == id) - ev = primitive->execute(events); - else - ev = get_engine().create_user_event(get_id(), true); + event::ptr ev = primitive->execute(events); _events.insert({id, ev}); } -void network_impl::allocate_mutable_data_for_streams(std::vector>& mutable_data_nodes) { - // When multiple streams are used, mutable_data should be duplicated for each stream. 
- while (!mutable_data_nodes.empty()) { - auto it = mutable_data_nodes.begin(); - mutable_data_node& node = (*it)->as(); - auto mem = node.get_attached_memory_ptr(); - - if (is_secondary_stream()) { - // Alloc new buffer for this stream and copy data to have valid initial state - memory_impl::ptr result = get_engine().allocate_memory(mem->get_layout(), get_id(), false); - { - mem_lock src(mem); - mem_lock dst(result); - std::copy(src.begin(), src.end(), dst.begin()); - } - - // It's possible that several mutable_data nodes use the same memory buffer, so replace all usages - for (auto it1 = it; it1 != mutable_data_nodes.end();) { - if (get_engine().is_the_same_buffer((*it1)->as().get_attached_memory(), *mem)) { - (*it1)->as().attach_memory(*result, false); - it1 = mutable_data_nodes.erase(it1); - } else { - ++it1; - } - } - } else { - mutable_data_nodes.erase(it); - } - } -} - void network_impl::allocate_primitive_instance(program_node const& node) { if (_primitives.count(node.id())) return; @@ -746,16 +680,16 @@ void network_impl::transfer_memory_to_device(std::shared_ptr ins if (node.need_lockable_memory()) return; + if (!get_engine().supports_allocation(allocation_type::usm_device)) + return; + if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) { // Allocate and transfer memory auto& mem_pool = inst_mem.get_engine()->get_memory_pool(); - auto device_mem = inst_mem.get_engine()->allocate_memory( - inst_mem.get_layout(), - allocation_type::usm_device, - inst_mem.get_net_id()); - dynamic_cast(*device_mem).copy_from_other(dynamic_cast(inst_mem)); - mem_pool.release_memory(&inst_mem, node.id()); - instance->set_output_memory(*device_mem); + auto device_mem = inst_mem.get_engine()->allocate_memory(inst_mem.get_layout(), allocation_type::usm_device, false); + device_mem->copy_from(get_stream(), inst_mem); + mem_pool.release_memory(&inst_mem, node.id(), get_id()); + instance->set_output_memory(device_mem); } } } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp b/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp index a776234eae6..54fb3046ddb 100644 --- a/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp +++ b/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp @@ -6,7 +6,7 @@ #include "program_impl.h" #include "program_node.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include #include #include diff --git a/inference-engine/thirdparty/clDNN/src/normalize.cpp b/inference-engine/thirdparty/clDNN/src/normalize.cpp index 4bafd2263ee..d342e958b1e 100644 --- a/inference-engine/thirdparty/clDNN/src/normalize.cpp +++ b/inference-engine/thirdparty/clDNN/src/normalize.cpp @@ -4,7 +4,7 @@ #include "normalize_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/one_hot.cpp b/inference-engine/thirdparty/clDNN/src/one_hot.cpp index 3b985254018..d3a02ec58c4 100644 --- a/inference-engine/thirdparty/clDNN/src/one_hot.cpp +++ b/inference-engine/thirdparty/clDNN/src/one_hot.cpp @@ -4,7 +4,7 @@ #include "one_hot_inst.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "primitive_type_base.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/permute.cpp b/inference-engine/thirdparty/clDNN/src/permute.cpp index c820de4e425..3d6671f923a 100644 --- 
a/inference-engine/thirdparty/clDNN/src/permute.cpp +++ b/inference-engine/thirdparty/clDNN/src/permute.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "permute_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/pooling.cpp b/inference-engine/thirdparty/clDNN/src/pooling.cpp index 8376b7404f1..73801ffa2b9 100644 --- a/inference-engine/thirdparty/clDNN/src/pooling.cpp +++ b/inference-engine/thirdparty/clDNN/src/pooling.cpp @@ -5,7 +5,7 @@ #include "pooling_inst.h" #include "primitive_type_base.h" #include "sliding_window_utils.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp index 43ec27e3e1e..9e1ad0902ce 100644 --- a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp +++ b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp @@ -2,21 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// #include "primitive_inst.h" #include "data_inst.h" #include "mutable_data_inst.h" #include "generic_layer_inst.h" #include "input_layout_inst.h" -#include "max_unpooling_inst.h" #include "arg_max_min_inst.h" #include "fused_conv_eltwise_inst.h" #include "network_impl.h" -#include "engine_impl.h" -#include "memory_impl.h" +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/memory.hpp" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include #include @@ -27,7 +25,7 @@ namespace cldnn { uint32_t primitive_inst::get_network_id() const { return _network.get_id(); } -void primitive_inst::check_memory_to_set(const memory_impl& mem, const layout& layout) const { +void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout) const { CLDNN_ERROR_LAYOUT_MISMATCH("network layout", "set memory layout", mem.get_layout(), @@ -60,15 +58,15 @@ void primitive_inst::check_memory_to_set(const memory_impl& mem, const layout& l } } -void primitive_inst::set_output_memory(memory_impl& mem) { +void primitive_inst::set_output_memory(memory::ptr mem) { auto ol = _node.get_output_layout(); - check_memory_to_set(mem, ol); + check_memory_to_set(*mem, ol); - _output = (memory_impl::ptr) &mem; + _output = mem; } -event_impl::ptr primitive_inst::execute(const std::vector& events) { +event::ptr primitive_inst::execute(const std::vector& events) { const auto primitive_id = id(); CLDNN_ERROR_BOOL(primitive_id, "Invalid/unset input", @@ -79,12 +77,12 @@ event_impl::ptr primitive_inst::execute(const std::vector& even if (_exec_deps.empty()) return _impl->execute(events, *this); - std::vector dependencies; + std::vector dependencies; dependencies.reserve(_exec_deps.size()); for (auto& input : _exec_deps) { auto id = input->id(); try { - // if the requested event deos not exits it means that it has not been executed, so the processing_order is + // if the requested event does not exits it means that it has not been executed, so the processing_order is // wrong or synchronization failed. 
auto ev = get_network().get_primitive_event(id); dependencies.emplace_back(ev); @@ -107,10 +105,6 @@ void primitive_inst::set_arguments() { _impl->set_arguments(*this); } -void primitive_inst::cleanup() { - _impl->cleanup(*this); -} - void primitive_inst::build_deps() { if (_deps.empty() && !_node.get_dependencies().empty()) { _deps = _network.get_primitives(_node.get_dependencies()); @@ -119,7 +113,7 @@ void primitive_inst::build_deps() { } primitive_inst::primitive_inst(network_impl& network, program_node const& node, bool allocate_memory) - : _network(network), _node(node), _impl(node.get_selected_impl()), _output(), _output_changed(false) { + : _network(network), _node(node), _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr), _output(), _output_changed(false) { if (allocate_memory) { // In case when output is mutable_data primitive, and other users dependencies are only used for // suychronization, The output memory of such primitive will be fused with mutable_data @@ -153,7 +147,7 @@ primitive_inst::primitive_inst(network_impl& network, program_node const& node, } } -memory_impl::ptr primitive_inst::allocate_output() { +memory::ptr primitive_inst::allocate_output() { auto layout = _node.get_output_layout(); auto net_id = get_network_id(); auto& engine = get_network().get_engine(); @@ -163,33 +157,35 @@ memory_impl::ptr primitive_inst::allocate_output() { auto use_lockable_memory = _node.is_output() || _node.get_selected_impl()->is_cpu() || std::any_of(_node.get_users().begin(), _node.get_users().end(), [](const program_node* n) {return n->get_selected_impl()->is_cpu() || n->can_be_optimized(); }) - || engine.supports_allocation(allocation_type::usm_device) == false; + || !engine.supports_allocation(allocation_type::usm_device); allocation_type alloc_type = use_lockable_memory ? 
engine.get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d()) : allocation_type::usm_device; + if (!_network.is_internal() && (_node.can_be_optimized() || _node.is_type())) { - return engine.allocate_memory(layout, - _node.id(), - net_id, - _node.get_memory_dependencies(), - alloc_type, - false); + return engine.get_memory_from_pool(layout, + _node.id(), + net_id, + _node.get_memory_dependencies(), + alloc_type, + false); } else if (_network.is_internal() && _node.is_output() && _node.is_type() && engine.supports_allocation(allocation_type::usm_device)) { - return engine.allocate_memory(layout, allocation_type::usm_device, net_id, false); + return engine.allocate_memory(layout, allocation_type::usm_device, false); } else if (_network.is_internal() && !_node.is_output() && _node.is_type()) { // Skip memory reset for input_layout primitives, since data will be copied from cldnn::data primitive // or just reuse primitive's memory - return engine.allocate_memory(layout, alloc_type, net_id, false); + return engine.allocate_memory(layout, alloc_type, false); } else if (_network.is_internal() || (!_node.can_share_buffer()) || _node.can_be_optimized() || _node.is_output()) { - return engine.allocate_memory(layout, alloc_type, net_id); + return engine.allocate_memory(layout, alloc_type); + } else { + return engine.get_memory_from_pool(layout, + _node.id(), + net_id, + _node.get_memory_dependencies(), + alloc_type, + true); } - return engine.allocate_memory(layout, - _node.id(), - net_id, - _node.get_memory_dependencies(), - alloc_type, - true); } std::vector> primitive_inst::build_exec_deps( diff --git a/inference-engine/thirdparty/clDNN/src/prior_box.cpp b/inference-engine/thirdparty/clDNN/src/prior_box.cpp index a85691bb9c6..8d536ac6561 100644 --- a/inference-engine/thirdparty/clDNN/src/prior_box.cpp +++ b/inference-engine/thirdparty/clDNN/src/prior_box.cpp @@ -4,7 +4,7 @@ #include "prior_box_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include @@ -21,7 +21,7 @@ primitive_type_id prior_box::type_id() { namespace { template -void calculate_prior_box_output(memory_impl& output_mem, layout const& input_layout, prior_box& argument) { +void calculate_prior_box_output(memory::ptr output_mem, stream& stream, layout const& input_layout, prior_box& argument) { // Calculate output. // All the inputs for this layer are known at this point, // so the output buffer is written here and not in execute(). @@ -39,10 +39,10 @@ void calculate_prior_box_output(memory_impl& output_mem, layout const& input_lay const float offset = argument.offset; int num_priors = argument.is_clustered() ? static_cast(argument.widths.size()) : - output_mem.get_layout().size.spatial[1] / 4 / layer_width / layer_height; + output_mem->get_layout().size.spatial[1] / 4 / layer_width / layer_height; int var_size = static_cast(argument.variance.size()); - mem_lock lock{output_mem}; + mem_lock lock{output_mem, stream}; auto out_ptr = lock.begin(); int dim = layer_height * layer_width * num_priors * 4; @@ -204,7 +204,7 @@ void calculate_prior_box_output(memory_impl& output_mem, layout const& input_lay } // set the variance. - int count = output_mem.get_layout().size.spatial[0] * output_mem.get_layout().size.spatial[1]; + int count = output_mem->get_layout().size.spatial[0] * output_mem->get_layout().size.spatial[1]; int var_loop_count = argument.is_clustered() ? 
var_size : 4; for (int h = 0; h < layer_height; ++h) { for (int w = 0; w < layer_width; ++w) { @@ -224,7 +224,7 @@ prior_box_node::typed_program_node(std::shared_ptr prim, program_impl } void prior_box_node::calc_result() { - if (result != (memory_impl::ptr) nullptr) + if (result != nullptr) return; auto& argument = *typed_desc(); @@ -340,15 +340,17 @@ void prior_box_node::calc_result() { CLDNN_ERROR_BOOL(id(), "Prior box padding", is_padded(), "Prior-box layer doesn't support output padding."); // allocate storage - result = get_program().get_engine().allocate_memory(get_output_layout(), 0, false); + result = get_program().get_engine().allocate_memory(get_output_layout()); // perform calculations if (get_output_layout().data_type == data_types::f16) - calculate_prior_box_output::type>(*result, + calculate_prior_box_output::type>(result, + get_program().get_stream(), input().get_output_layout(), *typed_desc()); else - calculate_prior_box_output::type>(*result, + calculate_prior_box_output::type>(result, + get_program().get_stream(), input().get_output_layout(), *typed_desc()); } diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index 97eacdf9ae1..ec59a526be0 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -4,10 +4,10 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "kernel_selector_helper.h" -#include "internal_primitive.h" -#include "internal_primitive_type_base.h" +#include "device_cache_reader.h" +#include "auto_tuner.h" #include "layout_optimizer.h" #include "pass_manager.h" #include "primitive_type.h" @@ -53,12 +53,13 @@ #include "reduce_inst.h" #include "region_yolo_inst.h" #include "strided_slice_inst.h" -#include "to_string_utils.h" -#include "gpu/memory_gpu.h" -#include "cldnn_itt.h" #include "loop_inst.h" +#include "to_string_utils.h" +#include "gpu/register_gpu.hpp" +#include "runtime/cldnn_itt.hpp" -#include "gpu/ocl_toolkit.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/engine.hpp" #include "kernel_base.h" @@ -76,25 +77,21 @@ #include #include -program::program(engine const& engine, topology const& topology, build_options const& options) - : _impl(engine.get()->build_program(*topology.get(), options).detach()) {} +program::program(engine& engine, const topology& topology, const build_options& options) + : _impl(program_impl::build_program(engine, *topology.get(), options)) {} -void program::retain() { - _impl->add_ref(); -} - -void program::release() { - _impl->release(); -} - -program_impl::program_impl(engine_impl& engine_ref, +program_impl::program_impl(engine& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal, bool no_optimizations) - : engine(&engine_ref), + : _engine(engine_ref), + _stream(_engine.create_stream()), + program_state(_engine), options(options), - processing_order() { + processing_order(), + tuning_cache(nullptr) { + init_primitives(); kernel_selector::KernelBase::ResetCounter(); set_options(); pm = std::unique_ptr(new pass_manager(*this)); @@ -106,13 +103,16 @@ program_impl::program_impl(engine_impl& engine_ref, } } -program_impl::program_impl(engine_impl& engine_ref, +program_impl::program_impl(engine& engine_ref, std::set> const& nodes, build_options const& options, bool is_internal) - : engine(&engine_ref), + : 
_engine(engine_ref), + program_state(_engine), options(options), - processing_order() { + processing_order(), + tuning_cache(nullptr) { + init_primitives(); set_options(); pm = std::unique_ptr(new pass_manager(*this)); prepare_nodes(nodes); @@ -120,7 +120,58 @@ program_impl::program_impl(engine_impl& engine_ref, } program_impl::~program_impl() { - engine->get_context()->remove_program(prog_id); +} + +void program_impl::init_primitives() { + static bool is_initialized = false; + if (!is_initialized) { + gpu::register_implementations_gpu(); + is_initialized = true; + } +} + +void program_impl::compile() { + auto& cache = program_state._kernels_cache; + cache.build_all(); +} + +void program_impl::init_kernels() { + for (auto& n : get_processing_order()) { + if (n->get_selected_impl()) + n->get_selected_impl()->init_kernels(); + } +} + +void program_impl::load_tuning_cache() { + OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "ProgramImpl::LoadTuningCache"); + try { + tuning_cache = kernel_selector::CreateTuningCacheFromFile(get_engine().configuration().tuning_cache_path); + } catch (...) { + tuning_cache = std::make_shared(); + } +} + +kernel_id program_impl::add_kernel(const std::shared_ptr kernelSring) { + return program_state._kernels_cache.set_kernel_source(kernelSring, false); +} + +kernel::ptr program_impl::get_kernel(kernel_id id) { + return program_state._kernels_cache.get_kernel(id); +} + +program_impl::ptr program_impl::build_program(engine& engine, + const topology_impl& topology, + const build_options& options, + bool is_internal, + bool no_optimizations) { + return std::make_shared(engine, topology, options, is_internal, no_optimizations); +} + +program_impl::ptr program_impl::build_program(engine& engine, + const std::set>& nodes, + const build_options& options, + bool is_internal) { + return std::make_shared(engine, nodes, options, is_internal); } program_node& program_impl::get_node(primitive_id const& id) { @@ -248,7 +299,7 @@ void program_impl::prepare_nodes(std::set> const& for (const auto& itr : nodes) { if (itr.get()->is_type()) { get_or_create(std::make_shared(itr.get()->id(), - itr.get()->as().get_primitive()->mem.get_layout())); + itr.get()->as().get_primitive()->mem->get_layout())); } else { get_or_create(itr->desc); } @@ -341,11 +392,9 @@ void program_impl::set_options() { prog_id = ++id_gen; assert(prog_id != 0); - get_engine().get_context()->add_program(prog_id); - if ((options.get()->config.mode == tuning_mode::tuning_tune_and_cache || options.get()->config.mode == tuning_mode::tuning_retune_and_cache) && - !engine->configuration().enable_profiling) { + !_engine.configuration().enable_profiling) { throw std::invalid_argument("Engine must be created with profiling enabled in tune_and_cache mode!"); } @@ -360,13 +409,13 @@ void program_impl::build_program(bool is_internal) { run_graph_compilation(); { post_optimize_graph(is_internal); } prepare_memory_dependencies(); - engine->compile_program(*this); + compile(); + init_kernels(); - if (!is_internal) + if (!is_internal) { prim_info = get_current_stage_info(); - - if (!is_internal) transfer_memory_to_device(); + } cleanup(); } @@ -376,7 +425,7 @@ void program_impl::init_graph() { apply_opt_pass(); for (auto& node : processing_order) { - if (!node->is_type() && !node->is_type()) + if (!node->is_type()) node->get_output_layout(); } @@ -389,6 +438,10 @@ void program_impl::run_graph_compilation() { apply_opt_pass(); } void program_impl::pre_optimize_graph(bool is_internal) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, 
"ProgramImpl::PreOptimizeGraph"); + + if (!is_internal) + load_tuning_cache(); + // trim to outputs apply_opt_pass(); // ToDo remove hidden dependencies from trimm pass @@ -401,7 +454,7 @@ void program_impl::pre_optimize_graph(bool is_internal) { bool output_size_handling_enabled = analyze_output_size_handling_need(); for (auto& node : processing_order) { - if (!node->is_type() && !node->is_type()) + if (!node->is_type()) node->get_output_layout(); } @@ -522,6 +575,9 @@ void program_impl::mark_if_data_flow(program_node& node) { void program_impl::transfer_memory_to_device() { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "ProgramImpl::TransferMemory"); + if (!get_engine().supports_allocation(allocation_type::usm_device)) + return; + for (auto& node : processing_order) { if (node->is_type() && !node->need_lockable_memory()) { auto& data_node = node->as(); @@ -535,15 +591,15 @@ void program_impl::transfer_memory_to_device() { throw std::invalid_argument(err_str); } + if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) { // Allocate and transfer memory - auto device_mem = mem.get_engine()->allocate_memory(data_node_layout, - allocation_type::usm_device, - mem.get_net_id(), - false); - dynamic_cast(*device_mem).copy_from_other(dynamic_cast(mem)); - data_node.attach_memory(*device_mem); - const_cast(data_node.get_primitive()->mem).reset(); + auto device_mem = mem.get_engine()->allocate_memory(data_node_layout, allocation_type::usm_device, false); + device_mem->copy_from(get_stream(), mem); + data_node.attach_memory(device_mem); + const_cast(data_node.get_primitive()->mem).reset(); + // TODO: Do we need finish call here? Maybe call it in network::execute() ? + get_stream().finish(); } } } @@ -551,8 +607,7 @@ void program_impl::transfer_memory_to_device() { void program_impl::cleanup() { for (auto& node : processing_order) - if (!node->is_type()) - node->get_output_layout(); + node->get_output_layout(); // in debug build, at the end, mark all nodes as outputs so user can query for buffers of all not-optimized nodes, // including internal ones etc. 
@@ -595,7 +650,7 @@ program_impl::nodes_ordering& program_impl::get_processing_order() { return proc const program_impl::nodes_ordering& program_impl::get_processing_order() const { return processing_order; } void program_impl::prepare_memory_dependencies() { - if (!get_engine().configuration().enable_memory_pool) + if (!get_engine().configuration().use_memory_pool) return; apply_opt_pass(); @@ -760,18 +815,12 @@ void program_impl::rename(program_node& node, primitive_id const& new_id) { nodes_map.emplace(new_id, node_ptr); nodes_map.erase(node.id()); - if (!node.is_type()) - const_cast(node.desc->id) = new_id; - else - reinterpret_cast(node).internal_id = new_id; + const_cast(node.desc->id) = new_id; } void program_impl::swap_names(program_node& node1, program_node& node2) { const auto _extract_id = [](program_node& node) -> primitive_id& { - if (!node.is_type()) - return const_cast(node.desc->id); - else - return reinterpret_cast(node).internal_id; + return const_cast(node.desc->id); }; nodes_map.at(node1.id()).swap(nodes_map.at(node2.id())); @@ -1011,24 +1060,6 @@ void program_impl::remove_nodes(std::vector& to_remove) { } } -void program_impl::dump_memory_pool() const { - if (!get_engine().configuration().enable_memory_pool) - return; - auto path = get_dir_path(options); - if (path.empty()) { - return; - } - path += "cldnn_memory_pool.log"; - auto dep = get_memory_dependencies_string(); - get_engine().dump_memory_pool(*this, path, dep); - std::string dump_file_name; - if (pm->get_pass_count() < 10) - dump_file_name += "0"; - dump_file_name += std::to_string(pm->get_pass_count()) + "_memory_pool"; - pm->inc_pass_count(); - dump_program(dump_file_name.c_str(), true); -} - // TODO: break this function into number of smaller ones + add per-primitive fields (possibly use // primitive_inst::to_string?) void program_impl::dump_program(const char* stage, diff --git a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp index 938c888d089..12e1abb4f34 100644 --- a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp @@ -9,8 +9,6 @@ #include "data_inst.h" #include "condition_inst.h" -#include "gpu/ocl_toolkit.h" - #include #include #include @@ -213,7 +211,7 @@ void dump_graph_init(std::ofstream& graph, !node->can_be_optimized()) { graph << "\\n Selected kernel: " << (node->get_selected_impl() == nullptr ? 
"none" - : node->get_selected_impl().get()->get_kernel_name()) + + : node->get_selected_impl()->get_kernel_name()) + "\n" + dump_mem_info(node); } graph << "\""; @@ -227,9 +225,6 @@ void dump_graph_init(std::ofstream& graph, if (node->is_type() || node->is_constant()) { graph << ", shape=box"; } - if (node->is_type()) { - graph << ", color=blue"; - } if (node->is_reusing_memory()) { graph << ", fillcolor=\"" << colors[node->get_reused_memory_color() % colors.size()] << "\" "; diff --git a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp index 27c67c03211..ccd099c2d4a 100644 --- a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp @@ -13,24 +13,25 @@ namespace cldnn { // helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization -void program_helpers::merge_buffers(engine_impl& engine, +void program_helpers::merge_buffers(engine& engine, program_node& node, const layout& target_layout, size_t begin_offset, size_t end_offset) { - memory_impl::ptr data_to_allocate = engine.allocate_memory(target_layout, 0, false); + memory::ptr data_to_allocate = engine.allocate_memory(target_layout, false); + auto& stream = node.get_program().get_stream(); for (size_t i = begin_offset; i < end_offset; i++) { auto& weights = node.get_dependency(i).as(); - mem_lock src{weights.get_attached_memory()}; - mem_lock dst{data_to_allocate}; + mem_lock src{weights.get_attached_memory_ptr(), stream}; + mem_lock dst{data_to_allocate, stream}; std::copy(src.begin(), src.end(), dst.begin() + (i - begin_offset) * src.size()); } for (size_t i = 0; i < end_offset - begin_offset - 1; i++) node.remove_dependency(begin_offset + 1); auto& data_node = node.get_dependency(begin_offset).as(); - data_node.attach_memory(*data_to_allocate, false); + data_node.attach_memory(data_to_allocate, false); } void program_helpers::reshape_deconvolution_weights(const std::vector &deconv_weights, diff --git a/inference-engine/thirdparty/clDNN/src/program_node.cpp b/inference-engine/thirdparty/clDNN/src/program_node.cpp index 63bdc8f58eb..4e24eacf25a 100644 --- a/inference-engine/thirdparty/clDNN/src/program_node.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_node.cpp @@ -71,7 +71,6 @@ std::unique_ptr program_node::desc_to_json() const { node_info->add("ptr", "node_" + std::to_string(reinterpret_cast(this))); node_info->add("id", id()); node_info->add("type", desc->type_string()); - node_info->add("internal", bool_to_str(this->is_type())); node_info->add("valid output layout", bool_to_str(valid_output_layout)); json_composite output_layout_info; @@ -267,6 +266,10 @@ bool program_node::is_padding_supported(int axis, int padding) const { return true; } + void program_node::set_selected_impl(std::unique_ptr impl) { + selected_impl = std::move(impl); +} + bool program_node::need_lockable_memory() const { bool need_lockable_mem = get_users().empty() || std::any_of(get_users().begin(), get_users().end(), [](const program_node* n) { return n->get_selected_impl()->is_cpu(); @@ -274,16 +277,3 @@ bool program_node::need_lockable_memory() const { return need_lockable_mem; } - -primitive_id details::internal_program_node_base::get_next_internal_id() { - static std::atomic counter{0}; - auto idx = counter++; - return primitive_id("_cldnn_internal_") + std::to_string(idx); -} - -details::internal_program_node_base::internal_program_node_base(program_impl& 
prog) - : program_node(nullptr, prog), internal_id(get_next_internal_id()) {} - -void details::internal_program_node_base::set_implementation(std::unique_ptr&& impl) { - selected_impl = std::move(impl); -} diff --git a/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp b/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp index bc2bbbc095d..37eb2bc1650 100644 --- a/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp +++ b/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "pyramid_roi_align_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/quantize.cpp b/inference-engine/thirdparty/clDNN/src/quantize.cpp index 95f4e5c6be3..6d26b7fdbdb 100644 --- a/inference-engine/thirdparty/clDNN/src/quantize.cpp +++ b/inference-engine/thirdparty/clDNN/src/quantize.cpp @@ -5,8 +5,8 @@ #include "quantize_inst.h" #include "binary_convolution_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "data_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/reduce.cpp b/inference-engine/thirdparty/clDNN/src/reduce.cpp index d1d66553e9c..e4f4634f062 100644 --- a/inference-engine/thirdparty/clDNN/src/reduce.cpp +++ b/inference-engine/thirdparty/clDNN/src/reduce.cpp @@ -5,7 +5,7 @@ #include "reduce_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "data_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/reorder.cpp b/inference-engine/thirdparty/clDNN/src/reorder.cpp index bbe1f897c4e..7300a83e3a4 100644 --- a/inference-engine/thirdparty/clDNN/src/reorder.cpp +++ b/inference-engine/thirdparty/clDNN/src/reorder.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "reorder_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include @@ -235,7 +235,7 @@ void reorder_inst::reuse_input() { if (node.requires_reinterpret()) { _output = _network.get_engine().reinterpret_buffer(input_memory(), node.get_output_layout()); } else { - _output = (memory_impl::ptr) &input_memory(); + _output = input_memory_ptr(); } } diff --git a/inference-engine/thirdparty/clDNN/src/resample.cpp b/inference-engine/thirdparty/clDNN/src/resample.cpp index 882a2361d3e..187dbdfc8e4 100644 --- a/inference-engine/thirdparty/clDNN/src/resample.cpp +++ b/inference-engine/thirdparty/clDNN/src/resample.cpp @@ -5,7 +5,7 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #include "resample_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include #include diff --git a/inference-engine/thirdparty/clDNN/src/reshape.cpp b/inference-engine/thirdparty/clDNN/src/reshape.cpp index ecdbeb289a9..e6fc316cc53 100644 --- a/inference-engine/thirdparty/clDNN/src/reshape.cpp +++ b/inference-engine/thirdparty/clDNN/src/reshape.cpp @@ -5,8 +5,8 @@ 
/////////////////////////////////////////////////////////////////////////////////////////////////// #include "reshape_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp b/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp index 77f650a8e19..c0c2f1fb51f 100644 --- a/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp +++ b/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp @@ -5,7 +5,7 @@ #include "reverse_sequence_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp b/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp index 4c291ec1329..caa6dca083d 100644 --- a/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp +++ b/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp @@ -4,7 +4,7 @@ #include "roi_pooling_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/scale.cpp b/inference-engine/thirdparty/clDNN/src/scale.cpp index 1e881122bdb..52916cf1699 100644 --- a/inference-engine/thirdparty/clDNN/src/scale.cpp +++ b/inference-engine/thirdparty/clDNN/src/scale.cpp @@ -4,7 +4,7 @@ #include "scale_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp b/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp index 11165e1b547..ccfb299221e 100644 --- a/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp +++ b/inference-engine/thirdparty/clDNN/src/scatter_elements_update.cpp @@ -5,7 +5,7 @@ #include "scatter_elements_update_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/scatter_nd_update.cpp b/inference-engine/thirdparty/clDNN/src/scatter_nd_update.cpp index e2935e530f2..435650c28f6 100644 --- a/inference-engine/thirdparty/clDNN/src/scatter_nd_update.cpp +++ b/inference-engine/thirdparty/clDNN/src/scatter_nd_update.cpp @@ -5,7 +5,7 @@ #include "scatter_nd_update_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/scatter_update.cpp b/inference-engine/thirdparty/clDNN/src/scatter_update.cpp index 2359494f4d9..cba37e0a4cb 100644 --- a/inference-engine/thirdparty/clDNN/src/scatter_update.cpp +++ b/inference-engine/thirdparty/clDNN/src/scatter_update.cpp @@ -5,7 +5,7 @@ #include "scatter_update_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/select.cpp b/inference-engine/thirdparty/clDNN/src/select.cpp index 5494449d384..433e8ba04d2 100644 --- a/inference-engine/thirdparty/clDNN/src/select.cpp +++ b/inference-engine/thirdparty/clDNN/src/select.cpp @@ -5,7 +5,7 @@ 
/////////////////////////////////////////////////////////////////////////////////////////////////// #include "select_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp b/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp index 530202d3fc1..61e5578a8e9 100644 --- a/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp +++ b/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp @@ -5,7 +5,7 @@ #include "shuffle_channels_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/space_to_batch.cpp b/inference-engine/thirdparty/clDNN/src/space_to_batch.cpp index 98712851ae3..9de115c3650 100644 --- a/inference-engine/thirdparty/clDNN/src/space_to_batch.cpp +++ b/inference-engine/thirdparty/clDNN/src/space_to_batch.cpp @@ -5,7 +5,7 @@ #include "space_to_batch_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "data_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/space_to_depth.cpp b/inference-engine/thirdparty/clDNN/src/space_to_depth.cpp index c3283b32265..3fc1f16c3ef 100644 --- a/inference-engine/thirdparty/clDNN/src/space_to_depth.cpp +++ b/inference-engine/thirdparty/clDNN/src/space_to_depth.cpp @@ -5,7 +5,7 @@ #include "space_to_depth_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/split.cpp b/inference-engine/thirdparty/clDNN/src/split.cpp index 80ee4aac11a..182fe49f0b0 100644 --- a/inference-engine/thirdparty/clDNN/src/split.cpp +++ b/inference-engine/thirdparty/clDNN/src/split.cpp @@ -4,8 +4,8 @@ #include "split_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/strided_slice.cpp b/inference-engine/thirdparty/clDNN/src/strided_slice.cpp index eb409b3572c..8faf64a3626 100644 --- a/inference-engine/thirdparty/clDNN/src/strided_slice.cpp +++ b/inference-engine/thirdparty/clDNN/src/strided_slice.cpp @@ -4,7 +4,7 @@ #include "strided_slice_inst.h" #include "primitive_type_base.h" -#include "error_handler.h" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include "data_inst.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/tile.cpp b/inference-engine/thirdparty/clDNN/src/tile.cpp index c6661655d92..798a835af45 100644 --- a/inference-engine/thirdparty/clDNN/src/tile.cpp +++ b/inference-engine/thirdparty/clDNN/src/tile.cpp @@ -4,8 +4,8 @@ #include "tile_inst.h" #include "primitive_type_base.h" -#include "memory_impl.h" -#include "error_handler.h" +#include "cldnn/runtime/memory.hpp" +#include "cldnn/runtime/error_handler.hpp" #include "json_object.h" #include diff --git a/inference-engine/thirdparty/clDNN/src/topology.cpp b/inference-engine/thirdparty/clDNN/src/topology.cpp index 7d0708669fd..4ba2c86a151 100644 --- a/inference-engine/thirdparty/clDNN/src/topology.cpp +++ b/inference-engine/thirdparty/clDNN/src/topology.cpp @@ -3,7 +3,8 @@ // 
/////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/topology.hpp" + +#include "cldnn/graph/topology.hpp" #include "topology_impl.h" #include #include @@ -33,14 +34,6 @@ void topology::add_primitive(std::shared_ptr desc) { _impl->add(desc); } -void topology::retain() { - _impl->add_ref(); -} - -void topology::release() { - _impl->release(); -} - const std::shared_ptr& topology::at(const primitive_id& id) const { return _impl->at(id); } diff --git a/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt b/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt index f1107dba431..00011e54075 100644 --- a/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt +++ b/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt @@ -65,7 +65,7 @@ if(UNIX) set_source_files_properties(${__CLDNN_Sources__gtest} PROPERTIES COMPILE_FLAGS "-Wno-undef") endif() - + if (MSVC) file(GLOB __CLDNN_Sources__natvis "${CMAKE_CURRENT_SOURCE_DIR}/float16.natvis" @@ -76,7 +76,7 @@ set(__CLDNN_Directory__ks_main "${CLDNN__KERNEL_SELECTOR_DIR}") set(__CLDNN_Directory__ks_core "${CLDNN__KERNEL_SELECTOR_DIR}/core") set(__CLDNN_Directory__ks_common "${CLDNN__KERNEL_SELECTOR_DIR}/common") set(__CLDNN_Directory__ks_core_common "${__CLDNN_Directory__ks_core}/common") -set(__CLDNN_Directory__ks_actual_kernels "${__CLDNN_Directory__ks_core}/actual_kernels") +set(__CLDNN_Directory__ks_actual_kernels "${__CLDNN_Directory__ks_core}/actual_kernels") set(__CLDNN_Directory__ks_cache "${__CLDNN_Directory__ks_core}/cache") set(__CLDNN_AllSources @@ -101,7 +101,9 @@ source_group("${__CLDNN_Label__gtest}" FILES ${__CLDNN_Sources__gtest}) include_directories( "${CLDNN__MAIN_DIR}" "${CLDNN__MAIN_DIR}/src" + "${CLDNN__MAIN_DIR}/src/include/" "${CMAKE_CURRENT_SOURCE_DIR}" + "${CLDNN__RUNTIME_DIR}" "${__CLDNN_Directory__test_utils}" "${__CLDNN_Directory__ks_core}" "${__CLDNN_Directory__ks_core}/common" @@ -118,20 +120,23 @@ include_directories(SYSTEM add_executable("${CLDNN_BUILD__PROJ}" ${__CLDNN_AllSources} ) + +if(COMMAND set_ie_threading_interface_for) + set_ie_threading_interface_for("${CLDNN_BUILD__PROJ}") +endif() + set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}") set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${CLDNN_BUILD__PROJ_OUTPUT_NAME}") # Set library dependencies -target_link_libraries("${CLDNN_BUILD__PROJ}" - "${CLDNN_BUILD__PROJ__clDNN}" - ) +target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE "${CLDNN_BUILD__PROJ__clDNN}") if(WIN32) - target_link_libraries("${CLDNN_BUILD__PROJ}" setupapi) + target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE setupapi) elseif((NOT ANDROID) AND (UNIX)) - target_link_libraries("${CLDNN_BUILD__PROJ}" pthread) + target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE pthread) endif() -target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES}) +target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE ${CLDNN__SYSTEM_LINK_LIBRARIES}) # =================================== Custom pre- and post-steps ======================================= diff --git a/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp index 8dc4d57f426..b318e806d08 100644 --- a/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp @@ -2,11 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include 
-#include "api/engine.hpp" -#include "test_utils/test_utils.h" -#include "api/input_layout.hpp" -#include "api/network.hpp" +#include "test_utils.h" using namespace tests; using namespace cldnn; @@ -35,17 +31,16 @@ TEST(events_pool, DISABLED_basic_test) for (int i = 0; i < 20; i++) { - engine eng;// here we build new engine i times - auto input = memory::allocate(eng, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto eng = engine::create(engine_types::ocl, runtime_types::ocl);// here we build new engine i times + auto input = eng->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; for (int j = 0; j < 20; j++) //then we build network j times { - network network(eng, topology, bo); + network network(*eng, topology, bo); network.set_input_data("input", input); for(int k = 0; k < 20; k++) //and execute that network k times - network.execute(); + network.execute(); } - EXPECT_EQ(eng.get_max_used_device_memory_size(), (uint64_t)80); - eng.~engine(); + EXPECT_EQ(eng->get_max_used_device_memory(), (uint64_t)80); } } diff --git a/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp deleted file mode 100644 index d7efdc35045..00000000000 --- a/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "api/engine.hpp" -#include "test_utils/test_utils.h" -#include "api/network.hpp" -#include "api/topology.hpp" -#include "api/input_layout.hpp" -#include "api/activation.hpp" -#include "api/cldnn.hpp" - -#include "test_utils.h" - -#if defined __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wmissing-braces" -#elif defined __GNUC__ && __GNUC__ >= 6 -#pragma GCC diagnostic ignored "-Wignored-attributes" -#endif - -#include - -using namespace cldnn; - -class user_gpu_toolkit -{ -public: - user_gpu_toolkit() - { - get_platform_and_device(get_plaftorm()); - create_context_from_one_device(); - } - - cl_context get_gpu_context() const { return _gpu_context; } - -private: - cl_platform_id _platform_id; - cl_device_id _gpu_device; - cl_context _gpu_context; - - void create_context_from_one_device() - { - cl_int error = 0; - _gpu_context = clCreateContext(0, 1, &_gpu_device, 0, 0, &error); - if (error != CL_SUCCESS) - { - throw std::runtime_error("error creating context"); - } - } - - cl_platform_id get_plaftorm() - { - static constexpr auto INTEL_PLATFORM_VENDOR = "Intel(R) Corporation"; - cl_uint n = 0; - cl_int err = clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err)); - } - - // Get platform list - std::vector platform_ids(n); - err = clGetPlatformIDs(n, platform_ids.data(), NULL); - if (err != CL_SUCCESS) { - throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err)); - } - - // find Intel platform - for (auto& id : platform_ids) { - size_t infoSize; - err = clGetPlatformInfo(id, CL_PLATFORM_VENDOR, 0, NULL, &infoSize); - if (err != CL_SUCCESS) { - throw std::runtime_error("clGetPlatformInfo error " + std::to_string(err)); - } - - std::vector tmp(infoSize); - - err = clGetPlatformInfo(id, CL_PLATFORM_VENDOR, infoSize, tmp.data(), NULL); - if 
(err != CL_SUCCESS) { - throw std::runtime_error("clGetPlatformInfo error " + std::to_string(err)); - } - - std::string vendor_id(tmp.data()); - if (vendor_id == std::string(INTEL_PLATFORM_VENDOR)) - return id; - } - return static_cast(nullptr); - } - - void get_platform_and_device(cl_platform_id platform_id) - { - _platform_id = platform_id; - cl_int err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &_gpu_device, 0); - if (err != CL_SUCCESS) { - throw std::runtime_error("clGetDeviceIDs error " + std::to_string(err)); - } - } -}; - -TEST(gpu_engine, DISABLED_user_context) -{ - user_gpu_toolkit gpu_toolkit; - cl_context user_context = gpu_toolkit.get_gpu_context(); - - device_query query(static_cast(user_context)); - auto devices = query.get_available_devices(); - - //[0] Check if the user engine config works. - auto engine_config = cldnn::engine_configuration(false, false, false, "", "", true, "", "", cldnn::priority_mode_types::disabled, cldnn::throttle_mode_types::disabled, true, 1); - - //[1]Check if the engine creation works. - engine engine(devices.begin()->second, engine_config); - auto info = engine.get_info(); - EXPECT_GT(info.cores_count, 0u); - EXPECT_GT(info.core_frequency, 0u); - - //[2]Now check if the queues works (run simple network). - topology topo; - auto inp_lay = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,2,2 }); - auto input_mem = cldnn::memory::allocate(engine, inp_lay); - tests::set_values(input_mem, { 1.0f, 2.0f, 3.0f, 4.0f }); - auto inp = input_layout("input", inp_lay); - auto activ = activation("this_needs_queue", "input", activation_func::abs); - topo.add(inp, activ); - network net(engine, topo); - - net.set_input_data("input", input_mem); - auto out = net.execute(); - auto out_ptr = out.at("this_needs_queue").get_memory().pointer(); - EXPECT_EQ(out.size(), size_t(1)); - for (uint32_t i = 0; i < 4; i++) - EXPECT_EQ(out_ptr[i], float(i + 1)); -} - -void execute_simple_topology(cldnn::engine& engine) { - auto batch_num = 1; - auto feature_num = 4; - auto x_size = 1; - auto y_size = 1; - auto input_tensor = cldnn::tensor(cldnn::spatial(x_size, y_size), cldnn::feature(feature_num), cldnn::batch(batch_num)); - auto topo = cldnn::topology( - cldnn::input_layout("input", { cldnn::data_types::f32, cldnn::format::bfyx, input_tensor }), - cldnn::activation("relu", "input", cldnn::activation_func::relu)); - - cldnn::network net(engine, topo); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx, input_tensor }); - tests::set_values(input_mem, { -1.f, 2.f, -3.f, 4.f }); - net.set_input_data("input", input_mem); - auto outs = net.execute(); - auto output = outs.at("relu"); - auto out_ptr = output.get_memory().pointer(); - ASSERT_EQ(out_ptr[0], 0.0f); - ASSERT_EQ(out_ptr[1], 2.0f); - ASSERT_EQ(out_ptr[2], 0.0f); - ASSERT_EQ(out_ptr[3], 4.0f); -} - - -TEST(gpu_device_query, get_device_info) -{ - cldnn::device_query query; - auto devices = query.get_available_devices(); - auto device_id = devices.begin()->first; - auto device = devices.begin()->second; - auto device_info = device.get_info(); - - //check key and few members, so we know that device info was returned properly - ASSERT_EQ(device_id, "0"); - ASSERT_GT(device_info.cores_count, 0u); - ASSERT_GT(device_info.core_frequency, 0u); - ASSERT_NE(device_info.dev_name, ""); - ASSERT_NE(device_info.driver_version, ""); -} - - -TEST(gpu_device_query, get_engine_info) -{ - const auto& engine = tests::get_test_engine(); - auto info = engine.get_info(); - 
EXPECT_GT(info.cores_count, 0u); - EXPECT_GT(info.core_frequency, 0u); -} - - -TEST(gpu_device_query, simple) -{ - cldnn::device_query query; - auto devices = query.get_available_devices(); - auto device = devices.begin()->second; - - cldnn::engine eng(device); - //check if simple execution was finished correctly - execute_simple_topology(eng); -} - -TEST(gpu_device_query, DISABLED_release_query) -{ - cldnn::device_query query; - auto devices = query.get_available_devices(); - auto device = devices.begin()->second; - - //destroy query - query.~device_query(); - //create engine - cldnn::engine eng(device); - //check if simple execution was finished correctly - execute_simple_topology(eng); -} - -TEST(gpu_device_query, DISABLED_release_device) -{ - cldnn::device_query query; - auto devices = query.get_available_devices(); - auto device = devices.begin()->second; - - //destroy query - query.~device_query(); - //create engine - cldnn::engine eng(device); - //destroy device - device.~device(); - //check if simple execution was finished correctly - execute_simple_topology(eng); -} diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/graph_manipulation_gpu_test.cpp similarity index 67% rename from inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp rename to inference-engine/thirdparty/clDNN/tests/module_tests/graph_manipulation_gpu_test.cpp index 81e46652875..25e6260ea12 100644 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/graph_manipulation_gpu_test.cpp @@ -2,14 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "test_utils.h" -#include +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/memory.hpp" #include "program_impl.h" #include "topology_impl.h" -#include "engine_impl.h" -#include "memory_impl.h" #include "data_inst.h" #include "activation_inst.h" #include "convolution_inst.h" @@ -18,29 +17,30 @@ #include "reshape_inst.h" #include "pass_manager.h" -#include "test_utils.h" #include "program_impl_wrapper.h" +#include + using namespace cldnn; using namespace ::tests; /* Basic test to show how the program can be build and run within internal tests in similar way as it is done in tests utilizing clDNN API */ TEST(basic, test1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 1, 2 } }); set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) }); set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) }); set_values(weights2, { 1.1f, 0.1f }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); 
topology.add(data("weights2", weights2)); topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2)))); @@ -49,9 +49,9 @@ TEST(basic, test1) { topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x)); topology.add(convolution("conv2", { "reorder2" }, { "concat" })); - program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false); - cldnn::refcounted_obj_ptr net = engine.get()->allocate_network(*prog, 0); - network network = (cldnn::network) net.get(); + program_impl::ptr prog = program_impl::build_program(engine, *topology.get(), build_opt, false); + std::shared_ptr net = network_impl::allocate_network(engine, prog); + network network(net); network.set_input_data("input", input); @@ -60,24 +60,22 @@ TEST(basic, test1) { float epsilon = 1e-2f; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); EXPECT_NEAR(7.8f, output[0], epsilon); } } -/* - This test creates a program without optimization passes, even the compilation is being run manualy. - Thus, a single method from program_impl like add_intermediate might be tested separately. -*/ +// This test creates a program without optimization passes, even the compilation is being run manualy. +// Thus, a single method from program_impl like add_intermediate might be tested separately. TEST(add_intermediate_gpu, test1) { build_options build_opt; topology topology; - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 1, 1 } }); set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f), (2.1f), (2.2f), (2.3f), (2.4f), @@ -89,21 +87,21 @@ TEST(add_intermediate_gpu, test1) (4.5f), (4.6f), (4.7f), (4.8f) }); set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) }); - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights", weights)); topology.add(data("weights2", weights2)); topology.add(cldnn::convolution("conv1a", { "input" }, { "weights" })); topology.add(cldnn::convolution("conv1b", { "input" }, { "weights" })); topology.add(cldnn::convolution("conv2a", { "conv1a" }, { "weights2" })); - auto new_reorder = std::make_shared("reorder","nothing", input.get_layout()); - program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true); + auto new_reorder = std::make_shared("reorder","nothing", input->get_layout()); + program_impl::ptr prog = program_impl::build_program(engine, *topology.get(), build_opt, false, true); prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0); prog->dump_program("custom_dump", true); - program_impl_wrapper::run_graph_compilation(*prog); + program_impl_wrapper::build(*prog); - cldnn::refcounted_obj_ptr net = engine.get()->allocate_network(*prog, 0); - network network = (cldnn::network) net.get(); + std::shared_ptr net = network_impl::allocate_network(engine, prog); + network network(net); network.set_input_data("input", 
input); auto outputs = network.execute(); @@ -116,7 +114,7 @@ TEST(add_intermediate_gpu, test1) uint32_t output_index = 0; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); for (uint32_t x = 0; x < output_size; x++) { EXPECT_FLOAT_EQ(expected_output_vec[x+output_size*output_index], output[x]); @@ -126,15 +124,16 @@ TEST(add_intermediate_gpu, test1) } /* This test shows how to use private members (here: add_connection) of program_impl using program_impl_wraper */ +// Disabled for now as it produces wrong results TEST(add_intermediate_gpu, test2) { build_options build_opt; topology topology; - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 1, 1 } }); set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f), (2.1f), (2.2f), (2.3f), (2.4f), @@ -147,7 +146,7 @@ TEST(add_intermediate_gpu, test2) set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) }); - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights2", weights2)); topology.add(cldnn::convolution("conv2a", { "input" }, { "weights2" })); @@ -157,16 +156,16 @@ TEST(add_intermediate_gpu, test2) w_vec.push_back("weights"); auto new_conv = std::make_shared("conv1a", "input", w_vec); auto weights_node = std::make_shared("weights", weights); - program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true); + program_impl::ptr prog = program_impl::build_program(engine, *topology.get(), build_opt, false, true); prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true); program_impl_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv)); prog->dump_program("custom_dump", true); - program_impl_wrapper::run_graph_compilation(*prog); + program_impl_wrapper::build(*prog); - cldnn::refcounted_obj_ptr net = engine.get()->allocate_network(*prog, 0); - network network = (cldnn::network) net.get(); + std::shared_ptr net = network_impl::allocate_network(engine, prog); + network network(net); network.set_input_data("input", input); auto outputs = network.execute(); @@ -177,7 +176,7 @@ TEST(add_intermediate_gpu, test2) uint32_t output_size = 4; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); for (uint32_t x = 0; x < output_size; x++) { EXPECT_FLOAT_EQ(expected_output_vec[x], output[x]); diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/prepare_conv_eltw_fusing.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/prepare_conv_eltw_fusing.cpp similarity index 78% rename from inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/prepare_conv_eltw_fusing.cpp rename to inference-engine/thirdparty/clDNN/tests/module_tests/prepare_conv_eltw_fusing.cpp index 1e6037fd98a..1ca951b244d 100644 --- 
a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/prepare_conv_eltw_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/prepare_conv_eltw_fusing.cpp @@ -2,20 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "test_utils.h" -#include +#include "cldnn/runtime/engine.hpp" #include "program_impl.h" #include "data_inst.h" -#include "engine_impl.h" #include "eltwise_inst.h" #include "network_impl.h" #include "pass_manager.h" -#include "test_utils.h" #include "program_impl_wrapper.h" +#include + using namespace cldnn; using namespace ::tests; @@ -23,20 +23,19 @@ using namespace ::tests; std::map test_prepare_conv_eltw_fusing(bool eltw1, bool eltw2) { build_options build_opt; - build_opt.set_option(build_option::graph_dumps_dir("dumps")); build_opt.set_option(build_option::optimize_data(true)); - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.1f, 1.2f, 1.3f, 1.4f }); set_values(weights1, { 2.1f}); set_values(weights2, { -1.5f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); topology.add(data("weights2", weights2)); topology.add(convolution("conv1", { "input" }, { "weights1" })); @@ -67,15 +66,17 @@ std::map test_prepare_conv_eltw_fusing(bool eltw1, { topology.add(eltwise("eltw3", "conv1", "conv2", cldnn::eltwise_mode::sum)); } - program_impl::ptr prog = engine.get()->build_program(*topology.get(), build_opt, false, true); + program_impl::ptr prog = program_impl::build_program(engine, *topology.get(), build_opt, false, true); layout_optimizer lo; program_impl_wrapper::apply_opt_pass(*prog, lo); program_impl_wrapper::run_graph_compilation(*prog); program_impl_wrapper::prepare_memory_dependencies(*prog); - cldnn::refcounted_obj_ptr net = engine.get()->allocate_network(*prog, 0); - network network = (cldnn::network) net.get(); + program_impl_wrapper::compile(*prog); + program_impl_wrapper::init_kernels(*prog); + std::shared_ptr net = network_impl::allocate_network(engine, prog); + network network(net); network.set_input_data("input", input); return network.execute(); @@ -94,7 +95,7 @@ TEST(prepare_conv_eltw_fusing, testlp) float epsilon = 1e-3f; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); for (int i = 0; i < 4; i++) EXPECT_NEAR(ref_out[i], output[i], epsilon); } @@ -114,7 +115,7 @@ TEST(prepare_conv_eltw_fusing, testl) float epsilon = 1e-3f; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); for (int i = 0; i < 4; i++) EXPECT_NEAR(ref_out[i], output[i], epsilon); } @@ -135,7 +136,7 @@ TEST(prepare_conv_eltw_fusing, testp) float epsilon = 1e-3f; for (auto& it : outputs) { - auto output = it.second.get_memory().pointer(); + 
cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); for (int i = 0; i < 4; i++) EXPECT_NEAR(ref_out[i], output[i], epsilon); } diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h b/inference-engine/thirdparty/clDNN/tests/module_tests/program_impl_wrapper.h similarity index 70% rename from inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h rename to inference-engine/thirdparty/clDNN/tests/module_tests/program_impl_wrapper.h index 855f3f050db..03b7bd27970 100644 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/program_impl_wrapper.h @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // +#pragma once + namespace cldnn { struct program_node; @@ -24,6 +26,20 @@ namespace cldnn { p.run_graph_compilation(); } + static void compile(program_impl& p) + { + p.compile(); + } + static void build(program_impl& p) + { + program_impl_wrapper::run_graph_compilation(p); + program_impl_wrapper::compile(p); + program_impl_wrapper::init_kernels(p); + } + static void init_kernels(program_impl& p) + { + p.init_kernels(); + } static void prepare_memory_dependencies(program_impl& p) { p.prepare_memory_dependencies(); diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/reorder_inputs_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/reorder_inputs_test.cpp similarity index 79% rename from inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/reorder_inputs_test.cpp rename to inference-engine/thirdparty/clDNN/tests/module_tests/reorder_inputs_test.cpp index fb21acf2396..f67512fc3a4 100644 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/reorder_inputs_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/reorder_inputs_test.cpp @@ -2,21 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "test_utils.h" -#include +#include "cldnn/runtime/engine.hpp" #include "program_impl.h" #include "data_inst.h" -#include "engine_impl.h" #include "eltwise_inst.h" #include "network_impl.h" #include "pass_manager.h" #include "to_string_utils.h" -#include "test_utils.h" #include "program_impl_wrapper.h" +#include + using namespace cldnn; using namespace ::tests; @@ -31,13 +31,13 @@ TEST(reorder_inputs, propagation) { // Format of convolutions should be propagated through pooling. // At most single reorder should be inserted before first convolution. 
- auto engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 2, 32, 1, 1 } }); - auto weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 32, 32, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb, { 2, 32, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 32, 32, 1, 1 } }); topology topology; topology.add(data("weights", weights)); - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(convolution("conv1", "input", { "weights" })); topology.add(pooling("pool", "conv1", pooling_mode::max, { 1, 1, 1, 1 }, { 1, 1, 1, 1 })); topology.add(convolution("conv2", "pool", { "weights" })); @@ -70,11 +70,11 @@ TEST(reorder_inputs, propagation) { } TEST(reorder_inputs, impl_forcing_basic_format) { - auto engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(pooling("pool", "input", pooling_mode::max, { 1, 1, 2, 1 }, { 1, 1, 2, 1 })); implementation_desc pool_impl = { format::yxfb, "" }; @@ -91,14 +91,14 @@ TEST(reorder_inputs, impl_forcing_basic_format) { network.execute(); auto network_impl = network.get(); - auto& prog = network_impl->get_program(); - auto& pool_node = prog.get_node("pool"); + const auto& prog = network_impl->get_program(); + auto& pool_node = prog->get_node("pool"); auto pool_layout = pool_node.get_output_layout(); EXPECT_EQ(pool_layout.format.value, format::yxfb); auto out_mem = network.get_output("pool").get_memory(); - auto out_mem_ptr = out_mem.pointer(); + cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 4u); @@ -109,11 +109,11 @@ TEST(reorder_inputs, impl_forcing_basic_format) { } TEST(reorder_inputs, impl_forcing_not_existing) { - auto engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(pooling("pool", "input", pooling_mode::max, { 1, 1, 2, 1 }, { 1, 1, 2, 1 })); implementation_desc pool_impl = { format::any, "NOT_EXISTING" }; @@ -125,11 +125,11 @@ TEST(reorder_inputs, impl_forcing_not_existing) { } TEST(reorder_inputs, impl_forcing_basic_format_kernel) { - auto engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("actv", "input", activation_func::relu)); implementation_desc actv_impl = { format::yxfb, "activation_ref" }; @@ -146,8 +146,8 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { network.execute(); auto network_impl = network.get(); - auto& prog = 
network_impl->get_program(); - auto& node = prog.get_node("actv"); + auto prog = network_impl->get_program(); + auto& node = prog->get_node("actv"); auto actv_layout = node.get_output_layout(); auto kernel_name = node.get_selected_impl()->get_kernel_name(); @@ -155,7 +155,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { EXPECT_EQ(kernel_name, actv_impl.kernel_name); auto out_mem = network.get_output("actv").get_memory(); - auto out_mem_ptr = out_mem.pointer(); + cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); ASSERT_EQ(out_mem_ptr.size(), 8u); @@ -171,13 +171,13 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { // TODO Not yet implemented //TEST(reorder_inputs, impl_forcing_conv_format_kernel) { -// auto engine = get_test_engine(); -// auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 2, 2, 2} }); -// auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 1, 1} }); +// auto& engine = get_test_engine(); +// auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 2, 2, 2} }); +// auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 1, 1} }); // // topology topology; // topology.add(data("weights", weights)); -// topology.add(input_layout("input", input.get_layout())); +// topology.add(input_layout("input", input->get_layout())); // topology.add(convolution("conv", "input", { "weights" })); // topology.add(reorder("output", "conv", format::bfyx, data_types::f32)); // @@ -209,7 +209,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { // EXPECT_EQ(conv_sel_impl->get_kernel_name(), impl.kernel); // // auto out_mem = network.get_output("output").get_memory(); -// auto out_mem_ptr = out_mem.pointer(); +// cldnn::mem_lock out_mem_ptr(out_mem, get_test_stream()); // // EXPECT_EQ(out_mem_ptr.size(), 8); // diff --git a/inference-engine/thirdparty/clDNN/tests/module_tests/test_uqr_distribution.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/test_uqr_distribution.cpp index 88ecd67818b..0b85f0b1c1c 100644 --- a/inference-engine/thirdparty/clDNN/tests/module_tests/test_uqr_distribution.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/test_uqr_distribution.cpp @@ -2,9 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "test_utils/uniform_quantized_real_distribution.hpp" +#include "test_utils.h" #include #include @@ -17,7 +15,7 @@ #include #include -namespace cldnn { namespace tests { +namespace tests { template struct uniform_quantized_real_distribution_test : ::testing::Test @@ -1137,4 +1135,4 @@ TYPED_TEST(uniform_quantized_real_distribution_test, DISABLED_generate_random_eq } } -}} // namespace cldnn { namespace tests { +} // namespace tests diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/usm_memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/usm_memory_test.cpp similarity index 68% rename from inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/usm_memory_test.cpp rename to inference-engine/thirdparty/clDNN/tests/module_tests/usm_memory_test.cpp index d1b35dc774c..c67b2516fc7 100644 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/usm_memory_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/module_tests/usm_memory_test.cpp @@ -2,14 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "test_utils.h" -#include +#include "cldnn/runtime/engine.hpp" +#include "cldnn/runtime/memory.hpp" +#include 
"cldnn/runtime/device_query.hpp" +#include "runtime/ocl/ocl_stream.hpp" +#include "runtime/ocl/ocl_memory.hpp" +#include "runtime/ocl/ocl_common.hpp" +#include "runtime/ocl/ocl_base_event.hpp" #include "program_impl.h" #include "topology_impl.h" -#include "engine_impl.h" -#include "memory_impl.h" #include "data_inst.h" #include "activation_inst.h" #include "convolution_inst.h" @@ -17,14 +21,9 @@ #include "network_impl.h" #include "reshape_inst.h" #include "pass_manager.h" -#include "api/engine.hpp" -#include "test_utils.h" #include "program_impl_wrapper.h" -#include "gpu/ocl_queue_wrapper.h" -#include "gpu/memory_gpu.h" -#include "gpu/ocl_toolkit.h" -#include "gpu/command_queues_builder.h" -#include "gpu/ocl_base_event.h" + +#include using namespace cldnn; using namespace ::tests; @@ -36,7 +35,7 @@ using namespace ::tests; #pragma GCC diagnostic ignored "-Wignored-attributes" #endif -#include +#include using namespace cldnn; using namespace ::tests; @@ -47,21 +46,24 @@ struct usm_test_params{ class BaseUSMTest : public ::testing::TestWithParam { protected: - std::shared_ptr _device = nullptr; - std::shared_ptr _engine = nullptr; + std::shared_ptr _device = nullptr; + std::shared_ptr _engine = nullptr; bool _supports_usm = false; public: void SetUp() override { // Find device, which supports USMs. - device_query query; + device_query query(engine_types::ocl, runtime_types::ocl); auto devices = query.get_available_devices(); for (const auto& d : devices) { - if (d.second.get()->mem_caps().supports_usm()) { - _device = std::make_shared(d.second); + if (d.second->get_mem_caps().supports_usm()) { + _device = std::dynamic_pointer_cast(d.second); break; } } - _engine = std::make_shared(_device->get()); + if (!_device) { + GTEST_SUCCEED(); + } + _engine = std::dynamic_pointer_cast(engine::create(engine_types::ocl, runtime_types::ocl, _device)); _supports_usm = true; } @@ -76,8 +78,8 @@ TEST_P(ctor_test, basic) { return; } try { - cl::UsmMemory mem(_device->get()->get_context()); - auto cl_dev = _device->get()->get_device(); + cl::UsmMemory mem(_device->get_context()); + auto cl_dev = _device->get_device(); switch (p.type) { case allocation_type::usm_host: { mem.allocateHost(1); @@ -115,35 +117,31 @@ TEST_P(copy_and_read_buffer, basic) { return; } try { - gpu::command_queues_builder q_builder(_device->get()->get_context(), _device->get()->get_device(), _device->get()->get_platform()); - q_builder.build(); - auto queue = cl::CommandQueueIntel(q_builder.queue()); + ocl::ocl_stream stream(*_engine); size_t values_count = 100; size_t values_bytes_count = values_count * sizeof(float); std::vector src_buffer(values_count); std::iota(src_buffer.begin(), src_buffer.end(), 0.0f); cldnn::layout linear_layout = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, 1, int32_t(values_count), 1)); - auto cldnn_mem_src = _engine->get()->allocate_memory(linear_layout, p.type); - auto ptr_to_fill = cldnn_mem_src->lock(); + auto cldnn_mem_src = _engine->allocate_memory(linear_layout, p.type); + // Fill src buffer switch (p.type) { case allocation_type::usm_host: case allocation_type::usm_shared: { - std::copy(src_buffer.begin(), src_buffer.end(), static_cast(ptr_to_fill)); - cldnn_mem_src->unlock(); + cldnn::mem_lock lock(cldnn_mem_src, stream); + std::copy(src_buffer.begin(), src_buffer.end(), lock.data()); break; } case allocation_type::usm_device: { - auto host_buf = _engine->get()->allocate_memory(linear_layout, allocation_type::usm_host); - std::copy(src_buffer.begin(), src_buffer.end(), 
static_cast(host_buf->lock())); - host_buf->unlock(); - queue.enqueueCopyUsm( - dynamic_cast(*host_buf).get_buffer(), - dynamic_cast(*cldnn_mem_src).get_buffer(), - values_bytes_count, - true - ); + auto casted = std::dynamic_pointer_cast(cldnn_mem_src); + auto host_buf = _engine->allocate_memory(linear_layout, allocation_type::usm_host); + { + cldnn::mem_lock lock(host_buf, stream); + std::copy(src_buffer.begin(), src_buffer.end(), lock.data()); + } + casted->copy_from(stream, *host_buf); break; } default: @@ -155,22 +153,17 @@ TEST_P(copy_and_read_buffer, basic) { switch (p.type) { case allocation_type::usm_host: case allocation_type::usm_shared: { - auto values_ptr = cldnn_mem_src->lock(); - std::memcpy(dst_buffer.data(), values_ptr, values_bytes_count); - cldnn_mem_src->unlock(); + cldnn::mem_lock lock(cldnn_mem_src, stream); + std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count); break; } case allocation_type::usm_device: { - auto host_buf = _engine->get()->allocate_memory(linear_layout, allocation_type::usm_host); - queue.enqueueCopyUsm( - dynamic_cast(*cldnn_mem_src).get_buffer(), - dynamic_cast(*host_buf).get_buffer(), - values_bytes_count, - true - ); - auto values_ptr = host_buf->lock(); - std::memcpy(dst_buffer.data(), values_ptr, values_bytes_count); - host_buf->unlock(); + auto host_buf = _engine->allocate_memory(linear_layout, allocation_type::usm_host); + host_buf->copy_from(stream, *cldnn_mem_src); + { + cldnn::mem_lock lock(host_buf, stream); + std::memcpy(dst_buffer.data(), lock.data(), values_bytes_count); + } break; } default: @@ -197,22 +190,21 @@ TEST_P(fill_buffer, DISABLED_basic) { return; } try { - gpu::command_queues_builder q_builder(_device->get()->get_context(), _device->get()->get_device(), _device->get()->get_platform()); - q_builder.build(); - auto queue = cl::CommandQueueIntel(q_builder.queue()); + ocl::ocl_stream stream(*_engine); + auto queue = stream.get_cl_queue(); size_t values_count = 100; size_t values_bytes_count = values_count * sizeof(float); - cl::UsmMemory mem(_device->get()->get_context()); + cl::UsmMemory mem(_device->get_context()); switch (p.type) { case allocation_type::usm_host: mem.allocateHost(values_bytes_count); break; case allocation_type::usm_shared: - mem.allocateShared(_device->get()->get_device(), values_bytes_count); + mem.allocateShared(_device->get_device(), values_bytes_count); break; case allocation_type::usm_device: - mem.allocateDevice(_device->get()->get_device(), values_bytes_count); + mem.allocateDevice(_device->get_device(), values_bytes_count); break; default: FAIL() << "Not supported allocation type!"; @@ -240,7 +232,7 @@ TEST_P(fill_buffer, DISABLED_basic) { break; } case allocation_type::usm_device: { - cl::UsmMemory host_mem(_device->get()->get_context()); + cl::UsmMemory host_mem(_device->get_context()); host_mem.allocateHost(values_bytes_count); queue.enqueueCopyUsm( mem, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp index 77546a65f4d..1760a85fdf6 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp @@ -2,24 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// +#include "test_utils.h" + +#include +#include +#include +#include #include -#include 
#include -#include "api/memory.hpp" -#include -#include "api/activation.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include "test_utils/float16.h" -#include "api/reorder.hpp" using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(activation_f32_fw_gpu, not_basic_yxfb) { // Input: @@ -34,9 +28,9 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) { // 0, 0, 0, 1, 0, // 0, 0, 0, 0, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f, 0.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -49,7 +43,7 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) { 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::negation)); network network(engine, topology); network.set_input_data("input", input); @@ -58,8 +52,8 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -83,9 +77,9 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f, 0.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -93,7 +87,7 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::erf)); network network(engine, topology); network.set_input_data("input", input); @@ -102,9 +96,9 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -128,9 +122,9 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); activation_additional_params params = { 1.0f, 0.5f }; set_values(input, { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f, @@ -139,7 +133,7 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.0f }); topology topology( - input_layout("input", input.get_layout()), + 
input_layout("input", input->get_layout()), activation("not", "input", activation_func::hard_sigmoid, params)); network network(engine, topology); network.set_input_data("input", input); @@ -148,9 +142,9 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -175,9 +169,9 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.3f, -3.0f, 4.0f, 5.0f, 21.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -185,7 +179,7 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.1f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::reciprocal)); network network(engine, topology); network.set_input_data("input", input); @@ -194,9 +188,9 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -221,9 +215,9 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); activation_additional_params params = { 1.0f, 0.5f }; set_values(input, { 1.0f, 0.3f, -3.0f, 4.0f, 5.0f, @@ -232,7 +226,7 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.1f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::selu, params)); network network(engine, topology); network.set_input_data("input", input); @@ -241,9 +235,9 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -269,9 +263,9 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 
-1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.3f, -3.0f, 4.0f, 5.0f, 21.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -279,7 +273,7 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.1f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::softplus)); network network(engine, topology); network.set_input_data("input", input); @@ -288,9 +282,9 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -315,9 +309,9 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.3f, -3.0f, 4.0f, 5.0f, 21.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -325,7 +319,7 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.1f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::softsign)); network network(engine, topology); network.set_input_data("input", input); @@ -334,9 +328,9 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -361,9 +355,9 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { // 3 -3 3 0 1 // 1 1 1 -1 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f, 21.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -371,7 +365,7 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { 1.0f, 1.0f, 1.0f, -1.0f, 0.1f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("not", "input", activation_func::sign)); network network(engine, topology); network.set_input_data("input", input); @@ -380,9 +374,9 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "not"); auto output_memory = outputs.at("not").get_memory(); - auto output_layout 
= output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -401,15 +395,15 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { } TEST(activation_f32_fw_gpu, pow_basic_yxfb) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f }); VF output_vec = { 1.0f, 4.0f, 9.0f, 16.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("pow", "input", activation_func::pow, { 2.0f, 0.0f })); network network(engine, topology); network.set_input_data("input", input); @@ -418,8 +412,8 @@ TEST(activation_f32_fw_gpu, pow_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "pow"); auto output_memory = outputs.at("pow").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -437,15 +431,15 @@ TEST(activation_f32_fw_gpu, pow_basic_yxfb) { } TEST(activation_f16_fw_gpu, pow_basic_yxfb) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb, { 1, 1, 2, 2 } }); set_values(input, { FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.5f) }); VF output_vec = { FLOAT16(1.0f), FLOAT16(8.0f), FLOAT16(27.0f), FLOAT16(91.125f) }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("pow", "input", activation_func::pow, { FLOAT16(3.0f), FLOAT16(0.0f) })); network network(engine, topology); network.set_input_data("input", input); @@ -454,8 +448,8 @@ TEST(activation_f16_fw_gpu, pow_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "pow"); auto output_memory = outputs.at("pow").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -487,9 +481,9 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) { // 3 -1.5 3 5 1 // 1 1 1 -0.5 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -502,7 +496,7 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) { 1.0f, 1.0f, 1.0f, -0.5f, 1.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 
0.f }, padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -511,8 +505,8 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -555,9 +549,9 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) { // 1 2 1 -1 2 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 5, 4, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 5, 4, 2 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -578,7 +572,7 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) { 1.0f, 2.0f, 1.0f, -1.0f, 2.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -587,8 +581,8 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -618,10 +612,10 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) // a: 0.5, b: 2.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 4 } }); - auto input_params = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 5, 4 } }); + auto input_params = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 1, 1 } }); set_values(input, { 0.0f, -2.0f, -3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -665,7 +659,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) { for (auto func : funcs) { - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); if (i == 0) { @@ -684,9 +678,9 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -796,10 +790,10 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = 
memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); - auto input_params = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); + auto input_params = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { FLOAT16(-4.5f), FLOAT16(-2.5f), FLOAT16(-1.5f), FLOAT16(0.5f), FLOAT16(0.9f), FLOAT16(1.5f), FLOAT16(2.0f), FLOAT16(2.5f) }); @@ -818,7 +812,7 @@ TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) for (uint8_t i = 0 ; i < 2 ; i++) { for (auto func : funcs) { - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); if (i == 0) { topology.add(activation("activation", "input", func, params)); @@ -834,9 +828,9 @@ TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -884,9 +878,9 @@ TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 4 } }); set_values(input, { 0.12f, 0.56f, 0.45f, 0.789f, 0.546f, 0.999f, 0.7899f, 0.6677f}); std::vector funcs = { @@ -902,7 +896,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan) for (auto func : funcs) { - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", "input", func)); network network(engine, topology); @@ -912,9 +906,9 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -977,9 +971,9 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) { // // Slope: 0.5 - const auto &engine = get_test_engine(); + auto &engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {1, 1, 5, 4}}); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, {1, 1, 5, 4}}); set_values(input, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, @@ -988,8 +982,8 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{{0, 0, 2, 1}, 0})), + input_layout("input", input->get_layout()), + reorder("reorder", "input", 
input->get_layout().with_padding(padding{{0, 0, 2, 1}, 0})), activation("relu", "reorder", activation_func::acosh, {0.5f, 0.f}, padding{{0, 0, 0, 0}, 0})); network network(engine, topology); network.set_input_data("input", input); @@ -997,9 +991,9 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1038,9 +1032,9 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) { // 3 -1.5 3 5 1 // 1 1 1 -0.5 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, @@ -1054,8 +1048,8 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) { 1.0f, 1.0f, 1.0f, -0.5f, 1.0f}; topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), activation("relu", "reorder", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -1063,8 +1057,8 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1116,9 +1110,9 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) { // 3 -1.5 3 5 1 // 1 1 1 -0.5 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 5, 4, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 5, 4, 2 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, @@ -1141,8 +1135,8 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) { 1.0f, 1.0f, 1.0f, -0.5f, 1.0f }; topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1, 0 }, 0 })), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 1, 0 }, 0 })), activation("relu", "reorder", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -1151,8 +1145,8 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) { auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto 
output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -1194,9 +1188,9 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { // 0 0 0 0 0 0 0 0 0 0 0 // 0 0 0 0 0 0 0 0 0 0 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -1215,7 +1209,7 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("relu", "input", activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 3, 3 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -1224,9 +1218,9 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); auto output_size = output_layout.get_buffer_size(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_size.spatial[1]; int x_size = output_size.spatial[0]; @@ -1245,9 +1239,9 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 4 } }); set_values(input, { 0.01f, 0.99f, -0.01f, -0.99f, 1.1f, 1.0f, 0.0f, -1.1f }); std::vector funcs = { @@ -1257,7 +1251,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) for (auto func : funcs) { - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", "input", func)); network network(engine, topology); @@ -1267,9 +1261,9 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1300,8 +1294,8 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i8, format::yxfb,{ 2, 2, 2, 2 } }); std::vector input_vec = { 1, 0, 5, 1, @@ -1321,7 +1315,7 @@ TEST(activation_i8_fw_gpu, 
basic_yxfb_all_funcs) for (auto func : funcs) { topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", "input", func)); network network(engine, topology); @@ -1332,14 +1326,12 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); - for (size_t i = 0; i < output_layout.get_linear_size(); ++i) - { - switch (func) - { + for (size_t i = 0; i < output_layout.get_linear_size(); ++i) { + switch (func) { case activation_func::none: EXPECT_EQ((int8_t)input_ptr[i], output_ptr[i]); break; @@ -1356,10 +1348,9 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs) } } -TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) -{ - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i32, format::yxfb,{ 2, 2, 2, 2 } }); +TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) { + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i32, format::yxfb,{ 2, 2, 2, 2 } }); std::vector input_vec = { 1, 0, 5, 1, @@ -1378,11 +1369,10 @@ TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) activation_func::clamp }; - for (auto func : funcs) - { + for (auto func : funcs) { topology topology; activation_additional_params params = {0.0, 1.0}; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", "input", func, params)); network network(engine, topology); @@ -1393,14 +1383,12 @@ TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) EXPECT_EQ(outputs.begin()->first, "activation"); auto output_memory = outputs.at("activation").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - auto input_ptr = input.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); - for (size_t i = 0; i < output_layout.get_linear_size(); ++i) - { - switch (func) - { + for (size_t i = 0; i < output_layout.get_linear_size(); ++i) { + switch (func) { case activation_func::none: EXPECT_EQ((int32_t)input_ptr[i], output_ptr[i]); break; @@ -1429,13 +1417,13 @@ TEST(activation_f32_fw_gpu, b_fs_yx_fsv16_prelu) { constexpr int x = 2; constexpr int y = 2; - auto eng = get_test_engine(); + auto& eng = get_test_engine(); auto in_lay = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(b, f, x, y)); auto params_lay = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, cldnn::tensor(1, f, 1, 1)); - auto in_mem = cldnn::memory::allocate(eng, in_lay); - auto params_mem = cldnn::memory::allocate(eng, params_lay); + auto in_mem = eng.allocate_memory(in_lay); + auto params_mem = eng.allocate_memory(params_lay); auto in_data = generate_random_4d(b, f, y, x, -1, 1); auto params_data = generate_random_1d(f, -1, 1); @@ -1464,7 +1452,7 @@ TEST(activation_f32_fw_gpu, b_fs_yx_fsv16_prelu) { } } - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); 
ASSERT_EQ(expected.size(), out_ptr.size()); for (size_t i = 0; i < expected.size(); ++i) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp index b9e26b77b94..e8c18919b84 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp @@ -2,26 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; /* These tests are inteded to check if additional reorders are being added properly during @@ -30,20 +23,20 @@ add_reorders optimization pass. //concatenation of incompatible convolutions TEST(add_reorders_gpu, two_convolutions_and_concatenation) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(build_option::optimize_data(false)); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::yxio,{ 1, 1, 1, 2 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::oiyx,{ 1, 1, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::yxio,{ 1, 1, 1, 2 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 1, 2 } }); set_values(input, { 1.1f, 1.2f, 1.3f, 1.4f }); set_values(weights1, { 2.1f, 3.1f}); set_values(weights2, { 1.1f, 0.1f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); topology.add(data("weights2", weights2)); @@ -63,23 +56,18 @@ TEST(add_reorders_gpu, two_convolutions_and_concatenation) { float expected_out[] = { 6.34f, 1.34f, 6.86f, 1.46f }; float epsilon = 1e-3f; - for (auto& it : outputs) - { - auto output = it.second.get_memory().pointer(); - for (size_t cntr = 0; cntr < 2 * 2; cntr++) - { + for (auto& it : outputs) { + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); + for (size_t cntr = 0; cntr < 2 * 2; cntr++) { EXPECT_NEAR(expected_out[cntr], output[cntr], epsilon); } } } template -void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num_tiles) -{ - auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair - { - switch (axis) - { +void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) { + auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair { + switch (axis) { case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]); case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]); case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * 
size.spatial[1] * size.spatial[0]); @@ -89,22 +77,19 @@ void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num } }; - const pointer src = input.pointer(); - pointer dst = output.pointer(); + cldnn::mem_lock src(input, get_test_stream()); + cldnn::mem_lock dst(output, get_test_stream()); const data_t* psrc = src.data(); data_t* pdst = dst.data(); - auto sizes = get_sizes(input.get_layout().size, axis); + auto sizes = get_sizes(input->get_layout().size, axis); int outer_dim = sizes.first; int inner_dim = sizes.second; - for (int i = 0; i < outer_dim; i++) - { - for (int t = 0; t < num_tiles; t++) - { - for (int j = 0; j < inner_dim; j++) - { + for (int i = 0; i < outer_dim; i++) { + for (int t = 0; t < num_tiles; t++) { + for (int j = 0; j < inner_dim; j++) { pdst[j] = psrc[j]; } pdst += inner_dim; @@ -114,13 +99,13 @@ void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num } TEST(add_reorders_gpu, basic_reshape_and_tile) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 2, 1 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 4, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 2, 2, 1 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::byxf,{ 2, 1, 4, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1))); topology.add(tile("tile", "reshape", tensor(2, 1, 2, 4))); @@ -136,10 +121,10 @@ TEST(add_reorders_gpu, basic_reshape_and_tile) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]); } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp index ac1dacfe4e3..d7b4b630201 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp @@ -2,32 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "api/memory.hpp" -#include -#include "api/arg_max_min.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; template -void generic_arg_max_test_xyf(int input_b, int input_f, int input_y, int input_x, arg_max_min::out_type mode, bool expect_throw = false) -{ +void generic_arg_max_test_xyf(int input_b, int input_f, int input_y, int input_x, arg_max_min::out_type mode, bool expect_throw = false) { auto axis = arg_max_min::axis_name::xyf; auto sort_type = arg_max_min::sort_type::sort_by_values; auto test_input_fmt = format::bfyx; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(input_b, input_f, input_x, input_y); - auto input = memory::allocate(engine, { 
type_to_data_type::value, test_input_fmt, input_tensor }); + auto input = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, mode, 1U, axis, sort_type, false, padding(), type_to_data_type::value)); int min_random = -2, max_random = 2; @@ -51,12 +46,12 @@ void generic_arg_max_test_xyf(int input_b, int input_f, int input_y, int input_x int out_size = input_x * input_y * input_f; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); Tout index; Tin value; for (auto i = 0; i < input_b; i++) { - index = get_value(output_ptr, i); + index = get_value(output_ptr.data(), i); EXPECT_GE(index, (Tout)0); EXPECT_LT(index, (Tout)out_size); value = input_rnd_vec[i*out_size + (int)index]; @@ -74,11 +69,11 @@ void generic_arg_max_test_xyf(int input_b, int input_f, int input_y, int input_x TEST(arg_max_gpu_batch_one, base) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 1, top_k = 8; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k)); std::vector input_vec = { @@ -100,11 +95,10 @@ TEST(arg_max_gpu_batch_one, base) { EXPECT_EQ(outputs.begin()->first, "arg_max"); auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[batch_num * top_k]; - for (uint32_t i = 0; i < batch_num * top_k; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < batch_num * top_k; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } int size = x_size * y_size * feature_num; int index; @@ -123,8 +117,7 @@ TEST(arg_max_gpu_batch_one, base) { } else amount += same_values * (j - same_values + 1); - } - else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) { + } else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) { if (same_values != j + 1) { amount += same_values * (j - same_values + 1); same_values = 1; @@ -135,12 +128,10 @@ TEST(arg_max_gpu_batch_one, base) { } EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0); EXPECT_LT(out_buffer[i*top_k + top_k - 1], size); - for (int j = 0; j < top_k; j++) - { + for (int j = 0; j < top_k; j++) { index = (int)out_buffer[i*top_k + j]; value = input_vec[i*size + index]; - for (int k = 0; k < size; k++) - { + for (int k = 0; k < size; k++) { if (input_vec[i*size + k] > value) count++; } @@ -152,11 +143,11 @@ TEST(arg_max_gpu_batch_one, base) { TEST(arg_max_gpu_top_k, base) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 8; - auto input = memory::allocate(engine, { data_types::f32, 
format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k)); std::vector input_vec = { @@ -184,11 +175,10 @@ TEST(arg_max_gpu_top_k, base) { EXPECT_EQ(outputs.begin()->first, "arg_max"); auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[batch_num * top_k]; - for (uint32_t i = 0; i < batch_num * top_k; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < batch_num * top_k; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } int size = x_size * y_size * feature_num; int index; @@ -207,24 +197,21 @@ TEST(arg_max_gpu_top_k, base) { } else amount += same_values * (j - same_values + 1); - } - else if (input_vec[i*size + (int)(int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)(int)out_buffer[i*top_k + j + 1]]) { + } else if (input_vec[i*size + (int)(int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)(int)out_buffer[i*top_k + j + 1]]) { if (same_values != j+1) { amount += same_values * (j - same_values + 1); same_values = 1; } - } - else + } else { same_values++; + } } EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0); EXPECT_LT(out_buffer[i*top_k + top_k - 1], size); - for (int j = 0; j < top_k; j++) - { + for (int j = 0; j < top_k; j++) { index = (int)out_buffer[i*top_k + j]; value = input_vec[i*size + index]; - for (int k = 0; k < size; k++) - { + for (int k = 0; k < size; k++) { if (input_vec[i*size + k] > value) count++; } @@ -236,11 +223,11 @@ TEST(arg_max_gpu_top_k, base) { TEST(arg_max_gpu_min_top_k, base) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 3; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k)); std::vector input_vec = { @@ -266,11 +253,10 @@ TEST(arg_max_gpu_min_top_k, base) { EXPECT_EQ(outputs.begin()->first, "arg_max"); auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[batch_num * top_k]; - for (uint32_t i = 0; i < batch_num * top_k; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < batch_num * top_k; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } int size = x_size * y_size * feature_num; int index; @@ -289,24 +275,21 @@ TEST(arg_max_gpu_min_top_k, base) { } else amount += same_values * (j - same_values + 1); - } - else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) { + } else if (input_vec[i*size + (int)out_buffer[i*top_k + j]] != input_vec[i*size + (int)out_buffer[i*top_k + j + 1]]) { if (same_values != j + 1) { amount += same_values * (j - 
same_values + 1); same_values = 1; } - } - else + } else { same_values++; + } } EXPECT_GE(out_buffer[i*top_k + top_k - 1], 0); EXPECT_LT(out_buffer[i*top_k + top_k - 1], size); - for (int j = 0; j < top_k; j++) - { + for (int j = 0; j < top_k; j++) { index = (int)out_buffer[i*top_k + j]; value = input_vec[i*size + index]; - for (int k = 0; k < size; k++) - { + for (int k = 0; k < size; k++) { if (input_vec[i*size + k] < value) count++; } @@ -318,11 +301,11 @@ TEST(arg_max_gpu_min_top_k, base) { TEST(arg_max_gpu_min_axis_batch, base) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch)); std::vector input_vec = { @@ -348,14 +331,12 @@ TEST(arg_max_gpu_min_axis_batch, base) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], i < (out_size / 2) ? 0 : 1); } } @@ -407,11 +388,11 @@ TEST(arg_max_gpu_min, i64) { TEST(arg_max_gpu_min_axis_batch, i32) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::i32)); std::vector input_vec = { @@ -437,14 +418,12 @@ TEST(arg_max_gpu_min_axis_batch, i32) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], i < (out_size / 2) ? 
0 : 1); } } @@ -452,11 +431,11 @@ TEST(arg_max_gpu_min_axis_batch, i32) { TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, z_size = 1, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ batch_num, feature_num, x_size , y_size, z_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ batch_num, feature_num, x_size , y_size, z_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::min, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::i32)); std::vector input_vec = { @@ -482,25 +461,23 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], i < (out_size / 2) ? 0 : 1); } } TEST(arg_max_gpu_min_axis_y_yxfb, f32) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 1; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32)); std::vector input_vec = { @@ -548,25 +525,23 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); } } TEST(arg_max_gpu_min_axis_batch_yxfb, f32) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 1; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + 
topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::f32)); std::vector input_vec = { @@ -614,25 +589,23 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); } } TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32)); std::vector input_vec = { @@ -690,27 +663,25 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); } } TEST(top_k_layer_tests, second_output) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); - auto top_k_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1 , 1 } }); - auto second_output = memory::allocate(engine, { data_types::f32, format::bfyx, { top_k, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto top_k_input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1 , 1 } }); + auto second_output = engine.allocate_memory({ data_types::f32, format::bfyx, { top_k, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(cldnn::data("const", top_k_input)); topology.add(mutable_data("second_output", second_output)); topology.add(arg_max_min("arg_max", { "input", "const", "second_output" 
}, arg_max_min::min, top_k, arg_max_min::batch)); @@ -738,17 +709,16 @@ TEST(top_k_layer_tests, second_output) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); - auto second_output_ptr = second_output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock second_output_ptr(second_output, get_test_stream()); + float out_buffer[out_size]; float second_out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); - second_out_buffer[i] = get_value(second_output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); + second_out_buffer[i] = get_value(second_output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], i < (out_size / 2) ? 0 : 1); EXPECT_EQ(second_out_buffer[i], input_vec[i]); } @@ -756,13 +726,13 @@ TEST(top_k_layer_tests, second_output) { TEST(top_k_layer_tests, second_output2) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 1; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); - auto top_k_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1 , 1 } }); - auto second_output = memory::allocate(engine, { data_types::f32, format::yxfb, { top_k, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto top_k_input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1 , 1 } }); + auto second_output = engine.allocate_memory({ data_types::f32, format::yxfb, { top_k, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(cldnn::data("const", top_k_input)); topology.add(mutable_data("second_output", second_output)); topology.add(arg_max_min("arg_max", { "input", "const", "second_output" }, arg_max_min::max, top_k, arg_max_min::batch, arg_max_min::sort_by_values, false, padding(), data_types::f32)); @@ -834,17 +804,15 @@ TEST(top_k_layer_tests, second_output2) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); - auto second_output_ptr = second_output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock second_output_ptr(second_output, get_test_stream()); float out_buffer[out_size]; float second_out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); - second_out_buffer[i] = get_value(second_output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); + second_out_buffer[i] = get_value(second_output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); EXPECT_EQ(second_out_buffer[i], second_ref_vec[i]); } @@ -852,11 +820,11 @@ TEST(top_k_layer_tests, second_output2) { TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) { static 
const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, padding(), data_types::f32)); std::vector input_vec = { @@ -914,25 +882,23 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); } } TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) { static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int top_k = 2; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_indices, false, padding(), data_types::f32)); std::vector input_vec = { @@ -990,14 +956,12 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) { EXPECT_EQ(outputs.begin()->first, "arg_max"); const int out_size = y_size * feature_num * x_size * top_k; auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[out_size]; - for (uint32_t i = 0; i < out_size; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < out_size; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } - for (int i = 0; i < out_size; i++) - { + for (int i = 0; i < out_size; i++) { EXPECT_EQ(out_buffer[i], ref_vec[i]); } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp index 01eafbde391..76741001060 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp @@ -2,23 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/average_unpooling.hpp" -#include -#include -#include -#include 
"test_utils/test_utils.h" -#include -#include -#include -#include -#include "test_utils/float16.h" +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(average_unpooling_gpu, basic_in2x2x2x1) { // Input : 2x2x2x1 @@ -40,9 +34,9 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1) { // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); set_values(input, { 2.5f, -4.5f, @@ -52,7 +46,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(average_unpooling("average_unpooling", "input", { 2, 2, 3, 2 }, { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); network network(engine, topology); @@ -62,8 +56,8 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1) { auto outputs = network.execute(); auto output = outputs.at("average_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -107,9 +101,9 @@ TEST(average_unpooling_gpu, basic_in2x2x3x2_with_average_pooling_unpooling) { // f1: b0: 1.5 1.5 0.5 b1: 1.75 1.75 1 // f1: b0: 1.5 1.5 0.5 b1: 1.75 1.75 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); set_values(input, { 1.f, 2.f, -10.f, @@ -123,9 +117,9 @@ TEST(average_unpooling_gpu, basic_in2x2x3x2_with_average_pooling_unpooling) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(pooling("pooling", "input", pooling_mode::average_no_padding, { 1, 1, 2, 2 }, { 1, 1, 2, 2 })); - topology.add(average_unpooling("average_unpooling", "pooling", input.get_layout().size, { 1, 1, 2, 2 }, { 1, 1, 2, 2 })); + topology.add(average_unpooling("average_unpooling", "pooling", input->get_layout().size, { 1, 1, 2, 2 }, { 1, 1, 2, 2 })); network network(engine, topology); @@ -134,8 +128,8 @@ TEST(average_unpooling_gpu, basic_in2x2x3x2_with_average_pooling_unpooling) { auto outputs = network.execute(); auto output = outputs.at("average_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -179,9 +173,9 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_output_padding) { // f0: b0: 0.625 -0.5 -1.125 b1: 0 -1.6875 -1.6875 // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = 
engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 2.5f, -4.5f, @@ -191,7 +185,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_output_padding) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(average_unpooling("average_unpooling", "input", { 2, 2, 3, 2 }, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, padding({ 0, 0, 1, 1 }, 0))); network network(engine, topology); @@ -201,8 +195,8 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_output_padding) { auto outputs = network.execute(); auto output = outputs.at("average_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -260,9 +254,9 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) { // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 // f1: b0: 1.5 2.5 1 b1: 1.75 2.9375 1.1875 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { FLOAT16(2.5f), FLOAT16(-4.5f), @@ -272,7 +266,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(average_unpooling("average_unpooling", "input", { 2, 2, 3, 2 }, { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); network network(engine, topology); @@ -282,8 +276,8 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) { auto outputs = network.execute(); auto output = outputs.at("average_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -304,4 +298,4 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) { for (size_t i = 0; i < expected_output_vec.size(); ++i) { EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i])); } -} \ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/barriers_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/barriers_test.cpp index 2b15da70ed2..2b72881c618 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/barriers_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/barriers_test.cpp @@ -2,28 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// +#include "test_utils.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#include "test_utils/test_utils.h" +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(DISABLED_oooq_test, simple) { - engine_configuration cfg{ false, false, false, std::string(), std::string(), true }; - engine eng{ cfg }; + engine_configuration cfg{ false, queue_types::out_of_order }; + auto eng = engine::create(engine_types::ocl, runtime_types::ocl, cfg); - memory input_mem = memory::allocate(eng, layout{ 
data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input_mem = eng->allocate_memory(layout{ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input_mem, { 50 }); /* ---- r1 ---- r3 ---- -- r7 -- @@ -34,16 +27,16 @@ TEST(DISABLED_oooq_test, simple) */ topology tpl; - tpl.add(input_layout("in", input_mem.get_layout())); - tpl.add(reorder("r0", "in", input_mem.get_layout(), std::vector{ 0 })); - tpl.add(reorder("r1", "r0", input_mem.get_layout(), std::vector{ 1 })); - tpl.add(reorder("r2", "r0", input_mem.get_layout(), std::vector{ 2 })); - tpl.add(reorder("r3", "r1", input_mem.get_layout(), std::vector{ 3 })); - tpl.add(reorder("r4", "r2", input_mem.get_layout(), std::vector{ 4 })); - tpl.add(reorder("r5", "r4", input_mem.get_layout(), std::vector{ 5 })); + tpl.add(input_layout("in", input_mem->get_layout())); + tpl.add(reorder("r0", "in", input_mem->get_layout(), std::vector{ 0 })); + tpl.add(reorder("r1", "r0", input_mem->get_layout(), std::vector{ 1 })); + tpl.add(reorder("r2", "r0", input_mem->get_layout(), std::vector{ 2 })); + tpl.add(reorder("r3", "r1", input_mem->get_layout(), std::vector{ 3 })); + tpl.add(reorder("r4", "r2", input_mem->get_layout(), std::vector{ 4 })); + tpl.add(reorder("r5", "r4", input_mem->get_layout(), std::vector{ 5 })); tpl.add(concatenation("c6", { "r3", "r5" }, concatenation::along_x)); - layout concat_lay = input_mem.get_layout(); + layout concat_lay = input_mem->get_layout(); concat_lay.size.spatial[0] *= 2; tpl.add(reorder("r7", "c6", concat_lay, std::vector{ 7 })); @@ -52,13 +45,13 @@ TEST(DISABLED_oooq_test, simple) concat_lay.size.spatial[1] *= 2; build_options options; - network net{ eng, tpl, options }; + network net{ *eng, tpl, options }; net.set_input_data("in", input_mem); auto output = net.execute().at("c9").get_memory(); - EXPECT_TRUE(output.get_layout().size.spatial[0] == 2); - EXPECT_TRUE(output.get_layout().size.spatial[1] == 2); - EXPECT_TRUE(output.get_layout().size.feature[0] == 1); - EXPECT_TRUE(output.get_layout().size.batch[0] == 1); -} \ No newline at end of file + EXPECT_TRUE(output->get_layout().size.spatial[0] == 2); + EXPECT_TRUE(output->get_layout().size.spatial[1] == 2); + EXPECT_TRUE(output->get_layout().size.feature[0] == 1); + EXPECT_TRUE(output->get_layout().size.batch[0] == 1); +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_to_space_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_to_space_gpu_test.cpp index 9299820f8ce..e3d33ad2507 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_to_space_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_to_space_gpu_test.cpp @@ -2,18 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -26,9 +21,9 @@ TEST(batch_to_space_fp16_gpu, i8111_bs1222_cb0000_ce0000) { // Output : 1x2x2x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(8), feature(1), spatial(1, 1)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), @@ -38,7 +33,7 @@ TEST(batch_to_space_fp16_gpu, 
i8111_bs1222_cb0000_ce0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -50,7 +45,7 @@ TEST(batch_to_space_fp16_gpu, i8111_bs1222_cb0000_ce0000) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f @@ -71,9 +66,9 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) { // Output : 1x6x2x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(4), feature(3), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -85,7 +80,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,1,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -97,7 +92,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 6.f, 1.f, 7.f, 12.f, 18.f, @@ -121,9 +116,9 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) { // Output : 1x5x1x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(4), feature(3), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -135,7 +130,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,1,2}, 1), tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), @@ -147,7 +142,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 13.f, 3.f, 15.f, 5.f @@ -168,9 +163,9 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) { // Output : 1x2x2x1x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(6), feature(2), spatial(1, 2, 1)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfzyx, input_shape }); set_values(input, { FLOAT16(0.0f), 
FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -182,7 +177,7 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfzyx, {1,2,3,1,1}, 1), tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), @@ -194,7 +189,7 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 2.f, 6.f, 14.f, 18.f @@ -215,9 +210,9 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) { // Output : 1x1x3x1x2x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(12), feature(1), spatial(2, 1, 1, 2)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfwzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfwzyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -231,7 +226,7 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfwzyx, {1,1,2,3,2,1}, 1), tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), @@ -243,7 +238,7 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 24.f, 25.f, 28.f, 29.f, @@ -266,9 +261,9 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { // Output : 1x16x1x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(2), feature(16), spatial(1, 1)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), @@ -278,7 +273,7 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f16)); topology.add(batch_to_space("batch_to_space", "input_fsv", tensor(format::bfyx, {1,1,1,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), @@ -293,7 +288,7 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("bts_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 16.f, 1.f, 17.f, 2.f, 18.f, 3.f, 19.f, @@ -317,9 +312,9 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { // Output : 1x6x1x4 // Input values in fp16 - 
engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(2), feature(8), spatial(2, 1)}; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), @@ -329,7 +324,7 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f16)); topology.add(batch_to_space("batch_to_space", "input_fsv", tensor(format::bfyx, {1,1,1,2}, 1), tensor(format::bfyx, {0,2,0,0}, 0), @@ -344,7 +339,7 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("bts_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 4.f, 20.f, 5.f, 21.f, 6.f, 22.f, 7.f, 23.f, @@ -367,9 +362,9 @@ TEST(batch_to_space_fp32_gpu, i8111_bs1222_cb0000_ce0000) { // Output : 1x2x2x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(8), feature(1), spatial(1, 1)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -377,7 +372,7 @@ TEST(batch_to_space_fp32_gpu, i8111_bs1222_cb0000_ce0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -389,7 +384,7 @@ TEST(batch_to_space_fp32_gpu, i8111_bs1222_cb0000_ce0000) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f @@ -410,9 +405,9 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0000_ce0000) { // Output : 1x6x2x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(4), feature(3), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -424,7 +419,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0000_ce0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,1,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -436,7 +431,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0000_ce0000) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector 
expected_results = { 0.f, 6.f, 1.f, 7.f, 12.f, 18.f, @@ -460,9 +455,9 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0010_ce0101) { // Output : 1x5x1x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(4), feature(3), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -474,7 +469,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0010_ce0101) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfyx, {1,2,1,2}, 1), tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), @@ -486,7 +481,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0010_ce0101) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 13.f, 3.f, 15.f, 5.f @@ -507,9 +502,9 @@ TEST(batch_to_space_fp32_gpu, i62121_bs12311_cb02000_ce00110) { // Output : 1x2x2x1x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(6), feature(2), spatial(1, 2, 1)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -521,7 +516,7 @@ TEST(batch_to_space_fp32_gpu, i62121_bs12311_cb02000_ce00110) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfzyx, {1,2,3,1,1}, 1), tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), @@ -533,7 +528,7 @@ TEST(batch_to_space_fp32_gpu, i62121_bs12311_cb02000_ce00110) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 2.f, 6.f, 14.f, 18.f @@ -554,9 +549,9 @@ TEST(batch_to_space_fp32_gpu, i1212112_bs112321_cb02000_ce00110) { // Output : 1x1x3x1x2x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(12), feature(1), spatial(2, 1, 1, 2)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -570,7 +565,7 @@ TEST(batch_to_space_fp32_gpu, i1212112_bs112321_cb02000_ce00110) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(batch_to_space("batch_to_space", "Input", tensor(format::bfwzyx, {1,1,2,3,2,1}, 1), tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), @@ -582,7 +577,7 @@ TEST(batch_to_space_fp32_gpu, i1212112_bs112321_cb02000_ce00110) { auto outputs = network.execute(); auto output = outputs.at("batch_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock 
output_ptr(output, get_test_stream()); std::vector expected_results = { 24.f, 25.f, 28.f, 29.f, @@ -605,9 +600,9 @@ TEST(batch_to_space_fp32_gpu, i21621_bs1112_cb0201_ce0810_b_fs_yx_fsv16) { // Output : 1x6x1x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(2), feature(16), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, @@ -621,7 +616,7 @@ TEST(batch_to_space_fp32_gpu, i21621_bs1112_cb0201_ce0810_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f32)); topology.add(batch_to_space("batch_to_space", "input_fsv", tensor(format::bfyx, {1,1,1,2}, 1), tensor(format::bfyx, {0,2,0,1}, 0), @@ -636,7 +631,7 @@ TEST(batch_to_space_fp32_gpu, i21621_bs1112_cb0201_ce0810_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("bts_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 36.0f, 38.0f, 40.0f, 42.0f, 44.0f, 46.0f @@ -657,9 +652,9 @@ TEST(batch_to_space_fp32_gpu, i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16) { // Output : 1x8x3x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{batch(4), feature(10), spatial(1, 2)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, @@ -673,7 +668,7 @@ TEST(batch_to_space_fp32_gpu, i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f32)); topology.add(batch_to_space("batch_to_space", "input_fsv", tensor(format::bfyx, {1,2,2,1}, 1), tensor(format::bfyx, {0,8,1,0}, 0), @@ -688,7 +683,7 @@ TEST(batch_to_space_fp32_gpu, i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("bts_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 28.0f, 9.0f, 29.0f, 68.0f, 49.0f, 69.0f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp index a5535f3239f..e0ac110a817 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp @@ -1,26 +1,18 @@ -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include -#include "api/binary_convolution.hpp" -#include "api/reorder.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include 
-#include -#include -#include "float16.h" #include "test_utils.h" +#include +#include +#include +#include + +#include + using namespace cldnn; -using namespace tests; +using namespace ::tests; // Batch, groups, IC, IW, IH, OC, OW, OH, KH, KW, SH, SW, PH, PW struct TestParams { @@ -74,23 +66,23 @@ struct TestParams { } }; -static void fill(cldnn::memory& mem) { - auto ptr = mem.pointer(); - for (size_t i = 0; i < div_up(mem.get_layout().count(), 32); i++) { +static void fill(cldnn::memory::ptr mem) { + cldnn::mem_lock ptr(mem, get_test_stream()); + for (size_t i = 0; i < div_up(mem->get_layout().count(), 32); i++) { ptr[i] = (uint32_t)rand() % (1 << 31); } } template -void compute_ref_conv_bin(const cldnn::memory &src, - const cldnn::memory &weights, - cldnn::memory &dst, - TestParams &p) -{ - auto src_data = src.pointer(); - auto weights_data = weights.pointer(); - auto dst_data = dst.pointer(); +void compute_ref_conv_bin(const cldnn::memory::ptr src, + const cldnn::memory::ptr weights, + cldnn::memory::ptr dst, + TestParams &p) { + + cldnn::mem_lock src_data(src, get_test_stream()); + cldnn::mem_lock weights_data(weights, get_test_stream()); + cldnn::mem_lock dst_data(dst, get_test_stream()); int pack_size = sizeof(data_t_src) * 8; @@ -117,7 +109,7 @@ void compute_ref_conv_bin(const cldnn::memory &src, return (data_t_src)((val >> bit) & 0x1); }; - auto ker = [=](data_t_acc &d, int g, int mb, int oc,int oh, int ow, int& ks) { + auto ker = [&](data_t_acc &d, int g, int mb, int oc,int oh, int ow, int& ks) { for (int ic = 0; ic < IC / NG; ++ic) { for (int kh = 0; kh < KH; ++kh) for (int kw = 0; kw < KW; ++kw) { @@ -133,15 +125,12 @@ void compute_ref_conv_bin(const cldnn::memory &src, uint8_t w = extract_bit(weights_data[widx / pack_size], widx % pack_size); uint8_t s = 0; - if ((ih < 0 || ih >= IH || iw < 0 || iw >= IW)) - { + if ((ih < 0 || ih >= IH || iw < 0 || iw >= IW)) { if (p.pad_value == 0.0f) continue; else s = (p.pad_value == -1.0f) ? 
0 : 1; - } - else - { + } else { if (ic == 0) ks++; iidx = mb * div_up(IC, pack_size) * IH * IW + g * div_up(IC, pack_size) / NG * IH * IW @@ -180,18 +169,15 @@ void compute_ref_conv_bin(const cldnn::memory &src, } } -class binary_convolution_test : public ::testing::TestWithParam -{ - void SetUp() - { +class binary_convolution_test : public ::testing::TestWithParam { + void SetUp() { std::cout << GetParam() << std::endl; ASSERT_TRUE(GetParam().isConsistent()); } }; -TEST_P(binary_convolution_test, conv) -{ - const auto& engine = get_test_engine(); +TEST_P(binary_convolution_test, conv) { + auto& engine = get_test_engine(); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); topology topology_bin; @@ -217,9 +203,9 @@ TEST_P(binary_convolution_test, conv) cldnn::feature(p.oc), cldnn::spatial(p.ow, p.oh)}; - auto input = memory::allocate(engine, { cldnn::data_types::bin, cldnn::format::b_fs_yx_32fp, is_size }); - auto weights = memory::allocate(engine, { cldnn::data_types::bin, cldnn::format::bfyx, wei_size }); - auto output_ref = memory::allocate(engine, { cldnn::data_types::f32, cldnn::format::bfyx, os_size }); + auto input = engine.allocate_memory({ cldnn::data_types::bin, cldnn::format::b_fs_yx_32fp, is_size }); + auto weights = engine.allocate_memory({ cldnn::data_types::bin, cldnn::format::bfyx, wei_size }); + auto output_ref = engine.allocate_memory({ cldnn::data_types::f32, cldnn::format::bfyx, os_size }); fill(input); fill(weights); @@ -231,7 +217,7 @@ TEST_P(binary_convolution_test, conv) // print_bin_blob(weights, "weights"); // print_blob(output_ref, "ref_out"); - topology_bin.add(input_layout(input_name, input.get_layout())); + topology_bin.add(input_layout(input_name, input->get_layout())); topology_bin.add(data(output_name + weights_suffix, weights)); topology_bin.add(binary_convolution(output_name, input_name, {output_name + weights_suffix}, @@ -243,18 +229,15 @@ TEST_P(binary_convolution_test, conv) std::map outputs = network_bin.execute(); auto outputMemory = outputs.at(output_name).get_memory(); - for (size_t i = 0; i < output_ref.count(); i++) { - if (p.dt == data_types::f32) - { - auto ref = output_ref.pointer(); - auto opt = outputMemory.pointer(); + for (size_t i = 0; i < output_ref->count(); i++) { + if (p.dt == data_types::f32) { + cldnn::mem_lock ref(output_ref, get_test_stream()); + cldnn::mem_lock opt(outputMemory, get_test_stream()); ASSERT_EQ(ref[i], opt[i]) << i; - } - else if (p.dt == data_types::f16) - { - auto ref = output_ref.pointer(); - auto opt = outputMemory.pointer(); + } else if (p.dt == data_types::f16) { + cldnn::mem_lock ref(output_ref, get_test_stream()); + cldnn::mem_lock opt(outputMemory, get_test_stream()); ASSERT_EQ(ref[i], float16_to_float32(opt[i])) << i; } @@ -320,20 +303,19 @@ INSTANTIATE_TEST_CASE_P(BinaryConvTest, binary_convolution_test, ::testing::Valu ),); template -static void set_binary_values(const cldnn::memory& mem, std::vector args) { - auto ptr = mem.pointer(); +static void set_binary_values(cldnn::memory::ptr mem, std::vector args) { + cldnn::mem_lock ptr(mem, get_test_stream()); auto it = ptr.begin(); for (auto x : args) *it++ = x; } -TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) -{ - const auto& engine = get_test_engine(); +TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) { + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); - auto weights = 
memory::allocate(engine, { data_types::bin, format::bfyx, { 4, 16, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::bin, format::bfyx, { 4, 16, 1, 1 } }); // 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 // 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 @@ -373,7 +355,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) 4.0f, 2.0f, -6.0f, 14.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), binary_convolution("binary_conv", "input", { "weights" }, { 1,1,1,1 }, @@ -396,8 +378,8 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) EXPECT_EQ(outputs.begin()->first, "binary_conv"); auto output_memory = outputs.at("binary_conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.data_type, data_types::f32); @@ -413,10 +395,10 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) } TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::bin, format::bfyx, { 4, 16, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::bin, format::bfyx, { 4, 16, 1, 1 } }); // 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 // 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 @@ -456,7 +438,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { 4.0f, 2.0f, -6.0f, 14.0f }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), binary_convolution("binary_conv", "input", { "weights" }, { 1,1,1,1 }, @@ -479,8 +461,8 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { EXPECT_EQ(outputs.begin()->first, "binary_conv"); auto output_memory = outputs.at("binary_conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.data_type, data_types::f16); @@ -489,9 +471,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { EXPECT_EQ(output_layout.size.spatial[1], 2); EXPECT_EQ(output_layout.size.spatial[0], 2); - for (size_t i = 0; i < output_layout.count(); i++) - { + for (size_t i = 0; i < output_layout.count(); i++) { EXPECT_EQ(float16_to_float32(output_ptr[i]), output_vec[i]) << "index="<< i; } } - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp index 1e206a2876c..df8a8d47c20 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp @@ -2,18 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // 
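The hunks above consistently swap the old accessor style (`memory::allocate(engine, layout)`, `mem.pointer<T>()`, `mem.get_layout()`) for the new pointer-based API (`engine.allocate_memory(layout)`, `cldnn::mem_lock<T>`, `mem->get_layout()`). A minimal sketch of the new read-back pattern, using only the helpers that appear in these tests (`get_test_engine`, `get_test_stream`, `set_values`); the template argument on `mem_lock` is assumed, since angle brackets are stripped in this flattened diff:

#include "test_utils.h"

using namespace cldnn;
using namespace ::tests;

// Sketch only: allocate a small buffer, fill it, and read it back through mem_lock.
static void read_back_example() {
    auto& engine = get_test_engine();  // engine is now bound by reference, not copied
    auto mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });  // returns memory::ptr
    set_values(mem, { 1.0f, 2.0f, 3.0f, 4.0f });
    cldnn::mem_lock<float> ptr(mem, get_test_stream());  // replaces mem.pointer<float>()
    for (size_t i = 0; i < mem->get_layout().count(); ++i) {  // layout reached through operator->
        (void)ptr[i];
    }
}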
-/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include -#include - -#include "test_utils/test_utils.h" -#include "test_utils/uniform_quantized_real_distribution.hpp" +#include +#include #include @@ -26,7 +18,7 @@ static std::vector generate_rnd_real_input( const T min = static_cast(0), const T max = static_cast(1), const unsigned rnd_bits = 9) { static std::default_random_engine rnd_gen(random_seed); - cldnn::tests::distributions::uniform_quantized_real_distribution rnd_dist(min, max, rnd_bits); + tests::distributions::uniform_quantized_real_distribution rnd_dist(min, max, rnd_bits); auto acum = std::accumulate(sizes.begin(), sizes.end(), static_cast(1), std::multiplies()); @@ -62,12 +54,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -97,7 +89,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x)); @@ -140,12 +132,12 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) { constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -200,7 +192,7 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z)); @@ -249,12 +241,12 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) { constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), 
feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -308,7 +300,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z * out_size_w)); @@ -353,12 +345,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -388,7 +380,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x)); @@ -429,12 +421,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -464,7 +456,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x)); @@ -507,12 +499,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) { constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -532,7 +524,7 @@ TEST(border_gpu, 
basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B for (auto f = 0; f < out_size_f; ++f) { // F @@ -587,12 +579,12 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) { constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -612,7 +604,7 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B for (auto f = 0; f < out_size_f; ++f) { // F @@ -664,12 +656,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -701,7 +693,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x)); @@ -743,12 +735,12 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) { constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, tensor{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, tensor{ in_size_b, in_size_f, in_size_x, in_size_y, in_size_z } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -781,7 +773,7 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, 
get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z)); @@ -829,12 +821,12 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { constexpr auto out_size_z = in_size_z + blt_size_z + brb_size_z; constexpr auto out_size_w = in_size_w + blt_size_w + brb_size_w; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(in_size_b), feature(in_size_f), spatial(in_size_x, in_size_y, in_size_z, in_size_w) } }); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -847,26 +839,26 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { 1, -2, 3, -4, 5, 6, 7, 8, - 2, -3, 4, -5, + 2, -3, 4, -5, 15, 4, 4, 4, 2, -6, 13, -14, - 3, 7, 7, 7, + 3, 7, 7, 7, }; std::vector out_data = { - 2, -3, 4, -5, + 2, -3, 4, -5, 15, 4, 4, 4, 1, -2, 3, -4, 5, 6, 7, 8, - 2, -3, 4, -5, - 15, 4, 4, 4, + 2, -3, 4, -5, + 15, 4, 4, 4, - 2, -6, 13, -14, + 2, -6, 13, -14, 3, 7, 7, 7, - 2, -3, 4, -5, + 2, -3, 4, -5, 15, 4, 4, 4, }; set_values(input, input_data); @@ -876,7 +868,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x * out_size_z * out_size_w)); @@ -921,12 +913,12 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -958,7 +950,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(out_data.size(), static_cast(out_size_b * out_size_f * out_size_y * out_size_x)); @@ -996,12 +988,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", 
input->get_layout()) ); topology.add( border("output", "input", @@ -1021,7 +1013,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B for (auto f = 0; f < out_size_f; ++f) { // F @@ -1068,12 +1060,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -1092,7 +1084,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B for (auto f = 0; f < out_size_f; ++f) { // F @@ -1135,12 +1127,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -1158,7 +1150,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B for (auto f = 0; f < out_size_f; ++f) { // F @@ -1201,12 +1193,12 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) { constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y; constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x; - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, tensor{in_size_b, in_size_f, in_size_x, in_size_y}}); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( border("output", "input", @@ -1224,7 +1216,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto b = 0; b < out_size_b; ++b) { // B 
for (auto f = 0; f < out_size_f; ++f) { // F diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp index 9784a70a916..8ae447221ed 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp @@ -2,18 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include -#include - -#include "test_utils/test_utils.h" -#include "test_utils/uniform_quantized_real_distribution.hpp" +#include +#include #include @@ -23,8 +15,7 @@ using namespace ::tests; template void start_broadcast_test(data_types cldnn_data_type, std::vector output_shape, std::vector input_shape, std::vector broadcast_axes, - std::vector golden_data) -{ + std::vector golden_data) { size_t input_data_size = accumulate(input_shape.rbegin(), input_shape.rend(), (size_t)1, std::multiplies()); EXPECT_GE(input_data_size, (size_t)1); std::vector input_data = {}; @@ -51,11 +42,11 @@ void start_broadcast_test(data_types cldnn_data_type, std::vector output fixed_b_axes.push_back((uint16_t) (broadcast_axes.at(i) + shift)); } - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {cldnn_data_type, format::bfyx, {input_4d.at(0), input_4d.at(1), input_4d.at(3), input_4d.at(2)}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({cldnn_data_type, format::bfyx, {input_4d.at(0), input_4d.at(1), input_4d.at(3), input_4d.at(2)}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {output_4d.at(0), output_4d.at(1), output_4d.at(3), output_4d.at(2)}, fixed_b_axes)); set_values(input, input_data); @@ -65,7 +56,7 @@ void start_broadcast_test(data_types cldnn_data_type, std::vector output auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (tensor::value_type b = 0; b < output_4d.at(0); ++b) { for (tensor::value_type f = 0; f < output_4d.at(1); ++f) { @@ -110,11 +101,11 @@ void start_broadcast_test_5d(data_types cldnn_data_type, std::vector out fixed_b_axes.push_back((uint16_t)(broadcast_axes.at(i) + shift)); } - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { cldnn_data_type, format::bfzyx,{ input_5d.at(0), input_5d.at(1), input_5d.at(4), input_5d.at(3), input_5d.at(2) } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ cldnn_data_type, format::bfzyx,{ input_5d.at(0), input_5d.at(1), input_5d.at(4), input_5d.at(3), input_5d.at(2) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", { output_5d.at(0), output_5d.at(1), output_5d.at(4), output_5d.at(3), output_5d.at(2) }, fixed_b_axes)); set_values(input, input_data); @@ -124,7 +115,7 @@ void start_broadcast_test_5d(data_types cldnn_data_type, std::vector out auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, 
get_test_stream()); for (tensor::value_type b = 0; b < output_5d.at(0); ++b) { for (tensor::value_type f = 0; f < output_5d.at(1); ++f) { @@ -1438,11 +1429,11 @@ TEST(broadcast_gpu_int64_t, bfyx_2_to_2x3x4x5_w_b_axes_1_2_3) { TEST(broadcast_gpu, basic_error_wrong_b_axes_size) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 2, 3, 4})); std::string msg_to_find = "Incorrect parameters configuration: broadcast_axes size should be less or equal 4."; @@ -1451,11 +1442,11 @@ TEST(broadcast_gpu, basic_error_wrong_b_axes_size) { TEST(broadcast_gpu, basic_error_wrong_b_axis_value) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 4})); std::string msg_to_find = "Incorrect parameters configuration: broadcast_axes index should be within broadcast_sizes range."; @@ -1464,11 +1455,11 @@ TEST(broadcast_gpu, basic_error_wrong_b_axis_value) { TEST(broadcast_gpu, basic_error_duplicate_b_axis_values) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 1})); std::string msg_to_find = "Incorrect parameters configuration: Duplicate axes numbers was found in broadcast_axes."; @@ -1477,11 +1468,11 @@ TEST(broadcast_gpu, basic_error_duplicate_b_axis_values) { TEST(broadcast_gpu, basic_error_wrong_input_dimension_0) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 3, 4, 5}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {2, 3, 4, 5}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {1})); std::string msg_to_find = "Input size on dimension number 0(=2) is not equal to: (=1)"; @@ -1490,11 +1481,11 @@ TEST(broadcast_gpu, basic_error_wrong_input_dimension_0) { TEST(broadcast_gpu, basic_error_not_dividable_2x3x4x5_to_3x3x4x5) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 3, 4, 5}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {2, 3, 4, 5}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {3, 3, 
4, 5}, {})); std::string msg_to_find = "Invalid broadcast size: not dividable by input size"; @@ -1503,11 +1494,11 @@ TEST(broadcast_gpu, basic_error_not_dividable_2x3x4x5_to_3x3x4x5) { TEST(broadcast_gpu, basic_error_not_dividable_3_to_2x3x4x5_w_b_axes_0x1x3) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 3, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 3, 1}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 3})); std::string msg_to_find = "Invalid broadcast size: not dividable by input size"; @@ -1516,11 +1507,11 @@ TEST(broadcast_gpu, basic_error_not_dividable_3_to_2x3x4x5_w_b_axes_0x1x3) { TEST(broadcast_gpu, basic_error_not_dividable_4x5_to_3x4x5_w_b_axes_1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 5, 4}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 5, 4}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(broadcast("output", "input", {2, 3, 4, 5}, {1})); std::string msg_to_find = "Invalid broadcast size: not dividable by input size"; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/cache_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/cache_test.cpp index 89935b79549..469d0b3103f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/cache_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/cache_test.cpp @@ -2,18 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include - #include "test_utils.h" -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include #include @@ -172,13 +165,13 @@ void remove(const std::string& filename) { class cache_test_helper { public: - cache_test_helper(cldnn::engine engine, cache_version v) + cache_test_helper(cldnn::engine& engine, cache_version v) : _engine(engine) , _mode(cldnn::tuning_mode::tuning_disabled) , cache_filename(get_temporary_cache_file()) { auto cache = get_cache_version(v); - auto eus = engine.get_info().cores_count; + auto eus = engine.get_device_info().execution_units_count; replace(cache, eus_marker, eus); write(cache_filename, cache); @@ -210,7 +203,7 @@ public: } void test() { - auto w_mem = cldnn::memory::allocate(_engine, cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 16, 16, 1, 1 })); + auto w_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 16, 16, 1, 1 })); auto topology = cldnn::topology( cldnn::input_layout("input", cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })), cldnn::data("weights", w_mem), @@ -225,7 +218,7 @@ public: cldnn::build_option::optimize_data(true) ); auto network = cldnn::network(_engine, topology, build_opts); - auto in_mem = cldnn::memory::allocate(_engine, cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })); + auto in_mem = _engine.allocate_memory(cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 16, 3, 3 })); 
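The cache_test changes above stop copying the engine: helpers now take `cldnn::engine&`, store it by reference, and query `get_device_info().execution_units_count` instead of `get_info().cores_count`. A small sketch of a helper following the same pattern; the class name `engine_probe` is hypothetical, and only interfaces visible in this patch are used:

#include "test_utils.h"

using namespace cldnn;

// Hypothetical helper mirroring cache_test_helper: holds the (non-copyable) engine by reference.
class engine_probe {
public:
    explicit engine_probe(cldnn::engine& engine) : _engine(engine) {}

    // Replaces the old engine.get_info().cores_count query.
    auto execution_units() const {
        return _engine.get_device_info().execution_units_count;
    }

    // Replaces memory::allocate(engine, l).
    memory::ptr scratch(const layout& l) const {
        return _engine.allocate_memory(l);
    }

private:
    cldnn::engine& _engine;  // stored by reference, as in the updated cache_test_helper
};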
network.set_input_data("input", in_mem); network.execute(); @@ -247,7 +240,7 @@ public: if (compare_cache.compare) { auto cache = read(cache_filename); auto expected_cache = get_cache_version(compare_cache.value); - auto eus = _engine.get_info().cores_count; + auto eus = _engine.get_device_info().execution_units_count; replace(expected_cache, eus_marker, eus); EXPECT_EQ(cache, expected_cache); @@ -266,7 +259,7 @@ private: optional_compare(T v, bool neq) : compare(true), not_equal(neq), value(v) {} }; - cldnn::engine _engine; + cldnn::engine& _engine; cldnn::tuning_mode _mode; @@ -311,7 +304,7 @@ public: TEST(cache_test, no_cache_baseline) { SCOPED_TRACE("default implementation same as reference, cache tests may provide invalid pass"); - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); auto helper = cache_test_helper(engine, cache_version::version_2); helper.with_mode(cldnn::tuning_mode::tuning_disabled) @@ -321,7 +314,7 @@ TEST(cache_test, no_cache_baseline) { TEST_P(cache_version_test, use_only) { auto version = GetParam(); - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, version); helper.with_mode(cldnn::tuning_mode::tuning_use_cache) @@ -337,7 +330,7 @@ TEST_P(cache_version_test, update) { ex_version = cache_version::version_2_from_1; } - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, version); helper.with_mode(cldnn::tuning_mode::tuning_use_and_update) @@ -353,7 +346,7 @@ INSTANTIATE_TEST_CASE_P( cache_version_test::to_string); TEST(cache_test, remove_invalid) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); cache_test_helper helper(engine, cache_version::version_2_invalid); helper.with_mode(cldnn::tuning_mode::tuning_use_and_update) diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/cl_mem_input_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/cl_mem_input_test.cpp index 2ff579ca8ff..bf5d79b74fa 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/cl_mem_input_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/cl_mem_input_test.cpp @@ -2,23 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/activation.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include -#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; typedef std::chrono::high_resolution_clock Time; typedef std::chrono::nanoseconds ns; @@ -26,10 +21,8 @@ typedef std::chrono::duration> ms; typedef std::chrono::duration fsec; -void checkStatus(int status, const char *message) -{ - if (status != 0) - { +void checkStatus(int status, const char *message) { + if (status != 0) { std::string str_message(message + std::string(": ")); std::string str_number(std::to_string(status)); @@ -37,8 +30,7 @@ void checkStatus(int status, const char *message) } } -std::vector createSampleData(int width, int height) -{ +std::vector createSampleData(int width, int height) { int data_size = width * (height + height / 2); auto data = std::vector(data_size); srand((unsigned)time(0)); @@ -57,8 +49,7 @@ std::vector createSampleData(int width, int height) return data; } -std::vector 
createReferenceData(std::vector data, int width, int height, cldnn::format format) -{ +std::vector createReferenceData(std::vector data, int width, int height, cldnn::format format) { auto img = std::vector(width * height * 3); for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { @@ -78,8 +69,7 @@ std::vector createReferenceData(std::vector data, int widt img[j + width * i] = R; img[j + width * i + width * height] = G; img[j + width * i + width * height * 2] = B; - } - else { //byxf + } else { //byxf img[3* width*i + 3 * j] = R; img[3 * width * i + 3 * j + 1] = G; img[3 * width*i + 3 * j + 2] = B; @@ -90,14 +80,12 @@ std::vector createReferenceData(std::vector data, int widt return img; } -struct OpenCL -{ +struct OpenCL { cl::Context _context; cl::Device _device; cl::CommandQueue _queue; - OpenCL() - { + OpenCL() { // get Intel iGPU OCL device, create context and queue { static constexpr auto INTEL_PLATFORM_VENDOR = "Intel(R) Corporation"; @@ -136,8 +124,7 @@ struct OpenCL _queue = cl::CommandQueue(_context, _device, props); } } - void releaseOclImage(std::shared_ptr image) - { + void releaseOclImage(std::shared_ptr image) { checkStatus(clReleaseMemObject(*image), "clReleaseMemObject"); } }; @@ -153,9 +140,9 @@ TEST(cl_mem_check, check_2_inputs) { image_format.image_channel_order = CL_R; image_format.image_channel_data_type = CL_UNORM_INT8; cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, - 0, 0, 0, 0, 0, NULL }; + 0, 0, 0, 0, 0, { nullptr } }; - cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_y failed"); image_format.image_channel_order = CL_RG; @@ -163,38 +150,38 @@ TEST(cl_mem_check, check_2_inputs) { image_desc.image_height = height / 2; image_desc.image_depth = 1; - cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_uv failed"); size_t origin[3] = { 0, 0, 0 }; size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, NULL, NULL); + err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image plane_y failed"); - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, NULL, NULL); + err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image plane_uv failed"); - device_query query(static_cast(ocl_instance->_context.get())); + device_query query(engine_types::ocl, runtime_types::ocl, static_cast(ocl_instance->_context.get())); auto devices = query.get_available_devices(); auto engine_config = cldnn::engine_configuration(); - engine engine(devices.begin()->second, engine_config); + auto engine = 
engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config); auto input = input_layout("input", { data_types::i8, format::nv12, {1,1,height,width} }); auto input2 = input_layout("input2", { data_types::i8, format::nv12, {1,1,height / 2,width / 2} }); auto output_format = cldnn::format::byxf; layout output_layout(data_types::f32, output_format, { 1,3,height,width }); - auto input_memory = cldnn::memory::share_image(engine, input.layout, nv12_image_plane_y, 0); - auto input_memory2 = cldnn::memory::share_image(engine, input2.layout, nv12_image_plane_uv, 0); + auto input_memory = engine->share_image(input.layout, nv12_image_plane_y); + auto input_memory2 = engine->share_image(input2.layout, nv12_image_plane_uv); topology topology; topology.add(input); topology.add(input2); topology.add(reorder("reorder", "input", "input2", output_layout)); - network network(engine, topology); + network network(*engine, topology); network.set_input_data("input", input_memory); network.set_input_data("input2", input_memory2); @@ -202,7 +189,7 @@ TEST(cl_mem_check, check_2_inputs) { std::vector reference_results = createReferenceData(data, width, height, output_format); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); int size = width * height * 3; for (auto i = 0; i < size; i++) { EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f); @@ -222,26 +209,26 @@ TEST(cl_mem_check, check_input) { image_format.image_channel_order = CL_R; image_format.image_channel_data_type = CL_UNORM_INT8; cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, - 0, 0, 0, 0, 0, NULL }; + 0, 0, 0, 0, 0, { nullptr } }; - cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem nv12_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_y failed"); image_format.image_channel_order = CL_RG; image_desc.image_width = width / 2; image_desc.image_height = height / 2; - cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem nv12_image_plane_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_uv failed"); size_t origin[3] = { 0, 0, 0 }; size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, NULL, NULL); + err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image plane_y failed"); - err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, NULL, NULL); + err = clEnqueueWriteImage(ocl_instance->_queue.get(), nv12_image_plane_uv, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); checkStatus(err, "Writing nv12 image plane_uv failed"); image_format.image_channel_order = CL_NV12_INTEL; @@ -267,12 +254,12 @@ TEST(cl_mem_check, check_input) { image_desc.image_depth = 0; 
image_format.image_channel_order = CL_R; - cl_mem img_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem img_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_y failed"); image_desc.image_depth = 1; image_format.image_channel_order = CL_RG; - cl_mem img_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, NULL, &err); + cl_mem img_uv = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); checkStatus(err, "Creating nv12 image plane_uv failed"); size_t regionY[] = { (size_t)width, (size_t)height, 1 }; @@ -288,30 +275,29 @@ TEST(cl_mem_check, check_input) { checkStatus(clReleaseMemObject(nv12_image_plane_uv), "clReleaseMemObject"); checkStatus(clReleaseMemObject(nv12_image_plane_y), "clReleaseMemObject"); - device_query query(static_cast(ocl_instance->_context.get())); + device_query query(engine_types::ocl, runtime_types::ocl, static_cast(ocl_instance->_context.get())); auto devices = query.get_available_devices(); - auto engine_config = cldnn::engine_configuration(); - engine engine(devices.begin()->second, engine_config); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second); auto input = input_layout("input", { data_types::i8, format::nv12, {1,1,height,width} }); auto output_format = cldnn::format::byxf; layout output_layout(data_types::f32, output_format, { 1,3,height,width }); - auto input_memory = cldnn::memory::share_image(engine, input.layout, img, 0); + auto input_memory = engine->share_image(input.layout, img); topology topology; topology.add(input); topology.add(reorder("reorder", "input", output_layout)); - network network(engine, topology); + network network(*engine, topology); network.set_input_data("input", input_memory); auto outputs = network.execute(); std::vector reference_results = createReferenceData(data, width, height, output_format); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); int size = width * height * 3; for (auto i = 0; i < size; i++) { EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp index a07688ee671..0fed8ea95e4 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp @@ -3,27 +3,24 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include #include "test_utils/test_utils.h" -#include "api/arg_max_min.hpp" + +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace std; // Run some topology too see if command queue does work correctly // Coppied from arg_max_gpu.base test. 
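The cl_mem tests above now obtain the engine through `device_query` plus `engine::create` rather than constructing `cldnn::engine` directly from a device. A condensed sketch of that setup, assuming the caller already owns an OpenCL context handle (`user_context` is a placeholder) and that "test_utils.h" together with the runtime headers included by these tests (their exact paths are elided in this flattened diff) provide the declarations:

#include "test_utils.h"

using namespace cldnn;

// Sketch only: wrap an existing cl_context into a clDNN engine the way the updated tests do.
// The concrete return type of engine::create is not shown in the patch, so it is held in auto.
static void engine_from_context_example(void* user_context /* existing cl_context, placeholder */) {
    device_query query(engine_types::ocl, runtime_types::ocl, user_context);  // enumerate devices in that context
    auto devices = query.get_available_devices();
    auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second);
    (void)engine;  // used via engine->allocate_memory / engine->share_image in the tests above
}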
-void exexute_network(cldnn::engine engine) -{ +void exexute_network(cldnn::engine& engine) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(arg_max_min("arg_max", { "input" }, arg_max_min::max)); vector input_vec = { @@ -48,11 +45,10 @@ void exexute_network(cldnn::engine engine) EXPECT_EQ(outputs.begin()->first, "arg_max"); auto output = outputs.at("arg_max").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[batch_num]; - for (uint32_t i = 0; i < batch_num; i++) - { - out_buffer[i] = get_value(output_ptr, i); + for (uint32_t i = 0; i < batch_num; i++) { + out_buffer[i] = get_value(output_ptr.data(), i); } int size = x_size * y_size * feature_num; int index; @@ -62,8 +58,7 @@ void exexute_network(cldnn::engine engine) EXPECT_LT(out_buffer[i], size); index = (int)out_buffer[i]; value = input_vec[i*size + (int)index]; - for (int j = 0; j < size; j++) - { + for (int j = 0; j < size; j++) { EXPECT_LE(input_vec[i*size + j], value); } } @@ -73,49 +68,34 @@ TEST(command_queue_test, test_priority_hints) { engine_configuration configuration = engine_configuration( false, // profiling - false, // decorate_kernel_names - false, // dump_custom_program - "", // options - "", // single_kernel - true, // primitives_parallelisation - "", // engine_log + queue_types::out_of_order, "", // sources_dumps_dir priority_mode_types::low, throttle_mode_types::disabled); - cldnn::engine engine(configuration); - exexute_network(engine); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); + exexute_network(*engine); } TEST(command_queue_test, test_throttle_hints) { engine_configuration configuration = engine_configuration( false, // profiling - false, // decorate_kernel_names - false, // dump_custom_program - "", // options - "", // single_kernel - true, // primitives_parallelisation - "", // engine_log + queue_types::out_of_order, "", // sources_dumps_dir priority_mode_types::disabled, throttle_mode_types::high); - cldnn::engine engine(configuration); - exexute_network(engine); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); + exexute_network(*engine); } TEST(command_queue_test, test_priority_and_throttle_hints) { engine_configuration configuration = engine_configuration( false, // profiling - false, // decorate_kernel_names - false, // dump_custom_program - "", // options - "", // single_kernel - true, // primitives_parallelisation - "", // engine_log + queue_types::out_of_order, "", // sources_dumps_dir priority_mode_types::high, throttle_mode_types::low); - cldnn::engine engine(configuration); - exexute_network(engine); -} \ No newline at end of file + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, configuration); + exexute_network(*engine); +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/concatenation_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/concatenation_gpu_test.cpp index 9294d5e5d7c..10f051b9ff4 100644 --- 
a/inference-engine/thirdparty/clDNN/tests/test_cases/concatenation_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/concatenation_gpu_test.cpp @@ -4,17 +4,13 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include "api/memory.hpp" -#include -#include "api/convolution.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include "test_utils/float16.h" -#include +#include "test_utils.h" + +#include +#include +#include +#include + #include #include #include @@ -22,10 +18,9 @@ #include #include #include -#include using namespace cldnn; -using namespace tests; +using namespace ::tests; namespace cldnn { @@ -34,13 +29,13 @@ namespace cldnn TEST(concat_gpu, mixed_input_types) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 3 } }); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 1, 4, 3 } }); - auto input2 = memory::allocate(engine, { data_types::i8, format::bfyx, { 1, 1, 4, 3 } }); - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 4, 3 } }); - auto input4 = memory::allocate(engine, { data_types::i64, format::bfyx, { 1, 1, 4, 3 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 3 } }); + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 4, 3 } }); + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 4, 3 } }); + auto input4 = engine.allocate_memory({ data_types::i64, format::bfyx, { 1, 1, 4, 3 } }); set_values(input0, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(input1, { 11, 12, 13, 14, 12, 12, 13, 14, 13, 13, 13, 15 }); @@ -59,11 +54,11 @@ TEST(concat_gpu, mixed_input_types) { 41.0f, 42.0f, 43.0f, 44.0f, 42.0f, 42.0f, 43.0f, 44.0f, 43.0f, 43.0f, 43.0f, 45.0f }; topology topology( - input_layout("input0", input0.get_layout()), - input_layout("input1", input1.get_layout()), - input_layout("input2", input2.get_layout()), - input_layout("input3", input3.get_layout()), - input_layout("input4", input4.get_layout()), + input_layout("input0", input0->get_layout()), + input_layout("input1", input1->get_layout()), + input_layout("input2", input2->get_layout()), + input_layout("input3", input3->get_layout()), + input_layout("input4", input4->get_layout()), concatenation("concat", { "input0", "input1", "input2", "input3", "input4" }, concatenation::concatenation_axis::along_f, @@ -83,8 +78,8 @@ TEST(concat_gpu, mixed_input_types) { EXPECT_EQ(outputs.begin()->first, "concat"); auto output_memory = outputs.at("concat").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -102,12 +97,12 @@ TEST(concat_gpu, mixed_input_types) { } TEST(concat_gpu, mixed_input_types_5d) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); - 
auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 1, 1, 4, 3 } }); set_values(input0, { half_t(1.0f), half_t(2.0f), half_t(3.0f), half_t(4.0f), half_t(2.0f), half_t(2.0f), @@ -133,10 +128,10 @@ TEST(concat_gpu, mixed_input_types_5d) { 31.0f, 32.0f, 33.0f, 34.0f, 32.0f, 32.0f, 33.0f, 34.0f, 33.0f, 33.0f, 33.0f, 35.0f }; topology topology( - input_layout("input0", input0.get_layout()), - input_layout("input1", input1.get_layout()), - input_layout("input2", input2.get_layout()), - input_layout("input3", input3.get_layout()), + input_layout("input0", input0->get_layout()), + input_layout("input1", input1->get_layout()), + input_layout("input2", input2->get_layout()), + input_layout("input3", input3->get_layout()), concatenation("concat", { "input0", "input1", "input2", "input3" }, concatenation::concatenation_axis::along_f, @@ -155,8 +150,8 @@ TEST(concat_gpu, mixed_input_types_5d) { EXPECT_EQ(outputs.begin()->first, "concat"); auto output_memory = outputs.at("concat").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -176,10 +171,10 @@ TEST(concat_gpu, mixed_input_types_5d) { } TEST(concat_gpu, i8_optimization_with_pool) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}}); - auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 8, 3}}); + auto input0 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); + auto input1 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); set_values(input0, { 11, 12, 13, @@ -207,8 +202,8 @@ TEST(concat_gpu, i8_optimization_with_pool) { 18, 14, -13, 15}; layout reorder_layout(data_types::i8, format::yxfb, {7, 2, 2, 1}); - topology topology(input_layout("input0", input0.get_layout()), - input_layout("input1", input1.get_layout()), + topology topology(input_layout("input0", input0->get_layout()), + input_layout("input1", input1->get_layout()), pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}), pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}), concatenation("concat", @@ -228,8 +223,8 @@ TEST(concat_gpu, i8_optimization_with_pool) { EXPECT_EQ(outputs.begin()->first, "reorder"); auto output_memory = outputs.at("reorder").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[0]; int x_size = output_layout.size.spatial[1]; @@ -279,12 +274,12 @@ TEST(concat_gpu, i8_optimization_with_conv) { // Output: // 53 54 30 // 52 47 37 - const auto& engine = get_test_engine(); + auto& engine = 
get_test_engine(); - auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}}); - auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}}); - auto input2 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 1, 5, 4}}); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx, { 1, 3, 3, 2 } }); + auto input0 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 5, 4}}); + auto input1 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 5, 4}}); + auto input2 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 5, 4}}); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 3, 3, 2 } }); set_values(weights, { 1, 2, 1, 2, 1, 2, 1, 2, 1, @@ -303,14 +298,14 @@ TEST(concat_gpu, i8_optimization_with_conv) { 1, 2, -2, 4, 2, 3, 5, 3, -3, 1, 5, 4, 3, 2, 1 }); - + VF output_vec = { 53, 54, 30, 52, 47, 37 }; - + layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 2, 3}); - topology topology(input_layout("input0", input0.get_layout()), - input_layout("input1", input1.get_layout()), - input_layout("input2", input2.get_layout()), + topology topology(input_layout("input0", input0->get_layout()), + input_layout("input1", input1->get_layout()), + input_layout("input2", input2->get_layout()), concatenation("concat", {"input0", "input1", "input2"}, concatenation::concatenation_axis::along_f, @@ -331,8 +326,8 @@ TEST(concat_gpu, i8_optimization_with_conv) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -358,20 +353,20 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { // Output : 1x1x3 // // Input0: - // -3 6 0 2 -1 -1 6 0 5 4 1 6 2 4 0 5 - // -2 -1 1 0 2 3 3 3 6 2 4 7 3 6 7 -1 - // 7 7 5 -3 1 -1 5 4 0 3 -2 6 2 5 2 4 - // 5 -1 3 6 2 0 -3 -1 0 3 0 -1 1 6 1 6 - // 5 -2 2 -1 5 6 3 4 1 0 6 6 7 2 6 3 - // 6 7 -1 5 5 6 -1 0 -1 5 5 2 3 -1 -3 4 + // -3 6 0 2 -1 -1 6 0 5 4 1 6 2 4 0 5 + // -2 -1 1 0 2 3 3 3 6 2 4 7 3 6 7 -1 + // 7 7 5 -3 1 -1 5 4 0 3 -2 6 2 5 2 4 + // 5 -1 3 6 2 0 -3 -1 0 3 0 -1 1 6 1 6 + // 5 -2 2 -1 5 6 3 4 1 0 6 6 7 2 6 3 + // 6 7 -1 5 5 6 -1 0 -1 5 5 2 3 -1 -3 4 // // Input1: - // 4 -2 0 0 6 2 0 4 6 4 4 4 -3 -1 4 -3 - // 1 0 -1 5 -1 1 4 2 7 7 0 2 3 4 -1 3 - // 7 7 2 -3 -1 5 -2 2 6 -3 0 7 0 3 3 3 - // -1 0 -2 -2 7 -3 -3 -1 5 0 3 4 0 -1 2 5 - // 2 -1 2 -3 0 -3 -3 2 4 3 3 5 5 7 5 1 - // 2 2 -3 6 6 7 1 -1 -2 5 1 -1 4 5 -3 -2 + // 4 -2 0 0 6 2 0 4 6 4 4 4 -3 -1 4 -3 + // 1 0 -1 5 -1 1 4 2 7 7 0 2 3 4 -1 3 + // 7 7 2 -3 -1 5 -2 2 6 -3 0 7 0 3 3 3 + // -1 0 -2 -2 7 -3 -3 -1 5 0 3 4 0 -1 2 5 + // 2 -1 2 -3 0 -3 -3 2 4 3 3 5 5 7 5 1 + // 2 2 -3 6 6 7 1 -1 -2 5 1 -1 4 5 -3 -2 // // Filters: // -1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0, @@ -382,11 +377,11 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { // Output: // -14, -35, -10 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}}); - auto input1 = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 16, 3, 2}}); - auto weights = memory::allocate(engine, {data_types::i8, format::bfyx, {1, 32, 2, 1}}); + auto input0 = 
engine.allocate_memory({data_types::i8, format::bfyx, {1, 16, 3, 2}}); + auto input1 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 16, 3, 2}}); + auto weights = engine.allocate_memory({data_types::i8, format::bfyx, {1, 32, 2, 1}}); set_values(weights, {-1, 2, -2, 2, -2, 1, 1, 0, -1, 1, 2, -2, 2, 1, -2, 0, 0, -2, -2, -2, -2, -1, 2, 1, 2, -1, -1, 0, 2, -2, -2, 1, 0, -2, 0, 1, -2, -1, -2, 0, -1, -1, -2, 1, -2, 0, 1, 2, 2, 2, 2, -2, 0, 2, 1, -2, -1, -1, 0, -2, 2, -1, 2, -1}); @@ -394,7 +389,7 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { set_values(input0, {-3, 6, 0, 2, -1, -1, 6, 0, 5, 4, 1, 6, 2, 4, 0, 5, -2, -1, 1, 0, 2, 3, 3, 3, 6, 2, 4, 7, 3, 6, 7, -1, 7, 7, 5, -3, 1, -1, 5, 4, 0, 3, -2, 6, 2, 5, 2, 4, - 5, -1, 3, 6, 2, 0, -3, -1, 0, 3, 0, -1, 1, 6, 1, 6, + 5, -1, 3, 6, 2, 0, -3, -1, 0, 3, 0, -1, 1, 6, 1, 6, 5, -2, 2, -1, 5, 6, 3, 4, 1, 0, 6, 6, 7, 2, 6, 3, 6, 7, -1, 5, 5, 6, -1, 0, -1, 5, 5, 2, 3, -1, -3, 4 }); @@ -404,12 +399,12 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { -1, 0, -2, -2, 7, -3, -3, -1, 5, 0, 3, 4, 0, -1, 2, 5, 2, -1, 2, -3, 0, -3, -3, 2, 4, 3, 3, 5, 5, 7, 5, 1, 2, 2, -3, 6, 6, 7, 1, -1, -2, 5, 1, -1, 4, 5, -3, -2}); - + VF output_vec = { -14, -35, -10 }; layout reorder_layout(data_types::i8, format::bfyx, {1, 1, 3, 1}); - topology topology(input_layout("input0", input0.get_layout()), - input_layout("input1", input1.get_layout()), + topology topology(input_layout("input0", input0->get_layout()), + input_layout("input1", input1->get_layout()), pooling("pool0", "input0", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}), pooling("pool1", "input1", pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}), concatenation("concat", @@ -431,8 +426,8 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[0]; int x_size = output_layout.size.spatial[1]; @@ -527,7 +522,7 @@ public: void test(format::type fmt) { auto data_type = type_to_data_type::value; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t batch_num = testing::get<0>(GetParam()); const std::vector in_features = testing::get<1>(GetParam()); const size_t input_y = testing::get<2>(GetParam()); @@ -539,7 +534,7 @@ public: topology topology; std::vector> in_data; - std::vector in_memory; + std::vector in_memory; std::vector input_ids; for (size_t i = 0; i < in_features.size(); i++) { auto size = tensor(static_cast(batch_num), @@ -563,7 +558,7 @@ public: } } - auto in_mem = memory::allocate(engine, in_lay); + auto in_mem = engine.allocate_memory(in_lay); set_values(in_mem, data_flat); in_memory.push_back(in_mem); @@ -585,7 +580,7 @@ public: network.execute(); auto out_mem = network.get_output("concat").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num; bi++) { size_t f_sum = 0; @@ -594,7 +589,7 @@ public: for (size_t yi = 0; yi < input_y; yi++) { for (size_t xi = 0; xi < input_x; xi++) { auto output_coords = tensor(batch(bi), feature(f_sum + fi), spatial(xi, yi, 0, 0)); - auto output_offset = out_mem.get_layout().get_linear_offset(output_coords); + auto output_offset = out_mem->get_layout().get_linear_offset(output_coords); auto 
ref_val = in_data[in_i][bi][fi][yi][xi]; auto actual_val = out_ptr[output_offset]; @@ -651,7 +646,7 @@ public: void test(format::type fmt) { auto data_type = type_to_data_type::value; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t batch_num = testing::get<0>(GetParam()); const std::vector in_features = testing::get<1>(GetParam()); const size_t input_y = testing::get<2>(GetParam()); @@ -663,7 +658,7 @@ public: topology topology; std::vector> in_data; - std::vector in_memory; + std::vector in_memory; std::vector input_ids; for (size_t i = 0; i < in_features.size(); i++) { auto size = tensor(static_cast(batch_num), @@ -687,7 +682,7 @@ public: } } - auto in_mem = memory::allocate(engine, in_lay); + auto in_mem = engine.allocate_memory(in_lay); set_values(in_mem, data_flat); in_memory.push_back(in_mem); @@ -699,9 +694,11 @@ public: topology.add(concatenation("concat", input_ids, concatenation::concatenation_axis::along_f)); // Add identity convolution auto weights_lay = cldnn::layout(data_type, cldnn::format::bfyx, tensor(batch(output_f), feature(output_f))); - auto weights_mem = cldnn::memory::allocate(engine, weights_lay); + auto weights_mem = engine.allocate_memory(weights_lay); + weights_mem->fill(get_test_stream()); + get_test_stream().finish(); { - auto weights_ptr = weights_mem.pointer(); + cldnn::mem_lock weights_ptr(weights_mem, get_test_stream()); for (size_t fi = 0; fi < output_f; ++fi) { auto coords = tensor(batch(fi), feature(fi), spatial(0, 0, 0, 0)); auto offset = weights_lay.get_linear_offset(coords); @@ -724,7 +721,8 @@ public: network.execute(); auto out_mem = network.get_output("conv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); + ASSERT_EQ(out_mem->get_layout().format, fmt); for (size_t bi = 0; bi < batch_num; bi++) { size_t f_sum = 0; @@ -733,11 +731,11 @@ public: for (size_t yi = 0; yi < input_y; yi++) { for (size_t xi = 0; xi < input_x; xi++) { auto output_coords = tensor(batch(bi), feature(f_sum + fi), spatial(xi, yi, 0, 0)); - auto output_offset = out_mem.get_layout().get_linear_offset(output_coords); + auto output_offset = out_mem->get_layout().get_linear_offset(output_coords); auto ref_val = in_data[in_i][bi][fi][yi][xi]; auto actual_val = static_cast(out_ptr[output_offset]); - EXPECT_EQ(ref_val, actual_val) + ASSERT_EQ(ref_val, actual_val) << " b=" << bi << ", f=" << f_sum + fi << "(input " << in_i << "), y=" << yi << ", x=" << xi; } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp index 34a7c99dfb3..c8c066e4948 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp @@ -3,31 +3,26 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include #include using namespace cldnn; using namespace ::tests; -bool is_output_equal(const cldnn::memory& mem, const std::vector& ref) +bool is_output_equal(const cldnn::memory::ptr mem, const std::vector& ref) { - auto ptr = mem.pointer(); - for (size_t i = 0; i < mem.get_layout().count(); i++) - { + cldnn::mem_lock 
ptr(mem, get_test_stream()); + for (size_t i = 0; i < mem->get_layout().count(); i++) { if (!are_equal(ptr[i], ref[i])) return false; } return true; @@ -36,14 +31,11 @@ bool is_output_equal(const cldnn::memory& mem, const std::vector& ref) topology generate_simple_branch (bool branch_true_false, const primitive_id& input_id) { topology branch; - if (branch_true_false) - { + if (branch_true_false) { branch.add( pooling(input_id + "_when_true", input_id, cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) ); - } - else - { + } else { branch.add( pooling(input_id + "_when_false", input_id, cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) ); @@ -52,29 +44,29 @@ topology generate_simple_branch (bool branch_true_false, const primitive_id& inp } TEST(DISABLED_condition_gpu, basic_equal_comp) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto scale_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + topology branch_true = generate_simple_branch(true, "condi"); topology branch_false = generate_simple_branch(false, "condi"); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) - ); + input_layout("compare", compare->get_layout()) + ); topology.add( - input_layout("scale_data", scale_mem.get_layout()) + input_layout("scale_data", scale_mem->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL) - ); + ); topology.add( scale("output", "condi", "scale_data") ); @@ -104,32 +96,31 @@ TEST(DISABLED_condition_gpu, basic_equal_comp) { } TEST(DISABLED_condition_gpu, basic_range_equal_comp) { - - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); topology branch_true = generate_simple_branch(true, "condi"); topology branch_false = generate_simple_branch(false, "condi"); topology topology; topology.add( - input_layout("input0", input0.get_layout()) + input_layout("input0", input0->get_layout()) ); topology.add( - input_layout("input1", input1.get_layout()) + input_layout("input1", input1->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( concatenation("concat", { "input0", "input1" }, 
concatenation::along_x) ); - topology.add( + topology.add( condition("condi", "concat", branch_true, branch_false, "compare", cond_functions::EQUAL) ); @@ -182,14 +173,10 @@ std::pair, std::vector> get_values_to_compare(const cl std::vector ret_true; std::vector ret_false; auto mem_desc = generic_test::get_linear_memory_desc(input_lay); - for (int32_t b = 0; b < range.batch[0]; b++) - { - for (int32_t f = 0; f < range.feature[0]; f++) - { - for (int32_t y = 0; y < range.spatial[1]; y++) - { - for (int32_t x = 0; x < range.spatial[0]; x++) - { + for (int32_t b = 0; b < range.batch[0]; b++) { + for (int32_t f = 0; f < range.feature[0]; f++) { + for (int32_t y = 0; y < range.spatial[1]; y++) { + for (int32_t x = 0; x < range.spatial[0]; x++) { auto linear_idx = generic_test::get_linear_index( input_lay, offset.batch[0] + b, @@ -198,17 +185,16 @@ std::pair, std::vector> get_values_to_compare(const cl offset.spatial[0] + x, mem_desc); - switch (func) - { + switch (func) { case cond_functions::EQUAL: ret_true.push_back(values.at(linear_idx)); ret_false.push_back(-1.0f); break; - case cond_functions::GREATER: + case cond_functions::GREATER: ret_true.push_back(values.at(linear_idx) - 1.0f); ret_false.push_back(99.0f); break; - case cond_functions::LESS: + case cond_functions::LESS: ret_true.push_back(values.at(linear_idx) + 1.0f); ret_false.push_back(-1.0f); break; @@ -222,10 +208,10 @@ std::pair, std::vector> get_values_to_compare(const cl TEST(DISABLED_condition_gpu, generic_test_true_false) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 2, 5, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } }); std::vector input_data(50); std::iota(input_data.begin(), input_data.end(), 0.0f); @@ -263,17 +249,14 @@ TEST(DISABLED_condition_gpu, generic_test_true_false) { 33, 36, 38, 41, 43, 46, 48 }; - for (auto const& func : functions) - { - for (auto const& range : ranges) - { - for (auto const& offset : offsets) - { - auto comp_values = get_values_to_compare(offset, range, input_data, input.get_layout(), func); + for (auto const& func : functions) { + for (auto const& range : ranges) { + for (auto const& offset : offsets) { + auto comp_values = get_values_to_compare(offset, range, input_data, input->get_layout(), func); auto comp_values_true = comp_values.first; auto comp_values_false = comp_values.second; - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx, range }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx, range }); topology branch_true; topology branch_false; @@ -286,10 +269,10 @@ TEST(DISABLED_condition_gpu, generic_test_true_false) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", func, offset) @@ -324,7 +307,7 @@ TEST(DISABLED_condition_gpu, generic_test_true_false) { TEST(DISABLED_condition_gpu, basic_stacked_ifs) { - /* + /* <...> @@ -333,14 +316,14 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { <...> - + */ - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto 
input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto compare2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); topology condi_1_true = generate_simple_branch(true, "condi"); topology condi_1_false = generate_simple_branch(false, "condi"); @@ -355,16 +338,16 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", condi_1_true, condi_1_false, "compare", cond_functions::EQUAL) ); topology.add( - input_layout("compare2", compare2.get_layout()) + input_layout("compare2", compare2->get_layout()) ); topology.add( condition("condi2", "condi", condi_2_true, condi_2_false, "compare2", cond_functions::GREATER) @@ -406,15 +389,15 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { */ - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto compare2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); - auto scale_5_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto compare2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 1 } }); + auto scale_5_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(scale_5_mem, { 5.0f }); - auto scale_10_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto scale_10_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(scale_10_mem, { 10.0f }); topology nested_true; @@ -433,7 +416,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { pooling("pooling_when_true", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) ); branch_true.add( - input_layout("compare2", compare2.get_layout()) + input_layout("compare2", compare2->get_layout()) ); branch_true.add( @@ -453,11 +436,11 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( @@ -488,21 +471,21 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { } TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 
1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); topology branch_true = generate_simple_branch(true, "condi"); topology branch_false = generate_simple_branch(false, "condi"); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL) @@ -512,21 +495,21 @@ TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { } TEST(DISABLED_condition_gpu, negative_too_big_offset) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); topology branch_true = generate_simple_branch(true, "condi"); topology branch_false = generate_simple_branch(false, "condi"); topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL, {1, 1, 2, 1}) @@ -536,12 +519,12 @@ TEST(DISABLED_condition_gpu, negative_too_big_offset) { } TEST(DISABLED_condition_gpu, negative_not_same_layouts) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + topology branch_true; branch_true.add( pooling("pooling_when_true", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) @@ -554,10 +537,10 @@ TEST(DISABLED_condition_gpu, negative_not_same_layouts) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL) @@ -567,11 +550,11 @@ TEST(DISABLED_condition_gpu, negative_not_same_layouts) { } TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options bs; bs.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); - auto compare = memory::allocate(engine, { 
data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); topology branch_true; branch_true.add( @@ -585,10 +568,10 @@ TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("compare", compare.get_layout()) + input_layout("compare", compare->get_layout()) ); topology.add( condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL) @@ -596,6 +579,6 @@ TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { topology.add( pooling("pooling_check_name", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 }) ); - + EXPECT_ANY_THROW(network net(engine, topology, bs);); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp index 66855dba938..658bd6625f3 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp @@ -4,18 +4,15 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include "api/memory.hpp" -#include -#include "api/convolution.hpp" -#include "api/eltwise.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include "test_utils/float16.h" -#include +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include + #include #include #include @@ -25,12 +22,9 @@ #include #include #include -#include -#include -#include using namespace cldnn; -using namespace tests; +using namespace ::tests; namespace cldnn { @@ -226,29 +220,23 @@ VVF reference_scale_post_op(const VVF& input, const T& scale, const T& shi return output_shrinked; } -void dump_buffer(memory const& mem, std::string const& name) -{ +void dump_buffer(memory::ptr mem, std::string const& name) { std::ofstream out(name); - auto size = mem.get_layout().get_buffer_size(); - auto ptr = mem.pointer(); - auto pitches = mem.get_layout().get_pitches(); - out << "Data size: " << mem.get_layout().size << "\n"; - out << "Lower padding: " << mem.get_layout().data_padding.lower_size() << "\n"; - out << "Upper padding: " << mem.get_layout().data_padding.upper_size() << "\n"; + auto size = mem->get_layout().get_buffer_size(); + cldnn::mem_lock ptr(mem, get_test_stream()); + auto pitches = mem->get_layout().get_pitches(); + out << "Data size: " << mem->get_layout().size << "\n"; + out << "Lower padding: " << mem->get_layout().data_padding.lower_size() << "\n"; + out << "Upper padding: " << mem->get_layout().data_padding.upper_size() << "\n"; out << "\n"; - for (int b = 0; b < size.batch[0]; ++b) - { + for (int b = 0; b < size.batch[0]; ++b) { out << " ================ BATCH " << b << " =================\n\n"; - for (int f = 0; f < size.feature[0]; ++f) - { + for (int f = 0; f < size.feature[0]; ++f) { out << "feature " << f << ":\n"; - for (int z = 0; z < size.spatial[2]; ++z) - { - for (int y = 0; y < size.spatial[1]; ++y) - { - for (int x = 0; x < size.spatial[0]; ++x) - { + for (int z = 0; z < size.spatial[2]; ++z) { + for (int y = 0; y < size.spatial[1]; ++y) { + for (int x = 0; x < size.spatial[0]; ++x) { 
size_t idx = b * pitches.batch[0] + f * pitches.feature[0] + z * pitches.spatial[2] + y * pitches.spatial[1] + x * pitches.spatial[0]; out << ptr[idx] << " "; } @@ -274,12 +262,12 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ // Group : 1 // Def_group: 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); - auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 18, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); + auto trans = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 18, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f, -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f, @@ -350,8 +338,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ -2.6067f, 0.562893f, 0.671884f, 0.404735f, 1.45044f, 0.950113f }; topology topology( - input_layout("input", input.get_layout()), - input_layout("trans", trans.get_layout()), + input_layout("input", input->get_layout()), + input_layout("trans", trans->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -377,8 +365,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -406,12 +394,12 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) // Group : 1 // Def_group: 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); - auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 18, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); + auto trans = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 18, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f, -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f, @@ -482,8 +470,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) -0.210789f, -0.973089f, -0.407542f, 1.11818f, 0.843776f, 0.628229f, 
1.29095f, 1.18637f, 0.808982f, 1.43841f }; topology topology( - input_layout("input", input.get_layout()), - input_layout("trans", trans.get_layout()), + input_layout("input", input->get_layout()), + input_layout("trans", trans->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -509,8 +497,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -538,12 +526,12 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { // Group : 1 // Def_group: 2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); - auto trans = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 36, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 4, 4 } }); + auto trans = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 36, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 4, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); set_values(input, { 0.680375f, -0.211234f, 0.566198f, 0.59688f, 0.823295f, -0.604897f, -0.329554f, 0.536459f, -0.444451f, 0.10794f, -0.0452059f, 0.257742f, -0.270431f, 0.0268018f, 0.904459f, 0.83239f, @@ -646,8 +634,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { 1.18929f, 0.382556f, 0.499048f, 1.16212f, 1.62688f, 1.31246f, 1.82684f }; topology topology( - input_layout("input", input.get_layout()), - input_layout("trans", trans.get_layout()), + input_layout("input", input->get_layout()), + input_layout("trans", trans->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -673,8 +661,8 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -711,10 +699,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { // 21 28 39 // 18 20 20 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); 
set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -723,7 +711,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { { 17.0f, 19.0f, 19.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }, { 1,1,1,2 })); @@ -735,8 +723,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -782,10 +770,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { // 21 28 39 // 18 20 20 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8,format::bfyx,{ 1, 1, 3, 2 } }); set_values(input, { 1.1f, 2.4f, 3.5f, 4.5f, 5.8f, 2.9f, 2.3f, 3.5f, 4.4f, 6.6f, @@ -798,7 +786,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { { 22.0f, 20.0f, 21.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("to_int","input", { data_types::i8,format::bfyx,{ 1, 1, 5, 4 } }), data("weights", weights), convolution("conv", "to_int", { "weights" }, { 1,1,1,2 }), @@ -812,8 +800,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -839,10 +827,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { // Input : 4x5x1 // Output : 2x3x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -852,7 +840,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { { 17.0f, 19.0f, 19.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }, { 1,1,1,2 })); @@ -864,8 +852,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = 
outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -934,11 +922,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1, 1 } }); set_values(input, { 1.0f, 0.0f, 1.0f, 0.0f, @@ -987,7 +975,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" })); @@ -1000,8 +988,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -1026,10 +1014,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { // data is similar as in basic_convolution3D - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); - auto weights_1 = memory::allocate(engine, { data_types::f32, format::goizyx, tensor(cldnn::group(2), cldnn::batch(1), cldnn::feature(1), cldnn::spatial(2, 2, 2))}); - auto biases_1 = memory::allocate(engine, { data_types::f32, format::bfyx, tensor(feature(2)) }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); + auto weights_1 = engine.allocate_memory({ data_types::f32, format::goizyx, tensor(cldnn::group(2), cldnn::batch(1), cldnn::feature(1), cldnn::spatial(2, 2, 2))}); + auto biases_1 = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(2)) }); set_values(input, { 1.0f, 0.0f, 1.0f, 0.0f, @@ -1117,7 +1105,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights_1", weights_1), data("biases_1", biases_1), convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32, true)); @@ -1130,8 +1118,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = 
output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -1158,10 +1146,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { // data is similar as in basic_convolution3D_split2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 1, 2, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 1, 2, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1, 1 } }); set_values(input, { 1.0f, 0.0f, 1.0f, 0.0f, @@ -1249,7 +1237,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" })); @@ -1262,8 +1250,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int z_size = output_layout.size.spatial[2]; int y_size = output_layout.size.spatial[1]; @@ -1289,15 +1277,14 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { } TEST(convolution_f32_fw_gpu, with_output_size_same_input) { + auto& engine = get_test_engine(); - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 320, 320 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 64, 4, 7, 7 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 64, 4, 7, 7 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 320, 320 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 64, 4, 7, 7 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 64, 4, 7, 7 } }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("weights2", weights2), convolution::create_with_output_size("conv1", "input", { "weights" }, {1, 64, 160, 160}, {1, 1, 2, 2}, {0, 0, -3, -3}), @@ -1329,16 +1316,16 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { // 8 8 8 8 // 8 8 8 8 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2,2,1,1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,2,2,2} }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2,2,1,1 } }); set_values(input, { 1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), convolution("conv1", "input", { "weights" }), convolution("conv2", "conv1", { "weights" }), @@ -1353,8 +1340,8 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { auto outputs = network.execute(); auto output_memory = outputs.at("conv3").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1397,11 +1384,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -1411,7 +1398,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { { 18.0f, 20.0f, 20.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( "conv", "input", { "weights" }, { "biases" }, { 0,0,1,2 })); @@ -1424,8 +1411,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1445,14 +1432,14 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, @@ -1470,9 +1457,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { { 18.0f, 20.0f, 20.0f } }; topology topology( - input_layout("input", 
input.get_layout()), - input_layout("weights", weights.get_layout()), - input_layout("biases", biases.get_layout()), + input_layout("input", input->get_layout()), + input_layout("weights", weights->get_layout()), + input_layout("biases", biases->get_layout()), convolution("conv", "input", { "weights" } , @@ -1491,8 +1478,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1542,11 +1529,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1560,7 +1547,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -1582,8 +1569,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1642,11 +1629,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1660,7 +1647,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", 
input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -1684,8 +1671,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1738,11 +1725,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1757,7 +1744,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -1781,8 +1768,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1842,11 +1829,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offs // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1864,7 +1851,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offs { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -1888,8 +1875,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offs 
EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -1951,11 +1938,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_off // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1974,7 +1961,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_off { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -1998,8 +1985,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_off EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -2052,11 +2039,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { // Bias: // 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -2074,7 +2061,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { const int x_pad = 2; const int y_pad = 1; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -2096,9 +2083,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { EXPECT_EQ(outputs.begin()->first, "conv"); auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = 
output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); auto output_size = output_layout.get_buffer_size(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_size.spatial[1]; int x_size = output_size.spatial[0]; @@ -2165,19 +2152,19 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { } VF output_rnd_vec = flatten_4d(format::yxfb, output_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, input_rnd_vec); set_values(weights, filter_rnd_vec); set_values(biases, bias_rnd); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", {"weights"}, {"biases"}, {1,1,2,2}) @@ -2192,7 +2179,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < output_rnd.size(); ++i) { float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]); @@ -2235,19 +2222,19 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { } VF output_rnd_vec = flatten_4d(format::yxfb, output_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, input_rnd_vec); set_values(weights, filter_rnd_vec); set_values(biases, bias_rnd); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) @@ -2262,7 +2249,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < output_rnd.size(); ++i) { float x = float_round(output_rnd_vec[i]), y = float_round(output_ptr[i]); @@ -2293,19 +2280,19 @@ TEST(convolution_f32_fw_gpu, 
basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { // 8 0.5 // 6 9 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) @@ -2320,7 +2307,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); EXPECT_FLOAT_EQ(0.5f, output_ptr[1]); @@ -2347,19 +2334,19 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { // // Output: // 3.65 -5.36 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 0.5f, 2.3f, 1.5f, -0.4f, 2.0f, 1.0f, -4.0f, 3.0f }); set_values(weights, { -1.2f, 1.5f, 0.5f, -0.5f }); set_values(biases, { -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 } ) @@ -2374,7 +2361,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(3.65f, output_ptr[0]); EXPECT_FLOAT_EQ(-5.36f, output_ptr[1]); @@ -2399,19 +2386,19 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { // 5.1 f=0 // -5.2 f=1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 1, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } }); - auto 
weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 1.0f, 2.0f }); set_values(weights, { 1.0f, 2.0f, -1.0f, -2.0f }); set_values(biases, { 0.1f, -0.2f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) @@ -2426,7 +2413,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(5.1f, output_ptr[0]); EXPECT_FLOAT_EQ(-5.2f, output_ptr[1]); @@ -2458,19 +2445,19 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { // 64,0 f=1 // 103.0 f=2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); set_values(input, { 1.0f, 3.0f, 2.0f, 4.0f }); set_values(weights, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f }); set_values(biases, { -5.0f, -6.0f, -7.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) @@ -2485,7 +2472,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(25.0f, output_ptr[0]); EXPECT_FLOAT_EQ(64.0f, output_ptr[1]); @@ -2514,19 +2501,19 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { // 2.12 // 3.08 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); set_values(input, { -2.3f, -0.1f, 3.1f, 1.9f }); set_values(weights, { -1.1f, 1.5f, 0.5f, -0.5f, 
0.1f, 0.2f, 0.4f, 0.7f, 2.0f, -1.0f, 2.5f, -1.5f }); set_values(biases, { 0.1f, -0.2f, 0.3f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) @@ -2541,7 +2528,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_TRUE(are_equal(3.08f, output_ptr[0])); EXPECT_TRUE(are_equal(2.12f, output_ptr[1])); @@ -2570,19 +2557,19 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { // // Output: // 12.25 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) @@ -2597,7 +2584,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(12.25f, output_ptr[0]); } @@ -2627,19 +2614,19 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { // Output: // rnd rnd // rnd 2.0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, 4.0f, -5.0f, 0.5f, 1.5f, -1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -2662,7 +2649,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = 
output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_FLOAT_EQ(-7.25f, output_ptr[4]); } @@ -2702,12 +2689,12 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { // 8 3.65 0.5 -5.36 // 6 3.65 9 -5.36 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2))}); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2))}); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2719,7 +2706,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { set_values(biases1, { 2.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights1", weights1), data("biases1", biases1), convolution( @@ -2742,16 +2729,16 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 5)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 6)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 7)); + EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[1]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[2]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[3]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[4]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[5]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[6]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[7]); } TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { @@ -2800,12 +2787,12 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { // 8 8 3.65 3.65 0.5 0.5 -5.36 -5.36 // 6 6 3.65 3.65 9 9 -5.36 -5.36 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 
1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f, @@ -2817,7 +2804,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { set_values(biases1, { 2.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights1", weights1), data("biases1", biases1), convolution( @@ -2840,33 +2827,33 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 5)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 6)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 7)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 8)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 9)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 10)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 11)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 12)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 13)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 14)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 15)); + EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(8.0f, output_ptr[1]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[2]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[3]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[4]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[5]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[6]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[7]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[8]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[9]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[10]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[11]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[12]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[13]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[14]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[15]); } TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2881,7 +2868,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { set_values(biases, { 2.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -2904,26 +2891,26 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock 
output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 5)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 6)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 7)); + EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[1]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[2]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[3]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[4]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[5]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[6]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[7]); } TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) { // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2938,7 +2925,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) set_values(biases, { 2.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("input_1", "input", { data_types::f32,format::bfyx,{ 1, 2, 4, 4 } }), data("weights", weights), data("biases", biases), @@ -2962,26 +2949,26 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 5)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 6)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 7)); + EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[1]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[2]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[3]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[4]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[5]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[6]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[7]); } TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = memory::allocate(engine, { 
data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f, @@ -2996,7 +2983,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { set_values(biases, { 2.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -3019,33 +3006,33 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(0.5f, get_value(output_ptr, 5)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 6)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 7)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 8)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 9)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 10)); - EXPECT_FLOAT_EQ(3.65f, get_value(output_ptr, 11)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 12)); - EXPECT_FLOAT_EQ(9.0f, get_value(output_ptr, 13)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 14)); - EXPECT_FLOAT_EQ(-5.36f, get_value(output_ptr, 15)); + EXPECT_FLOAT_EQ(8.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(8.0f, output_ptr[1]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[2]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[3]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[4]); + EXPECT_FLOAT_EQ(0.5f, output_ptr[5]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[6]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[7]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[8]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[9]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[10]); + EXPECT_FLOAT_EQ(3.65f, output_ptr[11]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[12]); + EXPECT_FLOAT_EQ(9.0f, output_ptr[13]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[14]); + EXPECT_FLOAT_EQ(-5.36f, output_ptr[15]); } TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt) { // Test for depthwise separable optimization, there are 16 weights and biases (split 16) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3066,8 +3053,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 
3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3091,7 +3078,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw primitive_id bias_id = "biases1"; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data(weights_id, weights1), data(bias_id, biases1), convolution( @@ -3114,7 +3101,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); std::vector expected_output_vec = { 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, @@ -3132,9 +3119,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx) { // Test for depthwise separable optimization, there are 16 weights and biases (split 16) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ -3155,8 +3142,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3180,7 +3167,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw primitive_id bias_id = "biases1"; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data(weights_id, weights1), data(bias_id, biases1), convolution( @@ -3203,7 +3190,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock 
output_ptr(output_prim, get_test_stream()); std::vector expected_output_vec = { 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f, @@ -3226,9 +3213,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { // Test for grouped convolution, there are 16 joined weights and biases (group 16) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3249,10 +3236,10 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, }); - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); set_values(weights, { @@ -3302,7 +3289,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); std::vector expected_output_vec = { 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, @@ -3320,9 +3307,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { // Test for grouped convolution, there are 16 joined weights and biases (group 16) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ -3343,10 +3330,10 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, }); - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 
16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); set_values(weights, { @@ -3397,7 +3384,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); std::vector expected_output_vec = { 8.0f, 0.5f, 6.0f, 9.0f, 3.65f,-5.36f, 3.65f, -5.36f, @@ -3450,14 +3437,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) // -1.75 // 2.25 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto biases2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 1.5f, 0.5f, 0.0f, -0.5f @@ -3468,7 +3455,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) set_values(biases2, { -1.0f, 2.5f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights1", weights1), data("biases1", biases1), data("weights2", weights2), @@ -3492,12 +3479,12 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(-2.25f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(7.5f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(-1.75f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(2.25f, get_value(output_ptr, 3)); + EXPECT_FLOAT_EQ(-2.25f, output_ptr[0]); + EXPECT_FLOAT_EQ(7.5f, output_ptr[1]); + EXPECT_FLOAT_EQ(-1.75f, output_ptr[2]); + EXPECT_FLOAT_EQ(2.25f, output_ptr[3]); } TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { @@ -3531,14 +3518,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { // 1 // 3.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 1 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 
2, 1, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); - auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto biases2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 1.5f, 0.5f @@ -3549,7 +3536,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { set_values(biases2, { -1.0f, 2.5f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights1", weights1), data("biases1", biases1), data("weights2", weights2), @@ -3573,12 +3560,12 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(-2.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(6.5f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(1.0f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(3.5f, get_value(output_ptr, 3)); + EXPECT_FLOAT_EQ(-2.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(6.5f, output_ptr[1]); + EXPECT_FLOAT_EQ(1.0f, output_ptr[2]); + EXPECT_FLOAT_EQ(3.5f, output_ptr[3]); } TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_nopad_split2) { @@ -3618,14 +3605,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no // 6 // -2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); - auto biases1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); - auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); + auto biases2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); set_values(input, { 1.5f, 0.5f, 2.0f, -1.0f @@ -3636,7 +3623,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no set_values(biases2, { -1.0f, 2.5f, 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights1", weights1), data("biases1", biases1), data("weights2", weights2), @@ -3660,14 +3647,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(-1.5f, 
get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(8.0f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(7.75f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(11.0f, get_value(output_ptr, 3)); - EXPECT_FLOAT_EQ(6.0f, get_value(output_ptr, 4)); - EXPECT_FLOAT_EQ(-2.0f, get_value(output_ptr, 5)); + EXPECT_FLOAT_EQ(-1.5f, output_ptr[0]); + EXPECT_FLOAT_EQ(8.0f, output_ptr[1]); + EXPECT_FLOAT_EQ(7.75f, output_ptr[2]); + EXPECT_FLOAT_EQ(11.0f, output_ptr[3]); + EXPECT_FLOAT_EQ(6.0f, output_ptr[4]); + EXPECT_FLOAT_EQ(-2.0f, output_ptr[5]); }*/ @@ -3695,12 +3682,12 @@ TEST(convolution_gpu, trivial_convolution_relu) { // 4 0.0 // 2 5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, @@ -3712,7 +3699,7 @@ TEST(convolution_gpu, trivial_convolution_relu) { set_values(biases, { -2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -3739,12 +3726,12 @@ TEST(convolution_gpu, trivial_convolution_relu) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(4.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(0.0f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(2.0f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(5.0f, get_value(output_ptr, 3)); + EXPECT_FLOAT_EQ(4.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(0.0f, output_ptr[1]); + EXPECT_FLOAT_EQ(2.0f, output_ptr[2]); + EXPECT_FLOAT_EQ(5.0f, output_ptr[3]); } TEST(convolution_gpu, relu_with_negative_slope) { @@ -3772,12 +3759,12 @@ TEST(convolution_gpu, relu_with_negative_slope) { // 4 -0.35 // 2 5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, @@ -3789,7 +3776,7 @@ TEST(convolution_gpu, relu_with_negative_slope) { set_values(biases, { -2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -3817,29 +3804,29 @@ TEST(convolution_gpu, relu_with_negative_slope) { auto output_prim = 
outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); - EXPECT_FLOAT_EQ(4.0f, get_value(output_ptr, 0)); - EXPECT_FLOAT_EQ(-0.35f, get_value(output_ptr, 1)); - EXPECT_FLOAT_EQ(2.0f, get_value(output_ptr, 2)); - EXPECT_FLOAT_EQ(5.0f, get_value(output_ptr, 3)); + EXPECT_FLOAT_EQ(4.0f, output_ptr[0]); + EXPECT_FLOAT_EQ(-0.35f, output_ptr[1]); + EXPECT_FLOAT_EQ(2.0f, output_ptr[2]); + EXPECT_FLOAT_EQ(5.0f, output_ptr[3]); } TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); extern const std::vector conv_1x1_output; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 8, 16, 16 } }); - auto weights_conv_1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 8, 8, 1, 1 } }); - auto weights_conv_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 8, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 16, 8, 16, 16 } }); + auto weights_conv_1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 8, 8, 1, 1 } }); + auto weights_conv_2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 8, 1, 1 } }); set_random_values(input); set_random_values(weights_conv_1); set_random_values(weights_conv_2); - auto inp_lay = input_layout("input", input.get_layout()); + auto inp_lay = input_layout("input", input->get_layout()); auto conv_1 = convolution( "conv_1", "input", @@ -3867,8 +3854,8 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { auto output_prim = outputs.at("conv_2").get_memory(); - auto output_ptr = output_prim.pointer(); - auto output_layout = output_prim.get_layout(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + auto output_layout = output_prim->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -3876,16 +3863,12 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { int b_size = output_layout.size.batch[0]; int f_offset = y_size * x_size; int b_offset = f_size * f_offset; - for (int b = 0; b < b_size; ++b) - { - for (int f = 0; f < f_size; ++f) - { - for (int y = 0; y < y_size; ++y) - { - for (int x = 0; x < x_size; ++x) - { + for (int b = 0; b < b_size; ++b) { + for (int f = 0; f < f_size; ++f) { + for (int y = 0; y < y_size; ++y) { + for (int x = 0; x < x_size; ++x) { int idx = b * b_offset + f * f_offset + y * x_size + x; - EXPECT_TRUE(are_equal(conv_1x1_output[idx], get_value(output_ptr, idx))); + EXPECT_TRUE(are_equal(conv_1x1_output[idx], output_ptr[idx])); } } } @@ -3918,13 +3901,13 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) const int32_t output_x = (input_x - weights_x) / stride_x + 1; const int32_t output_y = (input_y - weights_y) / stride_y + 1; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto input_size = tensor( batch_size, input_feature_count, input_x, input_y ); - auto input = memory::allocate(engine, { data_types::f32, input_format, input_size }); + auto input = engine.allocate_memory({ data_types::f32, input_format, input_size }); auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y ); - auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size }); - auto biases = memory::allocate(engine, { data_types::f32, biases_format, {1,output_feature_count,1,1}}); + auto 
weights = engine.allocate_memory({ data_types::f32, weights_format, weights_size }); + auto biases = engine.allocate_memory({ data_types::f32, biases_format, {1,output_feature_count,1,1}}); //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}}); @@ -4021,7 +4004,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) // Computing convolution. topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution( @@ -4049,7 +4032,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); // Checking result. uint32_t i = 0; @@ -4059,12 +4042,12 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) { for (uint32_t bi = 0; bi < batch_size; ++bi, ++i) { - auto equal = are_equal(output_vals[i], get_value(output_ptr, i)); + auto equal = are_equal(output_vals[i], output_ptr[i]); EXPECT_TRUE(equal); if (!equal) { std::cout << "Failed at position (" << yxi << ", output feature = " << ofi << ", batch = " << bi << "): " - << output_vals[i] << " != " << get_value(output_ptr, i) << std::endl; + << output_vals[i] << " != " << output_ptr[i] << std::endl; return; } } @@ -4074,9 +4057,8 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) #undef USE_OLD_WEIGHTS_FORMAT } -void add_primitives(const engine& engine, topology& topology) -{ - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } }); +void add_primitives(engine& engine, topology& topology) { + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 2 } }); std::vector weights_values = { 1, 2, 1, 2, 1, 2, @@ -4084,7 +4066,7 @@ void add_primitives(const engine& engine, topology& topology) 19, 17, -1, -10, 32, 23 }; set_values(weights, weights_values); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(biases, { 1.0f, -8.0f }); topology.add( @@ -4123,9 +4105,8 @@ TEST(convolution_f32_fw_gpu, byte_activation) { // // Bias: // 1 -8 - auto eng_conf = get_test_engine(); - engine engine{ eng_conf }; - auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx,{ 1, 1, 5, 4 } }); VVVF output_vec = { { @@ -4146,7 +4127,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { -1, -1, -1, -1, -1 }); topology topology( - input_layout("input", input.get_layout())); + input_layout("input", input->get_layout())); add_primitives(engine, topology); network network(engine, topology, opts); network.set_input_data("input", input); @@ -4155,8 +4136,8 @@ TEST(convolution_f32_fw_gpu, byte_activation) { EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -4176,11 +4157,11 @@ 
TEST(convolution_f32_fw_gpu, byte_activation) { } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4206,7 +4187,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), convolution("conv", "input", { "weights" }, { "biases" }, tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}), @@ -4221,9 +4202,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4243,13 +4224,13 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_and_activations) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); - auto w_zp = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 1, 1 } }); - auto a_zp = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto w_zp = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 1, 1 } }); + auto a_zp = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4277,7 +4258,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), data("a_zp", a_zp), @@ -4295,9 +4276,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - 
auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4317,12 +4298,12 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activations_per_tensor) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); - auto a_zp = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto a_zp = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4349,7 +4330,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), data("a_zp", a_zp), @@ -4366,9 +4347,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4388,12 +4369,12 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activations_per_channel) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 2, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 2, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); - auto a_zp = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 2, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 2, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto a_zp = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4434,7 +4415,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), data("a_zp", a_zp), @@ -4451,9 +4432,9 @@ TEST(convolution_int8_fw_gpu, 
quantized_convolution_u8s8f32_asymmetric_activatio EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4473,12 +4454,12 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activations_per_channel_3ic_with_sub) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 3, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 3, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); - auto a_zp = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 3, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 3, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 3, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto a_zp = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 3, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4533,7 +4514,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), data("a_zp", a_zp), @@ -4552,9 +4533,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4574,12 +4555,12 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio } TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_per_channel) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); - auto w_zp = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto w_zp = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4606,7 +4587,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p } 
}; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), data("w_zp", w_zp), @@ -4623,9 +4604,9 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - auto output_layout = output_memory.get_layout(); + auto output_layout = output_memory->get_layout(); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; @@ -4648,10 +4629,9 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) { #define USE_OLD_WEIGHTS_FORMAT 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -4681,17 +4661,17 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) const int32_t output_y = (input_y - weights_y) / stride_y + 1; auto input_size = tensor( batch_size, input_feature_count, input_x, input_y ); - auto input = memory::allocate(engine, { data_types::f32, input_format, input_size }); + auto input = engine.allocate_memory({ data_types::f32, input_format, input_size }); auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y ); - auto weights = memory::allocate(engine, { data_types::f32, weights_format, weights_size }); + auto weights = engine.allocate_memory({ data_types::f32, weights_format, weights_size }); auto biases_size = tensor( 1,output_feature_count,1,1 ); - auto biases = memory::allocate(engine, { data_types::f32, biases_format, biases_size }); + auto biases = engine.allocate_memory({ data_types::f32, biases_format, biases_size }); auto output_size = tensor( batch_size, output_feature_count, output_x, output_y ); //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}}); - //auto input_cvtd = memory::allocate(engine, { data_types::f16, input_size }); - //auto weights_cvtd = memory::allocate(engine, { data_types::f16, weights_size }); - //auto biases_cvtd = memory::allocate(engine, { data_types::f16, biases_size }); + //auto input_cvtd = engine.allocate_memory({ data_types::f16, input_size }); + //auto weights_cvtd = engine.allocate_memory({ data_types::f16, weights_size }); + //auto biases_cvtd = engine.allocate_memory({ data_types::f16, biases_size }); //auto output_cvtd = memory::allocate({output_cvt_format, {batch_size, {output_x, output_y}, output_feature_count}}); // input: @@ -4785,9 +4765,9 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) } } - //auto expected_float = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast(output_vals.size()) } } }); - //auto expected_half = memory::allocate(engine, { data_types::f16,{ format::x,{ static_cast(output_vals.size()) } } }); - //auto expected = memory::allocate(engine, { data_types::f32,{ format::x,{ static_cast(output_vals.size()) } } }); + //auto expected_float = engine.allocate_memory({ data_types::f32,{ format::x,{ static_cast(output_vals.size()) } } }); + //auto expected_half = engine.allocate_memory({ data_types::f16,{ 
format::x,{ static_cast(output_vals.size()) } } }); + //auto expected = engine.allocate_memory({ data_types::f32,{ format::x,{ static_cast(output_vals.size()) } } }); // set_values(expected_float, output_vals); // auto cvt_expected_f32_f16 = reorder::create({expected_float, expected_half}); @@ -4798,7 +4778,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) // Computing convolution. topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("cvt_input", "input", {data_types::f16, input_format, input_size}), data("weights", weights), reorder("cvt_weights", "weights", {data_types::f16, weights_format, weights_size}), @@ -4822,7 +4802,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); // Checking result. uint32_t i = 0; @@ -5042,10 +5022,9 @@ INSTANTIATE_TEST_CASE_P(convolution_gpu_test, TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -5066,20 +5045,20 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); auto weights_data = generate_random_4d(output_f, input_f, filter_xy, filter_xy, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem)); // Reorder input to fs_byx_fsv32 @@ -5090,7 +5069,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -5152,9 +5131,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::fs_b_yx_fsv32); + ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); for 
(int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -5177,10 +5156,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -5197,20 +5175,20 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); auto weights_data = generate_random_4d(output_f, input_f, filter_xy, filter_xy, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem)); // add input padding by X and Y @@ -5220,7 +5198,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values @@ -5254,9 +5232,9 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::fs_b_yx_fsv32); + ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -5309,10 +5287,9 @@ INSTANTIATE_TEST_CASE_P(convolution_gpu_with_crop, TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; @@ -5333,7 +5310,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); auto weights_data = generate_random_4d(output_f, input_f, filter_xy, filter_xy, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // ref input @@ -5357,11 +5334,11 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) auto input_data_bfyx = flatten_4d(format::bfyx, input_data); auto input_size = tensor(batch_num, input_f * 2, input_xy, input_xy); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem)); auto crop_batch_num = batch_num; @@ -5383,7 +5360,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -5462,9 +5439,9 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) network.execute(); auto out_mem = network.get_output("concat").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::bfyx); + ASSERT_EQ(out_mem->get_layout().format, format::bfyx); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f * 2; ++fi) @@ -5495,29 +5472,29 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { const int input_size_x = 1280; const int input_size_y = 720; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto input_size = tensor(batch_num, input_f, input_size_x, input_size_y); auto input_data = generate_random_4d(batch_num, input_f, input_size_y, input_size_x, -10, 10); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_size }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); set_values(input, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); auto weights_data = generate_random_4d(output_f, input_f, filter_xy, filter_xy, -10, 10); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights = memory::allocate(engine, { data_types::i8, format::bfyx, weights_size }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, weights_size }); set_values(weights, weights_data_bfyx); auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_4d(1, output_f, 1, 1, -10, 10); auto biases_data_bfyx = flatten_4d(format::bfyx, biases_data); - auto biases = memory::allocate(engine, { data_types::i8, format::bfyx, biases_size }); + auto biases = engine.allocate_memory({ 
data_types::i8, format::bfyx, biases_size }); set_values(biases, biases_data_bfyx); topology topology_ref( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("to_int", "input", { data_types::i8,format::bfyx,{ batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), @@ -5535,11 +5512,11 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); topology topology_act( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("to_int", "input", { data_types::i8,format::b_fs_yx_fsv4,{ batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), @@ -5559,7 +5536,7 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { EXPECT_EQ(outputs_act.begin()->first, "output"); auto output_memory_act = outputs_act.at("output").get_memory(); - auto output_act_ptr = output_memory_act.pointer(); + cldnn::mem_lock output_act_ptr(output_memory_act, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -5582,10 +5559,9 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; @@ -5613,13 +5589,13 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) auto input_data = generate_random_4d(batch_num, input_f, input_size_y, input_size_x, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); auto weights_data = generate_random_4d(output_f, input_f, filter_xy, filter_xy, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); @@ -5627,7 +5603,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem) ); @@ -5636,7 +5612,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -5698,7 +5674,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -5721,16 +5697,16 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) } template -void blockedFormatZeroCheck(cldnn::memory out_mem) { - auto out_ptr = out_mem.pointer(); +void blockedFormatZeroCheck(cldnn::memory::ptr out_mem) { + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); bool batch_blocked = false; - if (out_mem.get_layout().format == format::bs_fs_zyx_bsv16_fsv16 || - out_mem.get_layout().format == format::bs_fs_yx_bsv16_fsv16) + if (out_mem->get_layout().format == format::bs_fs_zyx_bsv16_fsv16 || + out_mem->get_layout().format == format::bs_fs_yx_bsv16_fsv16) batch_blocked = true; const int block_size = 16; - auto output_tensor = out_mem.get_layout().get_buffer_size(); + auto output_tensor = out_mem->get_layout().get_buffer_size(); const int b = output_tensor.batch[0]; const int f = output_tensor.feature[0]; const int spatials = std::accumulate(output_tensor.spatial.begin(), output_tensor.spatial.end(), 1, std::multiplies()); @@ -5820,7 +5796,7 @@ INSTANTIATE_TEST_CASE_P(convolution_gpu_block3D, TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int batch_num = testing::get<0>(GetParam()); const int input_f = testing::get<1>(GetParam()); @@ -5838,7 +5814,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) auto input_size = tensor(batch_num, input_f, input_xy, input_xy, 1); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 1, 10); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto 
input_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy, 1); @@ -5846,14 +5822,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to correct format @@ -5864,7 +5840,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -5925,14 +5901,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); - ASSERT_EQ(out_mem.get_layout().format, input_format); + ASSERT_EQ(out_mem->get_layout().format, input_format); auto flatten_ref = flatten_4d(format::bfyx, reference_result); @@ -5950,10 +5926,9 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; @@ -5976,7 +5951,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 0, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfzyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfzyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy, 1); @@ -5984,14 +5959,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfzyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfzyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to correct format @@ -6002,7 +5977,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfzyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfzyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -6063,14 +6038,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); - ASSERT_EQ(out_mem.get_layout().format, input_format); + ASSERT_EQ(out_mem->get_layout().format, input_format); auto flatten_ref = flatten_4d(format::bfyx, reference_result); @@ -6087,7 +6062,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int batch_num = testing::get<0>(GetParam()); const int input_f = testing::get<1>(GetParam()); @@ -6105,7 +6080,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) auto input_size = tensor(batch_num, input_f, input_xy, input_xy, 1); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 1, 10); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy, 1); @@ -6113,14 +6088,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = 
memory::allocate(engine, { data_types::f32, format::bfzyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to correct format @@ -6131,7 +6106,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -6181,7 +6156,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) } const float scalar = 5.5f; - auto scale_mem = memory::allocate(engine, { data_types::f32, format::bfzyx, {1, 1, 1, 1, 1} }); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, {1, 1, 1, 1, 1} }); set_values(scale_mem, {scalar}); topology.add(data("scalar", scale_mem)); @@ -6199,14 +6174,14 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfzyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); blockedFormatZeroCheck(out_mem); - ASSERT_EQ(out_mem.get_layout().format, input_format); + ASSERT_EQ(out_mem->get_layout().format, input_format); auto flatten_ref = flatten_4d(format::bfyx, reference_result); @@ -6251,7 +6226,7 @@ INSTANTIATE_TEST_CASE_P(convolution_gpu_block, TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int batch_num = testing::get<0>(GetParam()); const int input_xy = 5; @@ -6273,7 +6248,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 1, 10); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); @@ -6281,14 +6256,14 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f32, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f32, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + 
input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to bs_fs_yx_bsv16_fsv16 @@ -6299,7 +6274,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f32, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f32, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -6362,12 +6337,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); auto flatten_ref = flatten_4d(format::bfyx, reference_result); @@ -6384,10 +6359,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -6414,7 +6388,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 0, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy); @@ -6422,14 +6396,14 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to bs_fs_yx_bsv16_fsv16 @@ -6440,7 +6414,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f16, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -6503,20 +6477,19 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto 
out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); auto flatten_ref = flatten_4d(format::bfyx, reference_result); for (size_t i = 0; i < out_ptr_bfyx.size(); i++) { auto equal = are_equal(flatten_ref[i], out_ptr_bfyx[i], 1); EXPECT_TRUE(equal); - if (!equal) - { + if (!equal) { std::cout << "Difference at idx = " << i << std::endl; return; } @@ -6525,10 +6498,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -6547,7 +6519,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, 1, 10); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_xy, filter_xy, 1); @@ -6555,14 +6527,14 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f32, format::bfyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f32, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Reorder input to bs_fs_yx_bsv16_fsv16 @@ -6573,7 +6545,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) // Generate bias data auto biases_size = tensor(1, output_f, 1, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, { data_types::f32, format::bfyx, biases_size }); + auto biases_mem = engine.allocate_memory({ data_types::f32, format::bfyx, biases_size }); set_values(biases_mem, biases_data); // Calculate reference values with bias @@ -6623,7 +6595,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) } const float scalar = 5.5f; - auto scale_mem = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 1, 1, 1} }); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 1, 1, 1} }); set_values(scale_mem, {scalar}); topology.add(data("scalar", scale_mem)); @@ -6643,12 +6615,12 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) network.execute(); auto out_mem = network.get_output("conv_bsv16_fsv16").get_memory(); - auto out_ptr = out_mem.pointer(); + 
cldnn::mem_lock out_ptr(out_mem, get_test_stream()); auto out_mem_bfyx = network.get_output("reorder_bfyx").get_memory(); - auto out_ptr_bfyx = out_mem_bfyx.pointer(); + cldnn::mem_lock out_ptr_bfyx(out_mem_bfyx, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(out_mem->get_layout().format, format::bs_fs_yx_bsv16_fsv16); auto flatten_ref = flatten_4d(format::bfyx, reference_result); @@ -6701,10 +6673,9 @@ INSTANTIATE_TEST_CASE_P(convolution_depthwise_gpu_fs_b_yx_fsv32, TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -6729,20 +6700,20 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(group(groups), batch(1), feature(1), spatial(filter_x, filter_y)); auto weights_data = generate_random_4d(output_f, 1, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::goiyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::goiyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem)); // Reorder input to fs_byx_fsv32 @@ -6782,9 +6753,9 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::fs_b_yx_fsv32); + ASSERT_EQ(out_mem->get_layout().format, format::fs_b_yx_fsv32); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -6844,10 +6815,9 @@ INSTANTIATE_TEST_CASE_P(convolution_depthwise_gpu_b_fs_yx_fsv16, TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; @@ -6873,20 +6843,20 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(group(output_f), batch(1), feature(1), spatial(filter_x, filter_y)); auto weights_data = generate_random_4d(output_f, 1, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::goiyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::goiyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem)); // Reorder input to b_fs_yx_fsv16 @@ -6926,9 +6896,9 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::b_fs_yx_fsv16); + ASSERT_EQ(out_mem->get_layout().format, format::b_fs_yx_fsv16); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -6970,11 +6940,11 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa auto input_lower_sizes = { 0, 16, 0, 0 }; auto input_upper_sizes = { 0, 64, 0, 0 }; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_size }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, weights_size}); - auto bias = memory::allocate(engine, { data_types::f32, format::bfyx, bias_size}); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, weights_size}); + auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, bias_size}); set_values(input, { 3, -1, -1, -1, 2, -2, 2, 2, 0, 1, -5, 4, -1, 4, 1, 0, @@ -7002,7 +6972,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa layout reordered_input_layout = layout(data_types::f32, format::b_fs_yx_fsv16, input_size, input_padding); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("input_reordered", "input", reordered_input_layout), data("weights", weights), data("bias", bias), @@ -7022,8 +6992,8 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa EXPECT_EQ(outputs.begin()->first, "out"); auto output_memory = outputs.at("out").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size 
= output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -7055,10 +7025,9 @@ struct convolution_depthwise_gpu_bfyx : public convolution_depthwise_gpu {}; TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; @@ -7084,20 +7053,20 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) auto input_size = tensor(batch_num, input_f, input_xy, input_xy); auto input_data = generate_random_4d(batch_num, input_f, input_xy, input_xy, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(group(output_f), batch(1), feature(1), spatial(filter_x, filter_y)); auto weights_data = generate_random_4d(output_f, 1, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, { data_types::f16, format::goiyx, weights_size }); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::goiyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias auto reference_result = VVVVF(batch_num, VVVF(output_f)); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), data("weights", weights_mem)); // Calculate reference values without bias @@ -7134,9 +7103,9 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) network.execute(); auto out_mem = network.get_output("conv").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); - ASSERT_EQ(out_mem.get_layout().format, format::bfyx); + ASSERT_EQ(out_mem->get_layout().format, format::bfyx); for (int bi = 0; bi < batch_num; ++bi) for (int fi = 0; fi < output_f; ++fi) @@ -7260,7 +7229,7 @@ INSTANTIATE_TEST_CASE_P(convolution_grouped_fsv4_fsv16, convolution_grouped_gpu::PrintToStringParamName); TEST_P(convolution_grouped_gpu, base) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int input_x = testing::get<0>(GetParam()), input_y = testing::get<1>(GetParam()), @@ -7304,7 +7273,7 @@ TEST_P(convolution_grouped_gpu, base) { size_t offset = input_lay.get_linear_offset(coords); input_flat[offset] = input_rnd[b][f][z][y][x]; } - auto input = memory::allocate(engine, input_lay); + auto input = engine.allocate_memory(input_lay); set_values(input, input_flat); auto input_zp_rnd = std::vector(input_f); @@ -7314,7 +7283,7 @@ TEST_P(convolution_grouped_gpu, base) { input_zp_prim_name = { "input_zp" }; } auto input_zp_lay = layout(data_types::i8, format::bfyx, tensor(feature(input_f))); - auto input_zp = memory::allocate(engine, input_zp_lay); + auto input_zp = engine.allocate_memory(input_zp_lay); set_values(input_zp, input_zp_rnd); auto weights_size = tensor(group(groups), batch(output_f / groups), feature(input_f / groups), spatial(filter_x, filter_y, filter_z)); @@ -7335,7 +7304,7 @@ TEST_P(convolution_grouped_gpu, base) { size_t offset = weights_lay.get_linear_offset(coords); 
weights_flat[offset] = weights_rnd[gi][ofi][ifi][kzi][kyi][kxi]; } - auto weights = memory::allocate(engine, weights_lay); + auto weights = engine.allocate_memory(weights_lay); set_values(weights, weights_flat); auto weights_zp_rnd = std::vector(output_f); @@ -7345,7 +7314,7 @@ TEST_P(convolution_grouped_gpu, base) { weights_zp_prim_name = { "weights_zp" }; } auto weights_zp_lay = layout(data_types::i8, format::bfyx, tensor(batch(output_f))); - auto weights_zp = memory::allocate(engine, weights_zp_lay); + auto weights_zp = engine.allocate_memory(weights_zp_lay); set_values(weights_zp, weights_zp_rnd); VVVVVF expected_result(batch_num, VVVVF(output_f)); @@ -7405,7 +7374,7 @@ TEST_P(convolution_grouped_gpu, base) { comp_prim_name = { "compensation" }; } auto comp_lay = layout(data_types::f32, format::bfyx, tensor(batch(output_f))); - auto comp = memory::allocate(engine, comp_lay); + auto comp = engine.allocate_memory(comp_lay); set_values(comp, comp_val); auto stride_tensor = tensor(batch(1), feature(1), spatial(stride, stride, stride, 1)); @@ -7413,7 +7382,7 @@ TEST_P(convolution_grouped_gpu, base) { stride_tensor = tensor(batch(1), feature(1), spatial(stride, stride, 1, 1)); } - topology topology(input_layout("input", input.get_layout()), + topology topology(input_layout("input", input->get_layout()), data("weights", weights), reorder("input_fsv", "input", {data_types::i8, input_data_format, input_size}), convolution("conv", @@ -7443,6 +7412,7 @@ TEST_P(convolution_grouped_gpu, base) { build_options options; options.set_option(build_option::optimize_data(true)); + options.set_option(build_option::outputs({"conv", "out"})); implementation_desc conv_impl = {input_data_format, impl_name}; options.set_option(build_option::force_implementations({{"conv", conv_impl}})); @@ -7451,10 +7421,10 @@ TEST_P(convolution_grouped_gpu, base) { network.execute(); auto out_mem = network.get_output("conv").get_memory(); - auto out_ptr = out_mem.pointer(); - auto out_lay = out_mem.get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); + auto out_lay = out_mem->get_layout(); - ASSERT_EQ(out_mem.get_layout().format, input_data_format); + ASSERT_EQ(out_mem->get_layout().format, input_data_format); ASSERT_EQ(out_lay.size.batch[0], expected_result.size()); ASSERT_EQ(out_lay.size.feature[0], expected_result[0].size()); ASSERT_EQ(out_lay.size.spatial[2], expected_result[0][0].size()); @@ -7501,10 +7471,9 @@ INSTANTIATE_TEST_CASE_P(conv_fp16_cases, convolution_general_gpu::PrintToStringParamName); TEST_P(convolution_general_gpu, conv_fp16_cases) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) - { + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; @@ -7529,13 +7498,13 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { auto input_size = tensor(batch_num, input_f, input_x, input_y); auto input_data = generate_random_4d(batch_num, input_f, input_y, input_x, -1, 1); auto input_data_bfyx = flatten_4d(format::bfyx, input_data); - auto input_mem = memory::allocate(engine, { data_types::f16, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f16, format::bfyx, input_size }); set_values(input_mem, input_data_bfyx); auto weights_size = tensor(output_f, input_f, filter_y, filter_x, 1); auto weights_data = generate_random_4d(output_f, input_f, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = memory::allocate(engine, {data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias @@ -7546,7 +7515,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { if (with_bias) { auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = memory::allocate(engine, {data_types::f16, format::bfyx, biases_size}); + auto biases_mem = engine.allocate_memory({data_types::f16, format::bfyx, biases_size}); set_values(biases_mem, biases_data); for (auto bi = 0; bi < batch_num; ++bi) { @@ -7561,7 +7530,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { } } - topology.add(input_layout("input", input_mem.get_layout()), + topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), data("bias", biases_mem), reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); @@ -7589,7 +7558,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { } } - topology.add(input_layout("input", input_mem.get_layout()), + topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); @@ -7612,10 +7581,10 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { network.execute(); auto out_mem = network.get_output("conv_fsv").get_memory(); - auto out_ptr = out_mem.pointer(); - auto out_lay = out_mem.get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); + auto out_lay = out_mem->get_layout(); - ASSERT_EQ(out_mem.get_layout().format, input_data_format); + ASSERT_EQ(out_mem->get_layout().format, input_data_format); ASSERT_EQ(out_lay.size.batch[0], expected_result.size()); ASSERT_EQ(out_lay.size.feature[0], expected_result[0].size()); ASSERT_EQ(out_lay.size.spatial[1], expected_result[0][0].size()); @@ -7643,11 +7612,11 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { template class convolution_test_base { public: - virtual topology build_topology(const cldnn::engine& engine) { + virtual topology build_topology(cldnn::engine& engine) { auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size()); auto wei_lay = layout(weights_type(), format::bfyx, weights_size()); - auto wei_mem = memory::allocate(engine, wei_lay); + auto wei_mem = engine.allocate_memory(wei_lay); auto weights_flat = flatten_4d(format::bfyx, _weights); set_values(wei_mem, weights_flat); layout reordered_layout = layout{input_type(), input_format(), input_size(), padding_size()}; @@ -7657,7 +7626,7 @@ public: std::string 
input_id = "input_reorder"; if (has_input_zp()) { auto input_zp_lay = layout(input_type(), format::bfyx, tensor(feature(input_features()))); - auto input_zp_mem = memory::allocate(engine, input_zp_lay); + auto input_zp_mem = engine.allocate_memory(input_zp_lay); set_values(input_zp_mem, _input_zp); topo.add(data("input_zp", input_zp_mem)); topo.add(eltwise("input_asymm", { "input_reorder", "input_zp" }, eltwise_mode::sub)); @@ -7667,7 +7636,7 @@ public: std::string weights_id = "weights"; if (has_weights_zp()) { auto weights_zp_lay = layout(weights_type(), format::bfyx, tensor(batch(output_features()))); - auto weights_zp_mem = memory::allocate(engine, weights_zp_lay); + auto weights_zp_mem = engine.allocate_memory(weights_zp_lay); set_values(weights_zp_mem, _weights_zp); topo.add(data("weights_zp", weights_zp_mem)); topo.add(eltwise("weights_asymm", { "weights", "weights_zp" }, eltwise_mode::sub)); @@ -7686,7 +7655,7 @@ public: topo.add(conv_prim); } else { auto bias_lay = layout(output_type(), format::bfyx, tensor(feature(output_features()))); - auto bias_mem = memory::allocate(engine, bias_lay); + auto bias_mem = engine.allocate_memory(bias_lay); set_values(bias_mem, _bias); topo.add(data("bias", bias_mem)); auto conv_prim = convolution( @@ -7710,7 +7679,7 @@ public: } virtual void run_expect(const VVVVF& expected) { - auto engine = get_test_engine(); + auto& engine = get_test_engine(); auto topo = build_topology(engine); @@ -7723,7 +7692,7 @@ public: auto net = network(prog, 0); auto input_lay = layout(input_type(), format::bfyx, input_size(), padding_size()); - auto input_mem = memory::allocate(engine, input_lay); + auto input_mem = engine.allocate_memory(input_lay); std::vector input_flat(input_lay.get_linear_size(), static_cast(0)); for (size_t bi = 0; bi < batch_num(); ++bi) for (size_t fi = 0; fi < input_features(); ++fi) @@ -7738,8 +7707,8 @@ public: net.set_input_data("input", input_mem); auto result = net.execute(); auto out_mem = result.at(output_primitive_id()).get_memory(); - auto out_lay = out_mem.get_layout(); - auto out_ptr = out_mem.cldnn::memory::template pointer(); + auto out_lay = out_mem->get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); std::stringstream description; for (auto i : net.get_primitives_info()) { @@ -8008,11 +7977,11 @@ template class convolution_random_test_fsv4_input : public convolution_random_test_base { public: using parent = convolution_random_test_base; - topology build_topology(const cldnn::engine& engine) override { + topology build_topology(cldnn::engine& engine) override { auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size()); auto wei_lay = layout(this->weights_type(), format::bfyx, this->weights_size()); - auto wei_mem = memory::allocate(engine, wei_lay); + auto wei_mem = engine.allocate_memory(wei_lay); auto wei_flat = flatten_4d(format::bfyx, this->_weights); set_values(wei_mem, wei_flat); layout reordered_layout = layout{this->input_type(), this->input_format(), this->input_size(), this->padding_size()}; @@ -8022,7 +7991,7 @@ public: std::string input_id = "input_reorder"; if (this->has_input_zp()) { auto input_zp_lay = layout(this->input_type(), format::bfyx, tensor(feature(this->input_features()))); - auto input_zp_mem = memory::allocate(engine, input_zp_lay); + auto input_zp_mem = engine.allocate_memory(input_zp_lay); set_values(input_zp_mem, this->_input_zp); topo.add(data("input_zp", input_zp_mem)); topo.add(eltwise("input_asymm", { "input_reorder", "input_zp" 
}, eltwise_mode::sub)); @@ -8032,7 +8001,7 @@ public: std::string weights_id = "weights"; if (this->has_weights_zp()) { auto weights_zp_lay = layout(this->weights_type(), format::bfyx, tensor(batch(this->output_features()))); - auto weights_zp_mem = memory::allocate(engine, weights_zp_lay); + auto weights_zp_mem = engine.allocate_memory(weights_zp_lay); set_values(weights_zp_mem, this->_weights_zp); topo.add(data("weights_zp", weights_zp_mem)); topo.add(eltwise("weights_asymm", { "weights", "weights_zp" }, eltwise_mode::sub)); @@ -8051,7 +8020,7 @@ public: topo.add(conv_prim); } else { auto bias_lay = layout(this->output_type(), format::bfyx, tensor(feature(this->output_features()))); - auto bias_mem = memory::allocate(engine, bias_lay); + auto bias_mem = engine.allocate_memory(bias_lay); set_values(bias_mem, this->_bias); topo.add(data("bias", bias_mem)); auto conv_prim = convolution( @@ -8070,7 +8039,7 @@ public: return topo; } void run_expect(const VVVVF& expected) override { - auto engine = get_test_engine(); + auto& engine = get_test_engine(); auto topo = this->build_topology(engine); @@ -8083,7 +8052,7 @@ public: auto net = network(prog, 0); auto input_lay = layout(this->input_type(), format::b_fs_yx_fsv4, this->input_size(), this->padding_size()); - auto input_mem = memory::allocate(engine, input_lay); + auto input_mem = engine.allocate_memory(input_lay); std::vector input_flat(input_lay.get_linear_size(), static_cast(0)); for (size_t bi = 0; bi < this->batch_num(); ++bi) for (size_t fi = 0; fi < this->input_features(); ++fi) @@ -8098,8 +8067,8 @@ public: net.set_input_data("input", input_mem); auto result = net.execute(); auto out_mem = result.at(this->output_primitive_id()).get_memory(); - auto out_lay = out_mem.get_layout(); - auto out_ptr = out_mem.cldnn::memory::template pointer(); + auto out_lay = out_mem->get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); std::stringstream description; for (auto i : net.get_primitives_info()) { @@ -8142,14 +8111,14 @@ public: return "scale_wa_reorder"; } - topology build_topology(const cldnn::engine& engine) override { + topology build_topology(cldnn::engine& engine) override { topology topo = parent::build_topology(engine); auto scale_lay = layout(this->output_type(), format::bfyx, tensor(batch(1), feature(this->output_features()))); auto shift_lay = layout(this->output_type(), format::bfyx, tensor(batch(1), feature(this->output_features()))); - auto scale_mem = memory::allocate(engine, scale_lay); - auto shift_mem = memory::allocate(engine, shift_lay); + auto scale_mem = engine.allocate_memory(scale_lay); + auto shift_mem = engine.allocate_memory(shift_lay); set_values(scale_mem, _scale); set_values(shift_mem, _shift); @@ -8397,20 +8366,17 @@ INSTANTIATE_TEST_CASE_P( to_string_convolution_all_params ); -class convolution_test : public tests::generic_test -{ +class convolution_test : public tests::generic_test { public: - static void TearDownTestCase() - { + static void TearDownTestCase() { all_generic_params.clear(); all_layer_params.clear(); all_test_params.clear(); } - static std::vector> generate_specific_test_params() - { + static std::vector> generate_specific_test_params() { // TODO: check split // TODO: check convolution without bias @@ -8443,8 +8409,7 @@ public: return all_layer_params; } - static std::vector, std::shared_ptr>> generate_all_test_params() - { + static std::vector, std::shared_ptr>> generate_all_test_params() { generate_specific_test_params(); std::vector input_formats = { cldnn::format::bfyx, 
cldnn::format::yxfb }; @@ -8457,23 +8422,16 @@ public: auto data_types = test_data_types(); - for (cldnn::data_types data_type : data_types) - { - for (cldnn::format input_format : input_formats) - { - for (cldnn::format weights_format : weights_formats) - { + for (cldnn::data_types data_type : data_types) { + for (cldnn::format input_format : input_formats) { + for (cldnn::format weights_format : weights_formats) { cldnn::build_options network_build_options; - if (input_format == cldnn::format::bfyx) - { + if (input_format == cldnn::format::bfyx) { network_build_options.set_option(cldnn::build_option::optimize_data(true)); } - for (cldnn::tensor input_size : input_tensor_size) - { - for (cldnn::tensor kernel_size : kernel_sizes) - { - for (auto output_features : output_features_sizes) - { + for (cldnn::tensor input_size : input_tensor_size) { + for (cldnn::tensor kernel_size : kernel_sizes) { + for (auto output_features : output_features_sizes) { std::shared_ptr params = std::make_shared(data_type, input_format, input_size.batch[0], input_size.feature[0], tensor(1, 1, input_size.spatial[0], input_size.spatial[1]), network_build_options); int input_features = params->input_layouts[0].size.feature[0]; params->input_layouts.push_back(cldnn::layout(params->data_type, weights_format, cldnn::tensor(output_features, input_features, kernel_size.spatial[0], kernel_size.spatial[1]))); // weights @@ -8487,10 +8445,8 @@ public: } // Create all the combinations for the test. - for (const auto& layer_param : all_layer_params) - { - for (auto test_param : all_generic_params) - { + for (const auto& layer_param : all_layer_params) { + for (auto test_param : all_generic_params) { all_test_params.push_back(std::make_tuple(test_param, layer_param)); } } @@ -8498,13 +8454,11 @@ public: return all_test_params; } - virtual bool is_format_supported(cldnn::format format) - { + virtual bool is_format_supported(cldnn::format format) { return ((format == cldnn::format::bfyx) || (format == cldnn::format::yxfb)); } - virtual cldnn::tensor get_expected_output_tensor() - { + virtual cldnn::tensor get_expected_output_tensor() { auto convolution = std::static_pointer_cast(layer_params); tensor input_size = generic_params->input_layouts[0].size; tensor dilation = convolution->dilation; @@ -8523,58 +8477,52 @@ public: return cldnn::tensor(input_size.batch[0], output_features, output_size_x, output_size_y); } - virtual void prepare_input_for_test(std::vector& inputs) - { - if (generic_params->data_type == data_types::f32) - { + virtual void prepare_input_for_test(std::vector& inputs) { + if (generic_params->data_type == data_types::f32) { prepare_input_for_test_typed(inputs); - } - else - { + } else { prepare_input_for_test_typed(inputs); } } template - void prepare_input_for_test_typed(std::vector& inputs) - { + void prepare_input_for_test_typed(std::vector& inputs) { int k = (generic_params->data_type == data_types::f32) ? 8 : 4; // Update inputs. auto input = inputs[0]; - auto input_size = inputs[0].get_layout().size; + auto input_size = inputs[0]->get_layout().size; VVVVF input_rnd = generate_random_4d(input_size.batch[0], input_size.feature[0], input_size.spatial[1], input_size.spatial[0], -2, 2, k); - VF input_rnd_vec = flatten_4d(input.get_layout().format, input_rnd); + VF input_rnd_vec = flatten_4d(input->get_layout().format, input_rnd); set_values(input, input_rnd_vec); // Update weights. 
auto weight_input = inputs[1]; - auto weight_size = inputs[1].get_layout().size; + auto weight_size = inputs[1]->get_layout().size; VVVVF weight_rnd = generate_random_4d(weight_size.batch[0], weight_size.feature[0], weight_size.spatial[1], weight_size.spatial[0], -2, 2, k); - VF weight_rnd_vec = flatten_4d(weight_input.get_layout().format, weight_rnd); + VF weight_rnd_vec = flatten_4d(weight_input->get_layout().format, weight_rnd); set_values(weight_input, weight_rnd_vec); // Update biases. auto bias_input = inputs[2]; - auto bias_size = inputs[2].get_layout().size; + auto bias_size = inputs[2]->get_layout().size; VF bias_rnd = generate_random_1d(bias_size.spatial[0], -2, 2, k); set_values(bias_input, bias_rnd); } template - memory generate_reference_typed(const std::vector& inputs) - { + memory::ptr generate_reference_typed(const std::vector& inputs) { // Output reference is always bfyx. auto convolution = std::static_pointer_cast(layer_params); - data_types dt = inputs[0].get_layout().data_type; + data_types dt = inputs[0]->get_layout().data_type; - tensor input_size = inputs[0].get_layout().size; + tensor input_size = inputs[0]->get_layout().size; tensor dilation = convolution->dilation; tensor stride = convolution->stride; tensor input_offset = convolution->input_offset; - tensor weights_size = inputs[1].get_layout().size; + tensor weights_size = inputs[1]->get_layout().size; padding output_padding = convolution->output_padding; tensor output_size = get_expected_output_tensor(); @@ -8585,27 +8533,23 @@ public: int output_features = weights_size.batch[0]; int input_features = weights_size.feature[0]; - auto output = memory::allocate( engine, cldnn::layout(dt, cldnn::format::bfyx, output_size, output_padding) ); + auto output = engine.allocate_memory(cldnn::layout(dt, cldnn::format::bfyx, output_size, output_padding)); - auto input_mem = inputs[0].pointer(); - auto weights_mem = inputs[1].pointer(); - auto bias_mem = inputs[2].pointer(); - auto output_mem = output.pointer(); + cldnn::mem_lock input_mem(inputs[0], get_test_stream()); + cldnn::mem_lock weights_mem(inputs[1], get_test_stream()); + cldnn::mem_lock bias_mem(inputs[2], get_test_stream()); + cldnn::mem_lock output_mem(output, get_test_stream()); - tensor output_buffer_size = output.get_layout().get_buffer_size(); + tensor output_buffer_size = output->get_layout().get_buffer_size(); // Initialized output with zeros. 
std::fill(output_mem.begin(), output_mem.end(), static_cast(0)); // Add the bias - for (int b = 0; b < input_size.batch[0]; b++) - { - for (int out_f = 0; out_f < output_features; out_f++) - { - for (int y = 0; y < output_size_y; y++) - { - for (int x = 0; x < output_size_x; x++) - { + for (int b = 0; b < input_size.batch[0]; b++) { + for (int out_f = 0; out_f < output_features; out_f++) { + for (int y = 0; y < output_size_y; y++) { + for (int x = 0; x < output_size_x; x++) { int output_index = (b * output_buffer_size.feature[0] + out_f) * output_buffer_size.spatial[1] * output_buffer_size.spatial[0]; tensor lower_output_padding = convolution->output_padding.lower_size(); output_index += (lower_output_padding.spatial[1] + y) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + x; @@ -8616,22 +8560,17 @@ public: } } - const auto input0_desc = get_linear_memory_desc(inputs[0].get_layout()); - const auto input1_desc = get_linear_memory_desc(inputs[1].get_layout()); + const auto input0_desc = get_linear_memory_desc(inputs[0]->get_layout()); + const auto input1_desc = get_linear_memory_desc(inputs[1]->get_layout()); // Convolve with weights - for (int b = 0; b < input_size.batch[0]; b++) - { + for (int b = 0; b < input_size.batch[0]; b++) { int input_bi = b; - for (int out_f = 0; out_f < output_features; out_f++) - { - for (int in_f = 0; in_f < input_features; in_f++) - { + for (int out_f = 0; out_f < output_features; out_f++) { + for (int in_f = 0; in_f < input_features; in_f++) { int input_fi = in_f; - for (int y = 0; y < output_size_y; y++) - { - for (int x = 0; x < output_size_x; x++) - { + for (int y = 0; y < output_size_y; y++) { + for (int x = 0; x < output_size_x; x++) { int output_bi = b; int output_fi = out_f; int output_yi = y; @@ -8640,29 +8579,25 @@ public: tensor lower_output_padding = convolution->output_padding.lower_size(); output_index += (lower_output_padding.spatial[1] + output_yi) * output_buffer_size.spatial[0] + lower_output_padding.spatial[0] + output_xi; - for (int kernel_y = 0; kernel_y < weights_size.spatial[1]; kernel_y++) - { + for (int kernel_y = 0; kernel_y < weights_size.spatial[1]; kernel_y++) { int input_yi = y * stride.spatial[1] + input_offset.spatial[1] + kernel_y * dilation.spatial[1]; - if ((input_yi < 0) || (input_yi >= input_size.spatial[1])) - { + if ((input_yi < 0) || (input_yi >= input_size.spatial[1])) { continue; } - for (int kernel_x = 0; kernel_x < weights_size.spatial[0]; kernel_x++) - { + for (int kernel_x = 0; kernel_x < weights_size.spatial[0]; kernel_x++) { int input_xi = x * stride.spatial[0] + input_offset.spatial[0] + kernel_x * dilation.spatial[0]; - if ((input_xi < 0) || (input_xi >= input_size.spatial[0])) - { + if ((input_xi < 0) || (input_xi >= input_size.spatial[0])) { continue; } - size_t input_index = get_linear_index(inputs[0].get_layout(), input_bi, input_fi, input_yi, input_xi, input0_desc); + size_t input_index = get_linear_index(inputs[0]->get_layout(), input_bi, input_fi, input_yi, input_xi, input0_desc); int weight_bi = out_f; int weight_fi = in_f; int weight_yi = kernel_y; int weight_xi = kernel_x; - size_t weight_index = get_linear_index(inputs[1].get_layout(), weight_bi, weight_fi, weight_yi, weight_xi, input1_desc); + size_t weight_index = get_linear_index(inputs[1]->get_layout(), weight_bi, weight_fi, weight_yi, weight_xi, input1_desc); output_mem[output_index] += input_mem[input_index] * weights_mem[weight_index]; } } @@ -8675,14 +8610,10 @@ public: return output; } - virtual memory 
generate_reference(const std::vector& inputs) - { - if (generic_params->data_type == data_types::f32) - { + virtual memory::ptr generate_reference(const std::vector& inputs) { + if (generic_params->data_type == data_types::f32) { return generate_reference_typed(inputs); - } - else - { + } else { return generate_reference_typed(inputs); } } @@ -8698,8 +8629,7 @@ std::vector> convolution_test::all_generic_p std::vector> convolution_test::all_layer_params = {}; std::vector, std::shared_ptr>> convolution_test::all_test_params = {}; -TEST_P(convolution_test, CONVOLUTION) -{ +TEST_P(convolution_test, CONVOLUTION) { run_single_test(); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp index da96bcab6f4..e5b6ed490a9 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp @@ -3,19 +3,15 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/crop.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; template std::vector generate_random_input(size_t b, size_t f, size_t z, size_t y, size_t x, int min, int max) { @@ -40,7 +36,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { // Input : 2x3x4x5 // Output : 1x2x2x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 3; @@ -52,10 +48,10 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 2; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -68,7 +64,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -84,7 +80,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { } TEST(crop_gpu, basic_in2x2x2x3_crop_all) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; @@ -96,10 +92,10 @@ TEST(crop_gpu, basic_in2x2x2x3_crop_all) { auto crop_x_size = x_size - 1; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, 
crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec; @@ -114,7 +110,7 @@ TEST(crop_gpu, basic_in2x2x2x3_crop_all) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); printf("Results:\n"); for (int b = 0; b < crop_batch_num; ++b) { //B @@ -135,7 +131,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all) { // Input : 2x3x4x5 // Output : 1x2x2x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 3; @@ -147,10 +143,10 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 2; - auto input = memory::allocate(engine, { data_types::i32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i32, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -163,7 +159,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -183,7 +179,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all) { // Input : 2x3x4x5 // Output : 1x2x2x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 3; @@ -195,10 +191,10 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 2; - auto input = memory::allocate(engine, { data_types::i64, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i64, format::yxfb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -211,7 +207,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -231,7 +227,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -243,10 +239,10 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); 
topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, {0, 0, 0, 0} )); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -259,7 +255,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector a; for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -280,7 +276,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_bfyx) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -292,10 +288,10 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_bfyx) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::i32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -308,7 +304,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_bfyx) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector a; for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -329,7 +325,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_bfyx) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -341,10 +337,10 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_bfyx) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::i64,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i64,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -357,7 +353,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_bfyx) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector a; for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -378,7 +374,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; 
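(The crop, cum_sum and custom_gpu_primitive hunks that follow repeat the same runtime-API migration as the ones above: the test engine is taken by non-const reference, memories are created with engine.allocate_memory and handled as cldnn::memory::ptr, layouts are reached through operator-> and buffers through cldnn::mem_lock bound to get_test_stream(). Below is a minimal illustrative sketch of that new-style pattern, not a hunk of this patch; it uses only calls that appear in these files, the test name is invented for illustration, and the extra cldnn headers are assumed to be the ones these test files already include.

// Illustrative sketch of the migrated memory API; not part of the patch.
#include "test_utils.h"
// (the additional cldnn includes used by these files are elided in this excerpt)

using namespace cldnn;
using namespace ::tests;

TEST(memory_api_example, allocate_lock_and_read) {
    auto& engine = get_test_engine();                    // non-const: allocation is now an engine member
    memory::ptr input = engine.allocate_memory(          // replaces memory::allocate(engine, layout)
        { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
    set_values(input, { -1.f, 2.f, -3.f, 4.f });

    topology topology;
    topology.add(input_layout("input", input->get_layout()));   // memory is a shared pointer now
    topology.add(activation("relu", "input", activation_func::relu));

    network network(engine, topology);
    network.set_input_data("input", input);
    auto outputs = network.execute();

    auto output = outputs.at("relu").get_memory();
    // mem_lock replaces memory::pointer and is bound to the stream that owns the memory
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    for (size_t i = 0; i < output->count(); ++i)
        EXPECT_GE(output_ptr[i], 0.f);
}

Passing get_test_stream() to mem_lock mirrors every migrated buffer read in these files; the removed pointer() accessor took no stream argument.)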
@@ -390,10 +386,10 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::f32,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::f32,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, {0, 0, 0, 0} )); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -406,7 +402,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F for (int y = 0; y < crop_y_size; ++y) { //Y @@ -425,7 +421,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_fyxb) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -437,10 +433,10 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_fyxb) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::i32,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i32,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -453,7 +449,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_fyxb) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F for (int y = 0; y < crop_y_size; ++y) { //Y @@ -472,7 +468,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_fyxb) { // Input : 6x2x4x3 // Output : 3x1x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -484,10 +480,10 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_fyxb) { auto crop_x_size = x_size - 2; auto crop_y_size = y_size - 1; - auto input = memory::allocate(engine, { data_types::i64,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i64,format::fyxb,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -500,7 +496,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_fyxb) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto 
output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F for (int y = 0; y < crop_y_size; ++y) { //Y @@ -526,7 +522,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) { // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 // f1: b0: 7 8 -16 b1: 12 8 -17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; @@ -543,10 +539,10 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) { auto x_offset = 1; auto y_offset = 1; - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num)), { tensor(feature(0)) })); std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, @@ -564,7 +560,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -591,7 +587,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_offsets) { // f1: b0: 5 6 -12 b1: 15 52 -13 // f1: b0: 7 8 -16 b1: 12 8 -17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; @@ -608,10 +604,10 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_offsets) { auto x_offset = 1; auto y_offset = 1; - auto input = memory::allocate(engine, { data_types::i32, format::yxfb,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::i32, format::yxfb,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num)), { tensor(feature(0)) })); std::vector input_vec = { 1, 0, 5, 15, @@ -629,7 +625,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_offsets) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -656,7 +652,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_offsets) { // f1: b0: 5 6 -12 b1: 15 52 -13 // f1: b0: 7 8 -16 b1: 12 8 -17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; @@ -673,10 +669,10 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_offsets) { auto x_offset = 1; auto y_offset = 1; - auto input = memory::allocate(engine, { data_types::i64, format::yxfb,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::i64, format::yxfb,{ tensor(spatial(x_size, y_size), 
feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num)), { tensor(feature(0)) })); std::vector input_vec = { 1, 0, 5, 15, @@ -694,7 +690,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_offsets) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F @@ -738,7 +734,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { // Out2: // f0: 4.0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 1; auto feature_num = 4; @@ -752,10 +748,10 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { auto crop_y_size = 1; auto feature_offset_1 = 0; auto feature_offset_2 = 3; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop1", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); topology.add(crop("crop2", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) })); @@ -772,21 +768,21 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { auto outputs = network.execute(); auto output = outputs.at("crop1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out1.size();i++) EXPECT_EQ(output_ptr[i], out1[i]); std::cout << std::endl; auto output_2 = outputs.at("crop2").get_memory(); - auto output_ptr_2 = output_2.pointer(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); for (size_t i = 0; i < out2.size();i++) EXPECT_EQ(output_ptr_2[i], out2[i]); } TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 1; auto feature_num = 4; @@ -798,12 +794,12 @@ TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { auto crop_x_size = 1; auto crop_y_size = 1; auto feature_offset_1 = 0; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1}); - auto padded_layout = input.get_layout().with_padding(in_pad); + auto padded_layout = input->get_layout().with_padding(in_pad); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("input_reorder", "input", padded_layout)); topology.add(crop("crop1", "input_reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), 
feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); topology.add(reorder("out_reorder", "crop1", format::bfyx, data_types::f32)); @@ -819,7 +815,7 @@ TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { auto outputs = network.execute(); auto output = outputs.at("out_reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out1.size();i++) EXPECT_EQ(output_ptr[i], out1[i]); @@ -854,7 +850,7 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { // Out2: // f0: 4 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 1; auto feature_num = 4; @@ -868,10 +864,10 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { auto crop_y_size = 1; auto feature_offset_1 = 0; auto feature_offset_2 = 3; - auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop1", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); topology.add(crop("crop2", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) })); @@ -888,14 +884,13 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { auto outputs = network.execute(); auto output = outputs.at("crop1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out1.size(); i++) EXPECT_EQ(output_ptr[i], out1[i]); - std::cout << std::endl; auto output_2 = outputs.at("crop2").get_memory(); - auto output_ptr_2 = output_2.pointer(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); for (size_t i = 0; i < out2.size(); i++) EXPECT_EQ(output_ptr_2[i], out2[i]); @@ -930,7 +925,7 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { // Out2: // f0: 4 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 1; auto feature_num = 4; @@ -944,10 +939,10 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { auto crop_y_size = 1; auto feature_offset_1 = 0; auto feature_offset_2 = 3; - auto input = memory::allocate(engine, { data_types::i64, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop1", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); topology.add(crop("crop2", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) })); @@ -964,14 +959,13 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { auto outputs = network.execute(); auto output = 
outputs.at("crop1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out1.size(); i++) EXPECT_EQ(output_ptr[i], out1[i]); - std::cout << std::endl; auto output_2 = outputs.at("crop2").get_memory(); - auto output_ptr_2 = output_2.pointer(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); for (size_t i = 0; i < out2.size(); i++) EXPECT_EQ(output_ptr_2[i], out2[i]); @@ -1007,8 +1001,8 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { // Out2: // f0: 4.0 // disable memory pool when we want to check optimized out internal results - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, false /*mem_pool*/ }; - engine engine{ cfg }; + engine_configuration cfg{ false, queue_types::out_of_order, std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, false /*mem_pool*/ }; + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, cfg); auto batch_num = 1; auto feature_num = 4; auto x_size = 1; @@ -1020,10 +1014,10 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { auto crop_y_size = 1; auto feature_offset_1 = 0; auto feature_offset_2 = 3; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(crop("crop1", "relu", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); topology.add(crop("crop2", "relu", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) })); @@ -1038,22 +1032,22 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { bo.set_option(build_option::optimize_data(true)); bo.set_option(build_option::debug(true)); //required to have optimized crop despite the fact that it's specified as an output - network network(engine, topology, bo); + network network(*engine, topology, bo); network.set_input_data("input", input); auto outputs = network.execute(); auto output = outputs.at("relu1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // check if crop has been executed in place - auto in_place = outputs.at("crop1").get_memory().is_the_same_buffer(outputs.at("relu").get_memory()); + auto in_place = engine->is_the_same_buffer(*outputs.at("crop1").get_memory(), *outputs.at("relu").get_memory()); EXPECT_TRUE(in_place); for (size_t i = 0; i < out1.size();i++) EXPECT_EQ(output_ptr[i], out1[i]); auto output_2 = outputs.at("relu2").get_memory(); - auto output_ptr_2 = output_2.pointer(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); for (size_t i = 0; i < out2.size();i++) EXPECT_EQ(output_ptr_2[i], out2[i]); @@ -1064,7 +1058,7 @@ TEST(crop_gpu, basic_in3x1x2x2x1_crop_all_bfzyx) { // Input : 6x2x4x3x2 // Output : 3x1x2x2x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto 
feature_num = 2; @@ -1078,10 +1072,10 @@ TEST(crop_gpu, basic_in3x1x2x2x1_crop_all_bfzyx) { auto crop_y_size = y_size - 1; auto crop_z_size = z_size - 1; - auto input = memory::allocate(engine, { data_types::f32,format::bfzyx,{ batch_num, feature_num, x_size, y_size, z_size } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfzyx,{ batch_num, feature_num, x_size, y_size, z_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size, crop_z_size }, { 0, 0, 0, 0, 0 })); std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); @@ -1094,7 +1088,7 @@ TEST(crop_gpu, basic_in3x1x2x2x1_crop_all_bfzyx) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F for (int z = 0; z < crop_z_size; ++z) { //Z @@ -1115,7 +1109,7 @@ TEST(crop_gpu, basic_in3x1x3x2x2x1_crop_all_bfwzyx) { // Input : 6x2x6x4x3x2 // Output : 3x1x3x2x2x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 2; @@ -1133,10 +1127,10 @@ TEST(crop_gpu, basic_in3x1x3x2x2x1_crop_all_bfwzyx) { tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, w_size, z_size, y_size, x_size }); tensor crop_size = tensor(format::bfwzyx, { crop_batch_num, crop_feature_num, crop_w_size, crop_z_size, crop_y_size, crop_x_size }); - auto input = memory::allocate(engine, { data_types::f32,format::bfwzyx, in_size }); + auto input = engine.allocate_memory({ data_types::f32,format::bfwzyx, in_size }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(crop("crop", "input", crop_size, tensor{ 0 })); VVVVVVF input_rnd = generate_random_6d(batch_num, feature_num, w_size, z_size, y_size, x_size, -10, 10); @@ -1150,7 +1144,7 @@ TEST(crop_gpu, basic_in3x1x3x2x2x1_crop_all_bfwzyx) { auto outputs = network.execute(); auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < crop_batch_num; ++b) { //B for (int f = 0; f < crop_feature_num; ++f) { //F for (int w = 0; w < crop_w_size; ++w) { //W @@ -1176,7 +1170,7 @@ class crop_gpu : public ::testing::TestWithParam {}; TEST_P(crop_gpu, pad_test) { auto p = GetParam(); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = std::get<0>(p); auto feature_num = std::get<1>(p); @@ -1194,10 +1188,10 @@ TEST_P(crop_gpu, pad_test) { auto in_out_format = std::get<3>(p).first; auto crop_format = std::get<3>(p).second; - auto input = memory::allocate(engine, { data_types::f32, in_out_format, { tensor(spatial(x_size, y_size, z_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine.allocate_memory({ data_types::f32, in_out_format, { tensor(spatial(x_size, y_size, z_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("reorder", "input", crop_format, data_types::f32)); topology.add(crop("crop1", 
"reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size, crop_z_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0,0), batch(0)) })); topology.add(reorder("out", "crop1", in_out_format, data_types::f32)); @@ -1225,7 +1219,7 @@ TEST_P(crop_gpu, pad_test) { auto outputs = network.execute(); auto output = outputs.at("out").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < res.size(); i++) EXPECT_EQ(output_ptr[i], res[i]); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/cum_sum_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/cum_sum_gpu_test.cpp index bb40415d16a..0020c53076a 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/cum_sum_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/cum_sum_gpu_test.cpp @@ -3,21 +3,18 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include "api/cum_sum.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include +#include "test_utils.h" + +#include +#include +#include #include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; template static std::vector cumsum(const std::vector& input, @@ -150,7 +147,7 @@ class cum_sum_gpu : public ::testing::TestWithParam {}; TEST_P(cum_sum_gpu, basic_test) { auto p = GetParam(); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto b = std::get<0>(p); auto f = std::get<1>(p); @@ -169,14 +166,14 @@ TEST_P(cum_sum_gpu, basic_test) { else if (in_out_format == format::bfzyx) size = 5; - auto input = memory::allocate(engine, { data_types::f32, in_out_format, shape }); + auto input = engine.allocate_memory({ data_types::f32, in_out_format, shape }); const int inputSize = b * f * w * z * y * x; auto inputVals = generateVector(inputSize); set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", "Input0", getCumSumAxis(axis, size), exclusive, reverse)); network network(engine, topology); @@ -189,7 +186,7 @@ TEST_P(cum_sum_gpu, basic_test) { EXPECT_EQ(outputs.begin()->first, "cum_sum"); auto output = outputs.at("cum_sum").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); auto answers = cumsum(inputVals, in_out_format, { b, f, w, z, y, x }, axis, exclusive, reverse); ASSERT_EQ(output_ptr.size(), answers.size()); @@ -306,21 +303,22 @@ INSTANTIATE_TEST_CASE_P( ::testing::ValuesIn(variants) ), ); -TEST(cum_sum_gpu_f16, basic_1d) { +// FIXME: This test fails on some driver versions. 
Looks like UB in impl or driver issue +TEST(cum_sum_gpu_f16, DISABLED_basic_1d) { // Input : 5x1x1x1 // Output : 5x1x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor shape = { 5, 1, 1, 1 }; std::vector inputVals = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f }; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, shape }); set_values(input, vectorCast(inputVals)); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", "Input0")); network network(engine, topology); @@ -333,65 +331,12 @@ TEST(cum_sum_gpu_f16, basic_1d) { EXPECT_EQ(outputs.begin()->first, "cum_sum"); auto output = outputs.at("cum_sum").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); auto answers = cumsum(inputVals, format::bfyx, { 5, 1, 1, 1, 1, 1 }); - ASSERT_EQ(output_ptr.size(), answers.size()); - for (size_t i = 0; i < answers.size(); ++i) - { + ASSERT_EQ(output->count(), answers.size()); + for (size_t i = 0; i < answers.size(); ++i) { EXPECT_TRUE(are_equal(answers[i], float16_to_float32(output_ptr[i]))) << i; } } - -TEST(cum_sum_gpu_f32, perf) { - // Input : 384x160x160x1 - // Output : 384x160x160x1 - - constexpr int batch = 384; - constexpr int features = 160; - constexpr int y = 160; - constexpr int x = 1; - engine_configuration configuration(true); - engine engine(configuration); - tensor shape = { batch, features, y, x }; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); - constexpr int inputSize = batch * features * y * x; - auto inputVals = generateVector(inputSize); - - set_values(input, inputVals); - - topology topology; - topology.add(input_layout("Input0", input.get_layout())); - topology.add(cum_sum("cum_sum", "Input0")); - - network network(engine, topology); - - network.set_input_data("Input0", input); - - auto outputs = network.execute(); - - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "cum_sum"); - - auto output = outputs.at("cum_sum").get_memory(); - auto output_ptr = output.pointer(); - - auto profilingTime = [](const primitive_id& id, const event& ev) { - cldnn::instrumentation::profiling_info cldnnInfo{id, ev.get_profiling_info()}; - long long time = 0; - for (auto &interval : cldnnInfo.intervals) { - using duration_t = std::chrono::duration; - time += std::chrono::duration_cast(interval.value->value()).count(); - } - return time; - }; - - - auto ep = network.get_executed_primitives(); - auto cumSumEP = ep.find("cum_sum"); - ASSERT_NE(cumSumEP, ep.end()) << "Cannot find 'cum_sum' id in executed primitives"; - - auto time = profilingTime(cumSumEP->first, cumSumEP->second); - std::cout << "Time, id: " << cumSumEP->first << ", time: " << time << std::endl; -} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp index a484f20291a..f93009ee639 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp @@ -2,18 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include 
-#include "api/eltwise.hpp" -#include "api/reorder.hpp" -#include "api/custom_gpu_primitive.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include namespace cldnn { @@ -21,7 +15,7 @@ namespace cldnn } using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { // Input2 : 2x2x2 @@ -29,28 +23,28 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { // Output : 2x2x2x2 // Input: - // f0: b0: 1 2 b1: 0 0 - // f0: b0: 3 4 b1: 0.5 -0.5 - // f1: b0: 5 6 b1: 1.5 5.2 - // f1: b0: 7 8 b1: 12 8 + // f0: b0: 1 2 b1: 0 0 + // f0: b0: 3 4 b1: 0.5 -0.5 + // f1: b0: 5 6 b1: 1.5 5.2 + // f1: b0: 7 8 b1: 12 8 // // Input2 - // f0: b0: 0.5 5 b1: 2.5 7 + // f0: b0: 0.5 5 b1: 2.5 7 // f0: b0: 15 -2 b1: 17 6.5 // f1: b0: 0.5 2 b1: 2.5 4 // f1: b0: 8 -0.5 b1: 10 -2.5 // // Output: - // f0: b0: 1.5 7 b1: 2.5 7 - // f0: b0: 18 2 b1: 17.5 6 - // f1: b0: 5.5 8 b1: 4 9.2 - // f1: b0: 15 16.5 b1: 22 16.5 + // f0: b0: 1.5 7 b1: 2.5 7 + // f0: b0: 18 2 b1: 17.5 6 + // f1: b0: 5.5 8 b1: 4 9.2 + // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); std::string kernel_code = R"__krnl( @@ -68,10 +62,10 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(custom_gpu_primitive( - "user_kernel", + "user_kernel", { "input", "input2" }, { kernel_code }, entry_point, @@ -109,10 +103,9 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { 18.f,17.5f, 15.f, 22.f, 2.f, 6.f, 7.5f, 5.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - for (int i = 0; i < 16; i++) - { + for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(answers[i], output_ptr[i])); } } @@ -125,28 +118,28 @@ void add_basic_in2x2x2x2_with_reorder() // Output : 2x2x2x2 // Input: - // f0: b0: 1 2 b1: 0 0 - // f0: b0: 3 4 b1: 0.5 -0.5 - // f1: b0: 5 6 b1: 1.5 5.2 - // f1: b0: 7 8 b1: 12 8 + // f0: b0: 1 2 b1: 0 0 + // f0: b0: 3 4 b1: 0.5 -0.5 + // f1: b0: 5 6 b1: 1.5 5.2 + // f1: b0: 7 8 b1: 12 8 // // Input2 - // f0: b0: 0.5 5 b1: 2.5 7 + // f0: b0: 0.5 5 b1: 2.5 7 // f0: b0: 15 -2 b1: 17 6.5 // f1: b0: 0.5 2 b1: 2.5 4 // f1: b0: 8 -0.5 b1: 10 -2.5 // // Output: - // f0: b0: 1.5 7 b1: 2.5 7 - // f0: b0: 18 2 b1: 17.5 6 - // f1: b0: 5.5 8 b1: 4 9.2 - // f1: b0: 15 16.5 b1: 22 16.5 + // f0: b0: 1.5 7 b1: 2.5 7 + // f0: b0: 18 2 b1: 17.5 6 + // f1: b0: 5.5 8 b1: 4 9.2 + // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } 
}); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); std::string data_type_string = "float"; switch (DType) @@ -172,8 +165,8 @@ void add_basic_in2x2x2x2_with_reorder() layout output_layout = { DType, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("to_int1", "input", { DType, format::yxfb,{ 2,2,2,2 } })); topology.add(reorder("to_int2", "input2", { DType, format::yxfb,{ 2,2,2,2 } })); topology.add(custom_gpu_primitive( @@ -216,7 +209,7 @@ void add_basic_in2x2x2x2_with_reorder() 18.f,17.f, 15.f, 22.f, 2.f, 6.f, 8.f, 6.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -238,28 +231,28 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) { // Output : 2x2x2x2 // Input: - // f0: b0: 1 2 b1: 0 0 - // f0: b0: 3 4 b1: 0.5 -0.5 - // f1: b0: 5 6 b1: 1.5 5.2 - // f1: b0: 7 8 b1: 12 8 + // f0: b0: 1 2 b1: 0 0 + // f0: b0: 3 4 b1: 0.5 -0.5 + // f1: b0: 5 6 b1: 1.5 5.2 + // f1: b0: 7 8 b1: 12 8 // // Input2 - // f0: b0: 0.5 5 b1: 2.5 7 + // f0: b0: 0.5 5 b1: 2.5 7 // f0: b0: 15 -2 b1: 17 6.5 // f1: b0: 0.5 2 b1: 2.5 4 // f1: b0: 8 -0.5 b1: 10 -2.5 // // Output: - // f0: b0: 1.5 7 b1: 2.5 7 - // f0: b0: 18 2 b1: 17.5 6 - // f1: b0: 5.5 8 b1: 4 9.2 - // f1: b0: 15 16.5 b1: 22 16.5 + // f0: b0: 1.5 7 b1: 2.5 7 + // f0: b0: 18 2 b1: 17.5 6 + // f1: b0: 5.5 8 b1: 4 9.2 + // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); std::string kernel_code = R"__krnl( @@ -274,8 +267,8 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) { layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); topology.add(custom_gpu_primitive( "user_kernel", @@ -311,13 +304,13 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) { auto output = outputs.at("user_kernel").get_memory(); - float answers[16] = + float answers[16] = { 2.5f, 3.5f, 6.5f, 5.f, 8.f, 8.f, 9.f, 10.2f, 19.f, 18.5f, 16.f, 23.f, 3.f, 7.f, 8.5f, 6.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -331,28 +324,28 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) { // Output : 2x2x2x2 // Input: - // f0: b0: 1 2 b1: 0 0 - // f0: b0: 3 4 b1: 0.5 -0.5 - // f1: b0: 5 6 b1: 1.5 5.2 - // f1: b0: 7 8 b1: 12 8 + // f0: b0: 1 2 b1: 0 0 
+ // f0: b0: 3 4 b1: 0.5 -0.5 + // f1: b0: 5 6 b1: 1.5 5.2 + // f1: b0: 7 8 b1: 12 8 // // Input2 - // f0: b0: 0.5 5 b1: 2.5 7 + // f0: b0: 0.5 5 b1: 2.5 7 // f0: b0: 15 -2 b1: 17 6.5 // f1: b0: 0.5 2 b1: 2.5 4 // f1: b0: 8 -0.5 b1: 10 -2.5 // // Output: - // f0: b0: 1.5 7 b1: 2.5 7 - // f0: b0: 18 2 b1: 17.5 6 - // f1: b0: 5.5 8 b1: 4 9.2 - // f1: b0: 15 16.5 b1: 22 16.5 + // f0: b0: 1.5 7 b1: 2.5 7 + // f0: b0: 18 2 b1: 17.5 6 + // f1: b0: 5.5 8 b1: 4 9.2 + // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); std::string kernel_code = R"__krnl( @@ -367,8 +360,8 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) { layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(custom_gpu_primitive( "user_kernel", { "input" }, @@ -410,7 +403,7 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) { 19.f, 18.5f, 16.f, 23.f, 3.f, 7.f, 8.5f, 6.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -424,27 +417,27 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 // Output : 2x2x2x2 // Input: - // f0: b0: 1 2 b1: 0 0 - // f0: b0: 3 4 b1: 0.5 -0.5 - // f1: b0: 5 6 b1: 1.5 5.2 - // f1: b0: 7 8 b1: 12 8 + // f0: b0: 1 2 b1: 0 0 + // f0: b0: 3 4 b1: 0.5 -0.5 + // f1: b0: 5 6 b1: 1.5 5.2 + // f1: b0: 7 8 b1: 12 8 // // Input2 - // f0: b0: 0.5 5 b1: 2.5 7 + // f0: b0: 0.5 5 b1: 2.5 7 // f0: b0: 15 -2 b1: 17 6.5 // f1: b0: 0.5 2 b1: 2.5 4 // f1: b0: 8 -0.5 b1: 10 -2.5 // // Output: - // f0: b0: 1.5 7 b1: 2.5 7 - // f0: b0: 18 2 b1: 17.5 6 - // f1: b0: 5.5 8 b1: 4 9.2 - // f1: b0: 15 16.5 b1: 22 16.5 + // f0: b0: 1.5 7 b1: 2.5 7 + // f0: b0: 18 2 b1: 17.5 6 + // f1: b0: 5.5 8 b1: 4 9.2 + // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); std::string kernel_code1 = R"__krnl( @@ -454,7 +447,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 output[idx] = input0[idx] + SCALAR; } )__krnl"; - + std::string kernel_code2 = R"__krnl( __kernel void add_kernel(const __global float* input0, __global float* output) @@ -468,7 +461,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(custom_gpu_primitive( "user_kernel1", { "input" }, @@ -505,20 +498,19 @@ 
TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 auto output = outputs.at("user_kernel2").get_memory(); - auto output_ptr = output.pointer(); - auto input_ptr = input.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); - for (int i = 0; i < 16; i++) - { + for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(input_ptr[i] + 7, output_ptr[i])); } } TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); std::string kernel_code = R"__krnl( @@ -533,8 +525,8 @@ TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) { layout output_layout = { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }; std::vector gws = { output_layout.count() }; topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(custom_gpu_primitive( "user_kernel", { "input", "input2" }, @@ -556,7 +548,7 @@ TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) { 0, 2, 0, 2, 55, 75, 20, 4, 15, 17, 80, 10, - 2, 60, 0, 20 + 2, 60, 0, 20 }); network network(engine, topology); @@ -577,10 +569,9 @@ TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) { 6, 160, 8, 200 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - for (int i = 0; i < 16; i++) - { + for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(answers[i], output_ptr[i])); } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp index a5789188fbf..eb82022c8b3 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp @@ -4,26 +4,20 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/deconvolution.hpp" -#include "api/crop.hpp" -#include -#include -#include -#include #include "test_utils/test_utils.h" -#include "test_utils/float16.h" -#include "api/reorder.hpp" -#include "src/include/to_string_utils.h" + +#include +#include +#include +#include +#include namespace cldnn { template<> struct type_to_data_type { static const data_types value = data_types::f16; }; } using namespace cldnn; -using namespace tests; +using namespace ::tests; template struct deconvolution_traits { @@ -147,18 +141,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) { // 18 0.75 7.25 // 23 42.5 15.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = 
engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 }) @@ -173,7 +167,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -14.f, 5.f, 2.25f, @@ -208,16 +202,16 @@ TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { // 18 0.75 7.25 // 23 42.5 15.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), deconvolution("deconv", "input", { "weights" }) ); @@ -231,7 +225,7 @@ TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -16.f, 3.f, 0.25f, @@ -265,18 +259,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filt // 18 0.75 7.25 // 23 42.5 15.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1 }) @@ -291,7 +285,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filt auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -14.f, 5.f, 2.25f, @@ -325,18 +319,18 @@ 
TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) { // Output: // 0.75 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }) @@ -351,7 +345,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_FLOAT_EQ(0.75f, output_ptr[0]); } @@ -376,18 +370,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { // Output: // 0.75 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) @@ -402,7 +396,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -15.f, 5.f, 0.f, 1.25f, @@ -441,18 +435,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) { // 0 0 0 // 6 0 -18 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 3, 3 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); 
set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 1.f, 3.5f, 1.5f, 2.f, 3.f, 4.f, 5.f }); set_values(biases, { 0.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 4, 4 }, { 0, 0, -2, -2 }) @@ -467,7 +461,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 40.f, 0.f, 1.5f, @@ -503,18 +497,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) { // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 1.f, 0.5f, 3.f, 6.f, 2.f, 9.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -529,7 +523,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 0.5f, 4.5f, 22.f, @@ -568,20 +562,20 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { // f1: 1 8.5 // f1: 17 - 13 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::yxio, { 2, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::yxio, { 2, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f }); set_values(biases, { 1.0f, 5.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -596,7 +590,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { auto output_prim = 
outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 1.f, 4.5f, 8.5f, @@ -631,18 +625,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) { // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -657,7 +651,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -693,19 +687,19 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_p // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })), data("weights", weights), data("biases", biases), deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -720,7 +714,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_p auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -760,21 +754,21 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd // f1: 1 8.5 // f1: 17 - 13 - const auto& engine = get_test_engine(); + auto& engine = 
get_test_engine(); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::yxio,{ 2, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::yxio,{ 2, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.f, -2.f, 2.f, 2.f, 7.f, 7.f, -0.5f, -0.5f }); set_values(biases, { 1.0f, 5.0f }); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 })), data("weights", weights), data("biases", biases), deconvolution("deconv", "reorder", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -789,7 +783,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 1.f, 4.5f, 8.5f, @@ -824,18 +818,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) { // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -850,7 +844,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -885,11 +879,11 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) { // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); 
+ auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); @@ -904,7 +898,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) { set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -919,7 +913,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -961,18 +955,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) // -3 4.5 -8 -28 // 13 -17 1 -17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f, -4.f, 1.f, -9.f, -7.f }); set_values(biases, { 1.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -987,7 +981,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -1003,11 +997,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2) TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) { // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = 
engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { @@ -1017,7 +1011,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) set_values(biases, { 1.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }) @@ -1032,7 +1026,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -1049,9 +1043,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 // Test for depthwise separable optimization, there are 16 joined weights and biases (group 16) // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 2, 2 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f, 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f, @@ -1063,13 +1057,13 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", input->get_layout())); std::vector weights_vec; std::vector bias_vec; - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); set_values(weights, { @@ -1108,7 +1102,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, @@ -1131,9 +1125,9 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 // Test for depthwise separable optimization, there are 16 joined weights and biases (group 16) // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 2, 2 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f, 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f, @@ -1145,13 +1139,13 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", 
input->get_layout())); std::vector weights_vec; std::vector bias_vec; - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(16), batch(2), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 32, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(2), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 32, 1, 1 } }); set_values(weights, { @@ -1197,7 +1191,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f, @@ -1219,11 +1213,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ofm3) { // data is similar as in basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(3), feature(2), spatial(1, 1)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 6, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 4, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(3), feature(2), spatial(1, 1)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 6, 1, 1 } }); set_values(input, { 1.5f, 0.5f, 2.0f, -1.0f @@ -1238,7 +1232,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }) @@ -1253,7 +1247,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f @@ -1284,18 +1278,18 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) { // 18 0.75 7.25 // 23 42.5 15.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oizyx,{ 1, 1, 2, 2, 1 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oizyx,{ 1, 1, 2, 2, 1 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f }); topology 
topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1,1,1,1,1 }) @@ -1310,7 +1304,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -14.f, 5.f, 2.25f, @@ -1410,10 +1404,10 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) { // 1 2 3 3 2 1 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oizyx,{ 1, 1, 3, 3, 3 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oizyx,{ 1, 1, 3, 3, 3 } }); set_values(input, { @@ -1447,7 +1441,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) { }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), deconvolution("deconv", "input", { "weights" }) ); @@ -1461,7 +1455,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, @@ -1532,10 +1526,10 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) { // 3.5 1.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oizyx,{ 1, 1, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oizyx,{ 1, 1, 2, 2, 2 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f }); @@ -1543,7 +1537,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) { //set_values(weights, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 }) ); @@ -1556,7 +1550,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) { EXPECT_EQ(outputs.begin()->first, "deconv"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -16.f, 4.f, -1.f, 0.25f, @@ -1607,16 +1601,16 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) { // 12 1.75 // 3 -18 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, 
format::oizyx,{ 1, 1, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oizyx,{ 1, 1, 2, 2, 2 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 }, { 0, 0, -1, -1, -1 }) ); @@ -1629,7 +1623,7 @@ TEST(deconvolution_f32_fw_gpu, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) { EXPECT_EQ(outputs.begin()->first, "deconv"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 12.f, 1.75f, 3.f, -18.f, @@ -1649,8 +1643,8 @@ TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) { // Stride : 2x2 // Pad : 4x4 - //const auto& engine = get_test_engine(); - engine engine; + //auto& engine = get_test_engine(); + auto& engine = get_test_engine(); VVVVF input_rnd = generate_random_4d(1, 32, 16, 16, -2, 2); VF input_rnd_vec = flatten_4d(format::bfyx, input_rnd); @@ -1665,11 +1659,11 @@ TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) { for (unsigned int i = 0; i < bias_rnd.size(); i++) bias_f32_rnd.push_back(float(bias_rnd[i])); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 32, 16, 16 } }); - auto weights = memory::allocate(engine, { data_types::f16, format::oiyx, { 1, 32, 9, 9 } }); - auto biases = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 1, 1 } }); - auto weights_f32 = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 32, 9, 9 } }); - auto biases_f32 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 32, 16, 16 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::oiyx, { 1, 32, 9, 9 } }); + auto biases = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 1, 1 } }); + auto weights_f32 = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 32, 9, 9 } }); + auto biases_f32 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, input_rnd_vec); set_values(weights, filter_rnd_vec); @@ -1678,7 +1672,7 @@ TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) { set_values(biases_f32, bias_f32_rnd); topology topology_ref( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -4, -4 }, tensor{ 1, 1, 32, 32 }) @@ -1691,16 +1685,15 @@ TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) { EXPECT_EQ(outputs_ref.size(), size_t(1)); EXPECT_EQ(outputs_ref.begin()->first, "deconv"); auto output_ref_prim = outputs_ref.begin()->second.get_memory(); - auto output_ref_ptr = output_ref_prim.pointer(); + cldnn::mem_lock output_ref_ptr(output_ref_prim, get_test_stream()); std::vector output_vec_ref; - for (unsigned int i = 0; i < output_ref_prim.get_layout().count(); i++) - { + for (unsigned int i = 0; i < output_ref_prim->get_layout().count(); i++) { output_vec_ref.push_back(output_ref_ptr[i]); } topology topology_act( - input_layout("input_act", 
input.get_layout()), + input_layout("input_act", input->get_layout()), data("weights_f32", weights_f32), data("biases_f32", biases_f32), deconvolution("deconv_act", "input_act", { "weights_f32" }, { "biases_f32" }, { 1, 1, 2, 2 }, { 0, 0, -4, -4 }), @@ -1716,11 +1709,10 @@ TEST(deconvolution_f16_gpu, basic_k9x9_s2x2_pad4x4) { EXPECT_EQ(outputs_act.size(), size_t(1)); EXPECT_EQ(outputs_act.begin()->first, "out"); auto output_act_prim = outputs_act.begin()->second.get_memory(); - auto output_act_ptr = output_act_prim.pointer(); + cldnn::mem_lock output_act_ptr(output_act_prim, get_test_stream()); std::vector output_vec; - for (unsigned int i = 0; i < output_act_prim.get_layout().count(); i++) - { + for (unsigned int i = 0; i < output_act_prim->get_layout().count(); i++) { float x = float_round(output_act_ptr[i]), y = float_round(output_vec_ref[i]); EXPECT_NEAR(x, y, 1e-0f); } @@ -1748,18 +1740,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::yxio, { 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::yxio, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { -2.f, 2.f, 7.f, -0.5f }); set_values(biases, { 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }), @@ -1780,7 +1772,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -1814,11 +1806,11 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad // -3 4.5 0.5 22 // 13 -17 5 -7 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f16, format::oiyx,{ 1, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::oiyx,{ 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f16, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f), @@ -1830,7 +1822,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad set_values(biases, { FLOAT16(1.0f) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { 
"weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }), @@ -1851,7 +1843,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -1865,11 +1857,11 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad1_group2) { // data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2 - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { @@ -1879,7 +1871,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad set_values(biases, { 1.0f, -1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }), @@ -1900,7 +1892,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -3.f, 4.5f, 13.f, -17.f, @@ -1913,11 +1905,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad } TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad1_b_fs_yx_fsv16_dw) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f }); set_values(weights, { @@ -1927,7 +1919,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad set_values(biases, { 0.0f, 0.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }), @@ -1948,7 +1940,7 @@ 
TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -4.f, 3.5f, 12.f, -18.f, @@ -1961,18 +1953,18 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad } TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f }); set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f }); set_values(biases, { 2.0f, 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), reorder("input_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32), @@ -1994,7 +1986,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -14.f, 5.f, 2.25f, @@ -2013,11 +2005,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { } TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f}); @@ -2026,7 +2018,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { set_values(biases, { 2.0f, 2.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), reorder("input_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32), @@ -2048,7 +2040,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_FLOAT_EQ(0.75f, output_ptr[0]); EXPECT_FLOAT_EQ(0.75f, output_ptr[1]); 
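The hunks above and below repeat one mechanical migration: tests take the engine by reference from get_test_engine(), allocate buffers as cldnn::memory::ptr via engine.allocate_memory(layout), query layouts through operator->, and read results through cldnn::mem_lock<T> bound to get_test_stream() instead of memory::pointer<T>(). A minimal sketch of a test written directly against the new API follows; the test name, tensor values, and the identity reorder are illustrative only and are not part of the patch:

    #include "test_utils/test_utils.h"

    using namespace cldnn;
    using namespace ::tests;

    TEST(memory_api_migration_example, roundtrip_through_reorder) {
        auto& engine = get_test_engine();                 // engine is shared, taken by reference

        // allocate_memory returns memory::ptr, so layout access uses operator->
        memory::ptr input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
        set_values(input, { 1.f, 2.f, 3.f, 4.f });

        topology topology(
            input_layout("input", input->get_layout()),
            reorder("out", "input", format::bfyx, data_types::f32));

        network network(engine, topology);
        network.set_input_data("input", input);
        auto outputs = network.execute();

        auto output = outputs.at("out").get_memory();

        // host access now goes through mem_lock tied to the test stream
        cldnn::mem_lock<float> output_ptr(output, get_test_stream());
        for (size_t i = 0; i < output->get_layout().count(); ++i) {
            EXPECT_FLOAT_EQ(float(i + 1), output_ptr[i]);
        }
    }

The same shape recurs in every test below: only the primitives between input_layout and the output check differ.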
@@ -2056,11 +2048,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv16_dw) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f}); @@ -2069,7 +2061,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv set_values(biases, { 1.0f, 1.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1,1,2,2 }), @@ -2090,7 +2082,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -15.f, 5.f, 0.f, 1.25f, @@ -2112,11 +2104,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv } TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv16_dw) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(3, 3)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(3, 3)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f}); @@ -2125,7 +2117,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 set_values(biases, { 0.0f, 0.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 4, 4 }, { 0, 0, -2, -2 }), @@ -2146,7 +2138,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 40.f, 0.f, 1.5f, @@ -2165,11 +2157,11 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 } TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv16_dw_batch2) { - engine engine; + auto& engine = 
get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(3, 3)) }); - auto biases = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(3, 3)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f, @@ -2181,7 +2173,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 set_values(biases, { 0.0f, 0.0f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 4, 4 }, { 0, 0, -2, -2 }), @@ -2202,7 +2194,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 40.f, 0.f, 1.5f, @@ -2250,10 +2242,10 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid // 3.5 1.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 32, 1, 2, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oizyx,{ 1, 1, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 32, 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oizyx,{ 1, 1, 2, 2, 2 } }); std::vector input_single_batch = { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 6.f, 9.f }; std::vector input_batched; @@ -2267,7 +2259,7 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f, -2.0f, 0.5f, 3.5f, 1.5f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), deconvolution("deconv", "input", { "weights" }, { 1,1,2,2,2 }, { 0, 0, -1, -1, -1 }), reorder("out", "deconv", format::bfzyx, data_types::f32) @@ -2286,7 +2278,7 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid EXPECT_EQ(outputs.begin()->first, "out"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { 12.f, 1.75f, 3.f, -18.f, @@ -2301,11 +2293,11 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid } TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1_pad1_replace_to_conv) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); - auto weights = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); - auto biases = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 
1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f16, format::bfyx,{ 1, 2, 1, 1 } }); set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), FLOAT16(6.f), FLOAT16(9.f), FLOAT16(1.f), FLOAT16(3.f), FLOAT16(2.f), FLOAT16(4.f) @@ -2317,7 +2309,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1_pad set_values(biases, { FLOAT16(1.0f), FLOAT16(-1.0f) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", format::fs_b_yx_fsv32, data_types::f16), data("weights", weights), data("biases", biases), @@ -2337,7 +2329,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1_pad auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); std::vector expected_output_vec = { -15.f, 16.f, 2.f, 45.f, -5.5f, 18.75f, 43.f, 61.f, -3.5f, @@ -2345,7 +2337,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1_pad -1.f, -3.f, 7.f, 4.f, 17.5f, 7.5f, 15.f, 28.f, -1.f, -5.f, -12.f, 2.f, -18.f, -49.f, -18.f, -19.f, -51.f, -29.f, }; - ASSERT_EQ(expected_output_vec.size(), output_prim.count()); + ASSERT_EQ(expected_output_vec.size(), output_prim->count()); for (size_t i = 0; i < expected_output_vec.size(); i++) { EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]) << " index=" << i; @@ -2449,8 +2441,8 @@ template class deconvolution_random_test_base { public: template - void set_memory(cldnn::memory& mem, const VVVVVF& data) { - auto ptr = mem.pointer(); + void set_memory(cldnn::memory::ptr mem, const VVVVVF& data) { + cldnn::mem_lock ptr(mem, get_test_stream()); auto b = data.size(); auto f = data[0].size(); @@ -2464,7 +2456,7 @@ public: for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[bi][fi][zi][yi][xi]; } } @@ -2474,8 +2466,8 @@ public: } template - void set_memory_weights(cldnn::memory& mem, const VVVVVVF& data) { - auto ptr = mem.pointer(); + void set_memory_weights(cldnn::memory::ptr mem, const VVVVVVF& data) { + cldnn::mem_lock ptr(mem, get_test_stream()); auto g = data.size(); auto b = data[0].size(); @@ -2491,7 +2483,7 @@ public: for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = cldnn::tensor(group(gi), batch(bi), feature(fi), spatial(xi, yi, zi, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[gi][bi][fi][zi][yi][xi]; } } @@ -2526,7 +2518,7 @@ public: type_test_ranges::max); } - void run(cldnn::engine eng, const deconvolution_random_test_params& params, cldnn::build_options build_opts) { + void run(cldnn::engine& eng, const deconvolution_random_test_params& params, cldnn::build_options build_opts) { uint32_t groups = params.weights_size.group[0]; size_t ifm = params.weights_size.feature[0]; size_t ofm = params.weights_size.batch[0]; @@ -2537,8 +2529,8 @@ public: auto in_layout = cldnn::layout(cldnn::type_to_data_type::value, params.input_format, params.input_size); auto wei_layout = cldnn::layout(cldnn::type_to_data_type::value, params.weights_format, params.weights_size); 
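Throughout this series the allocation change follows one pattern, visible in the hunk that continues below: the free function memory::allocate(engine, layout) becomes engine.allocate_memory(layout), the engine comes by reference from get_test_engine() instead of being copied or default-constructed, and the result is a cldnn::memory::ptr (a shared pointer) rather than a value-type memory object. A minimal illustrative sketch of the new-style usage, not part of the patch, assuming only the test_utils.h helpers (get_test_engine, set_values) already used by these tests:

    // Sketch only: engine-based allocation after this change.
    auto& engine = get_test_engine();                                      // engine held by reference
    cldnn::memory::ptr input = engine.allocate_memory(
        { cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 } });  // was: memory::allocate(engine, layout)
    set_values(input, { 1.f, 2.f, 3.f, 4.f });                             // helpers now accept memory::ptr
    topology topology;
    topology.add(input_layout("input", input->get_layout()));              // layout reached through operator->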
- auto wei_mem = cldnn::memory::allocate(eng, wei_layout); - auto in_mem = cldnn::memory::allocate(eng, in_layout); + auto wei_mem = eng.allocate_memory(wei_layout); + auto in_mem = eng.allocate_memory(in_layout); this->set_memory_weights(wei_mem, weights_data); this->set_memory(in_mem, input_data); @@ -2553,7 +2545,7 @@ public: if (params.with_bias) { auto bias_size = cldnn::tensor(feature(params.weights_size.batch[0] * params.weights_size.group[0])); auto bias_lay = cldnn::layout(cldnn::type_to_data_type::value, cldnn::format::bfyx, bias_size); - auto bias_mem = cldnn::memory::allocate(eng, bias_lay); + auto bias_mem = eng.allocate_memory(bias_lay); bias_data = generate_random_1d(bias_lay.size.feature[0], -1, 1); set_values(bias_mem, bias_data); topo.add(cldnn::data("bias", bias_mem)); @@ -2581,10 +2573,10 @@ public: // Compare results { - auto ptr = out_mem.pointer(); + cldnn::mem_lock ptr(out_mem, get_test_stream()); - auto b = static_cast(out_mem.get_layout().size.batch[0]); - auto of = static_cast(out_mem.get_layout().size.feature[0]); + auto b = static_cast(out_mem->get_layout().size.batch[0]); + auto of = static_cast(out_mem->get_layout().size.feature[0]); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < of; ++fi) { @@ -2597,16 +2589,16 @@ public: params.input_offset, group * ifm); - ASSERT_EQ(reference.size(), out_mem.get_layout().size.spatial[2]); - ASSERT_EQ(reference[0].size(), out_mem.get_layout().size.spatial[1]); - ASSERT_EQ(reference[0][0].size(), out_mem.get_layout().size.spatial[0]); + ASSERT_EQ(reference.size(), out_mem->get_layout().size.spatial[2]); + ASSERT_EQ(reference[0].size(), out_mem->get_layout().size.spatial[1]); + ASSERT_EQ(reference[0][0].size(), out_mem->get_layout().size.spatial[0]); for (size_t zi = 0; zi < reference.size(); zi++) { for (size_t yi = 0; yi < reference[0].size(); yi++) { for (size_t xi = 0; xi < reference[0][0].size(); xi++) { auto ref_val = reference[zi][yi][xi]; auto out_coords = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0)); - auto out_offset = out_mem.get_layout().get_linear_offset(out_coords); + auto out_offset = out_mem->get_layout().get_linear_offset(out_coords); auto out_val = ptr[out_offset]; TYPED_ASSERT_EQ(ref_val, out_val) << "at b=" << bi << ", f=" << fi << ", z=" << zi << ", y=" << yi << ", x=" << xi << std::endl @@ -2626,7 +2618,6 @@ public: class deconvolution_random_test : public testing::TestWithParam { protected: void SetUp() override { - eng = get_test_engine(); build_opts.set_option(cldnn::build_option::optimize_data(true)); } @@ -2650,7 +2641,6 @@ protected: } } - cldnn::engine eng; cldnn::build_options build_opts; private: @@ -2658,7 +2648,7 @@ private: void run_typed() { auto& params = GetParam(); deconvolution_random_test_base test; - test.run(eng, params, build_opts); + test.run(get_test_engine(), params, build_opts); } template diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp index 6af2651aa2f..116d6ba21f4 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp @@ -4,23 +4,19 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/concatenation.hpp" -#include "api/convolution.hpp" -#include "api/data.hpp" -#include 
"api/eltwise.hpp" -#include "api/fully_connected.hpp" -#include "api/pooling.hpp" -#include "api/crop.hpp" -#include "api/resample.hpp" -#include "api/reshape.hpp" -#include -#include -#include #include "test_utils/test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + using namespace cldnn; using namespace tests; @@ -59,16 +55,16 @@ TEST(depth_concatenate_f32_gpu, test01) { // 0 -0.2 :f4 // - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 2, 1, 1}}); - auto input2 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 3, 1, 1}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 2, 1, 1}}); + auto input2 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 3, 1, 1}}); set_values(input1, {0.5f, 0.7f, 0.2f, 0.4f}); set_values(input2, {1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(concatenation("depth1", {"input1", "input2"}, concatenation::along_f)); network network(engine, topology); @@ -82,7 +78,7 @@ TEST(depth_concatenate_f32_gpu, test01) { auto output = outputs.at("depth1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_FLOAT_EQ(0.5f, output_ptr[0]); EXPECT_FLOAT_EQ(0.7f, output_ptr[1]); EXPECT_FLOAT_EQ(0.2f, output_ptr[2]); @@ -118,16 +114,16 @@ void concat_basic_with_reorder() { // 0 0 :f4 // - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 2, 1, 1}}); - auto input2 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 3, 1, 1}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 2, 1, 1}}); + auto input2 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 3, 1, 1}}); auto outs = {3.0f, 4.0f, 0.0f, 1.0f, 1.0f, 4.0f, -4.0f, -8.0f, 0.0f, 0.0f}; set_values(input1, {2.5f, 3.7f, 0.2f, 1.4f}); set_values(input2, {1.0f, 4.1f, -4.3f, -7.5f, 0.0f, -0.2f}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("to_int1", "input1", {DType, format::yxfb, {2, 2, 1, 1}})); topology.add(reorder("to_int2", "input2", {DType, format::yxfb, {2, 3, 1, 1}})); topology.add(concatenation("depth1", {"to_int1", "to_int2"}, concatenation::along_f)); @@ -144,7 +140,7 @@ void concat_basic_with_reorder() { auto output = outputs.at("to_float").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int ptr_cntr = 0; for (const auto& ref : outs) { EXPECT_FLOAT_EQ(ref, output_ptr[ptr_cntr++]); @@ -194,19 +190,19 @@ TEST(depth_concatenate_f32_gpu, test02) { // 0 -0.2 :f7 // - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 2, 1, 1}}); - auto input2 = memory::allocate(engine, {data_types::f32, format::yxfb, {2, 3, 1, 1}}); - auto input3 = memory::allocate(engine, {data_types::f32, 
format::bfyx, {2, 3, 1, 1}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 2, 1, 1}}); + auto input2 = engine.allocate_memory({data_types::f32, format::yxfb, {2, 3, 1, 1}}); + auto input3 = engine.allocate_memory({data_types::f32, format::bfyx, {2, 3, 1, 1}}); set_values(input1, {0.5f, 0.7f, 0.2f, 0.4f}); set_values(input2, {1.0f, 0.1f, 0.3f, -0.5f, 0.0f, -0.2f}); set_values(input3, {1.0f, 0.3f, 0.0f, 0.1f, -0.5f, -0.2f}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); topology.add(concatenation("depth1", {"input1", "input2", "input3"}, concatenation::along_f)); network network(engine, topology); @@ -221,7 +217,7 @@ TEST(depth_concatenate_f32_gpu, test02) { auto output = outputs.at("depth1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_FLOAT_EQ(0.5f, output_ptr[0]); EXPECT_FLOAT_EQ(0.7f, output_ptr[1]); EXPECT_FLOAT_EQ(0.2f, output_ptr[2]); @@ -241,14 +237,14 @@ TEST(depth_concatenate_f32_gpu, test02) { } TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) { - engine engine; - auto input1 = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 2, 2}}); - auto weights = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 2, 1}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 2, 2}}); + auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 2, 1}}); set_values(input1, {16.0f, 32.0f, 128.0f, 256.0f}); set_values(weights, {.1f, .2f}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); topology.add(pooling("pool1", "input1", cldnn::pooling_mode::max, {1, 1, 2, 1}, /*kernel*/ @@ -267,7 +263,7 @@ TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) { auto outputs = network.execute({}); auto output = outputs.at("conv").get_memory(); std::vector out_ref = {6.4f, 8.f, 51.2f, 64.f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { EXPECT_NEAR(output_ptr[i], out_ref[i], 1e-3); } @@ -278,13 +274,13 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { // Despite having concatenations one after another and connected to different non padded activation primitives, // graph should remove all concatenations from execution. 
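Host-side access changes just as mechanically across these hunks: the former pointer() accessor on a memory value becomes a cldnn::mem_lock constructed from the memory::ptr and the test stream, and counts and layouts are queried through the pointer (output->count(), output->get_layout()). A hedged sketch of the read path, with the element type written out explicitly as float (matching data_types::f32) and names mirroring the surrounding tests; it is illustrative, not part of the patch:

    // Sketch only: reading a result buffer through mem_lock.
    auto output = outputs.at("depth1").get_memory();                  // memory::ptr
    // old: auto output_ptr = output.pointer<float>();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());     // locks the buffer for host reads
    ASSERT_EQ(expected_output_vec.size(), output->count());           // was: output.count()
    for (size_t i = 0; i < expected_output_vec.size(); ++i) {
        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);       // indexes like the old pointer
    }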
- const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 2, 2, 1}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 2, 2, 1}}); set_values(input1, {16.0f, 32.0f, 128.0f, 256.0f}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); topology.add(activation("relu1", "input1", activation_func::relu)); topology.add(activation("relu2", "relu1", activation_func::sqrt)); topology.add(concatenation("depth1", {"relu2", "relu1"}, concatenation::along_f)); @@ -303,7 +299,7 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { auto outputs = network.execute({}); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); auto executed_primitives = network.get_executed_primitives(); EXPECT_TRUE(executed_primitives.count("depth1") == 0); @@ -331,9 +327,9 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { TEST(depth_concatenate_f32_gpu, test04_fused_relu) { // 2 inputs of size 3x10x10 concatenated on f axis with fused relu - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 10, 10}}); - auto input2 = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 10, 10}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 10, 10}}); + auto input2 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 10, 10}}); std::vector input1_vec = generate_random_input(1, 3, 10, 10, -10, 10); set_values(input1, input1_vec); @@ -341,8 +337,8 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { set_values(input2, input2_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(concatenation("depth1", {"input1", "input2"}, concatenation::along_f)); topology.add(activation("relu1", "depth1", activation_func::relu)); @@ -359,7 +355,7 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { auto output = outputs.at("relu1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); unsigned int input_element_count = 300; for (unsigned int i = 0; i < 600; i++) { if (i < input_element_count) @@ -372,9 +368,9 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { TEST(depth_concatenate_f32_gpu, test05_different_formats) { // 2 inputs of size 3x2x2 concatenated on f axis - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 2, 2}}); - auto input2 = memory::allocate(engine, {data_types::f32, format::yxfb, {1, 3, 2, 2}}); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 2, 2}}); + auto input2 = engine.allocate_memory({data_types::f32, format::yxfb, {1, 3, 2, 2}}); set_values(input1, {1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, @@ -393,8 +389,8 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) { -3.0f, -3.0f, -3.0f, -3.0f}; topology topology; - topology.add(input_layout("input1", input1.get_layout())); - 
topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reshape("reshape1", "input1", {1, 3, 2, 2})); topology.add(reshape("reshape2", "input2", {1, 3, 2, 2})); topology.add(concatenation("depth1", {"reshape1", "reshape2"}, concatenation::along_f)); @@ -412,7 +408,7 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) { EXPECT_EQ(outputs.begin()->first, "output"); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int cntr = 0; for (float val : output_ptr) { EXPECT_EQ(val, out_ref[cntr++]); @@ -429,16 +425,16 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { const int32_t input_f = 32; const int32_t output_f = 3 * input_f; - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); - auto input2 = memory::allocate(engine, { data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({ data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); + auto input2 = engine.allocate_memory({ data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); auto input1_data = generate_random_4d(1, input_f, 1, 1, -1, 1); auto input2_data = generate_random_4d(1, input_f, 1, 1, -1, 1); set_values(input1, flatten_4d(format::bfyx, input1_data)); set_values(input2, flatten_4d(format::bfyx, input2_data)); - auto weights = memory::allocate(engine, { data_types::f16, format::oiyx, {input_f, input_f, 3, 3} }); + auto weights = engine.allocate_memory({ data_types::f16, format::oiyx, {input_f, input_f, 3, 3} }); // Construct weights for convolution that just double input values. 
VVVVF weights_data; weights_data.resize(input_f); @@ -449,8 +445,8 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { set_values(weights, flatten_4d(format::bfyx, weights_data)); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(activation("actv1", "input1", activation_func::linear, { 0.75f, 0.0f })); topology.add(activation("actv2", "input2", activation_func::linear, { 0.5f, 0.0f })); topology.add(data("weights", weights)); @@ -482,8 +478,8 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { } auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); - ASSERT_EQ(output.count(), output_f); + cldnn::mem_lock output_ptr(output, get_test_stream()); + ASSERT_EQ(output->count(), output_f); for (size_t i = 0; i < output_f; ++i) { auto& val = output_ptr[i]; float ref; @@ -506,16 +502,16 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { const int32_t input_f = 32; const int32_t output_f = 2 * input_f; - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); - auto input2 = memory::allocate(engine, { data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({ data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); + auto input2 = engine.allocate_memory({ data_types::f16, format::fs_b_yx_fsv32, {1, input_f, 1, 1} }); auto input1_data = generate_random_4d(1, input_f, 1, 1, -1, 1); auto input2_data = generate_random_4d(1, input_f, 1, 1, -1, 1); set_values(input1, flatten_4d(format::bfyx, input1_data)); set_values(input2, flatten_4d(format::bfyx, input2_data)); - auto weights = memory::allocate(engine, { data_types::f16, format::oiyx, {output_f, output_f, 3, 3} }); + auto weights = engine.allocate_memory({ data_types::f16, format::oiyx, {output_f, output_f, 3, 3} }); // Construct weights for convolution that just double input values. 
VVVVF weights_data; weights_data.resize(output_f); @@ -526,8 +522,8 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { set_values(weights, flatten_4d(format::bfyx, weights_data)); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(activation("actv1", "input1", activation_func::linear, { 0.75f, 0.0f })); topology.add(activation("actv2", "input2", activation_func::linear, { 0.5f, 0.0f })); topology.add(concatenation("depth1", { "actv1", "actv2" }, concatenation::along_f)); @@ -557,8 +553,8 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { } auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); - ASSERT_EQ(output.count(), output_f); + cldnn::mem_lock output_ptr(output, get_test_stream()); + ASSERT_EQ(output->count(), output_f); for (size_t i = 0; i < output_f; ++i) { auto& val = output_ptr[i]; float ref; @@ -579,9 +575,9 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { const int32_t input_f = 16; const int32_t output_f = 2 * input_f; - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, {1, input_f, 1, 1} }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, {1, input_f, 1, 1} }); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, {1, input_f, 1, 1} }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, {1, input_f, 1, 1} }); auto input1_data = generate_random_4d(1, input_f, 1, 1, -1, 1); auto input2_data = generate_random_4d(1, input_f, 1, 1, -1, 1); @@ -589,8 +585,8 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { set_values(input2, flatten_4d(format::bfyx, input2_data)); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(activation("actv1", "input1", activation_func::linear, { 0.75f, 0.0f })); topology.add(activation("actv2", "input2", activation_func::linear, { 0.5f, 0.0f })); topology.add(concatenation("depth1", { "actv1", "actv2" }, concatenation::along_f)); @@ -610,8 +606,8 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { EXPECT_TRUE(executed_primitives.count("depth1") == 1); auto output = outputs.at("depth1").get_memory(); - auto output_ptr = output.pointer(); - ASSERT_EQ(output.count(), output_f); + cldnn::mem_lock output_ptr(output, get_test_stream()); + ASSERT_EQ(output->count(), output_f); for (size_t i = 0; i < output_f; ++i) { auto& val = output_ptr[i]; float ref; @@ -625,23 +621,23 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { } TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; const int in1_f = 2, in2_f = 1; const int b = 2, x = 2, y = 4; - auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ b, in1_f, y, x } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ b, in2_f, y, x } }); - unsigned input2_start_value = (unsigned)input1.count() + 1; + auto input1 = engine.allocate_memory({ data_types::f32, 
format::yxfb,{ b, in1_f, y, x } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ b, in2_f, y, x } }); + unsigned input2_start_value = (unsigned)input1->count() + 1; - std::vector in1(input1.count()); - std::vector in2(input2.count()); + std::vector in1(input1->count()); + std::vector in2(input2->count()); - for (unsigned i = 0; i < input1.count(); i++) + for (unsigned i = 0; i < input1->count(); i++) { in1[i] = (float)(i + 1); } - for (unsigned i = 0; i < input2.count(); i++) + for (unsigned i = 0; i < input2->count(); i++) { in2[i] = (float)(i + input2_start_value); } @@ -652,8 +648,8 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { // Special constrution of topology to run buffer fusing optimization // for concatenation with different format inputs topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(concatenation("depth1", { "input1" }, concatenation::along_f)); topology.add(concatenation("depth2", { "input2" }, concatenation::along_f)); // In the step below there will be run of buffer fusing optimization for concatenation with @@ -672,7 +668,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { EXPECT_EQ(outputs.begin()->first, "depth4"); auto output = outputs.at("depth4").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int input1_values_count = in1_f * x; int input2_values_count = in2_f * x; @@ -680,7 +676,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { int input2_batch_offset = x * y; int out_offset = 0; - for (unsigned i = 0; i < input1.count(); i++) + for (unsigned i = 0; i < input1->count(); i++) { int value = i + 1; EXPECT_FLOAT_EQ(float(value), output_ptr[out_offset++]); @@ -692,7 +688,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { } out_offset = input1_values_count; - for (unsigned i = 0; i < input2.count() / b; i++) + for (unsigned i = 0; i < input2->count() / b; i++) { for (unsigned j = 0; j < b; j++) { @@ -709,9 +705,9 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2,4,1,2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2,4,1,2 } }); std::vector values = { 0.1f, 0.2f, 0.3f, 0.4f, @@ -722,7 +718,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { set_values(input1, values); topology topology; - topology.add(input_layout("input1", input1.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); topology.add(reshape("reshape", "input1", tensor(2, 1, 4, 2))); topology.add(concatenation("depth1", { "reshape" }, concatenation::along_f)); topology.add(concatenation("depth2", { "depth1" }, concatenation::along_f)); @@ -738,7 +734,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { auto output = outputs.at("depth2").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -747,13 +743,13 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { 
} TEST(depth_concatenate_i32_gpu, optimize_data01) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 1, 1}}); + auto input = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 1, 1}}); topology topology; topology.add( - input_layout("input", input.get_layout())); + input_layout("input", input->get_layout())); topology.add(cldnn::concatenation("int1", {"input"}, cldnn::concatenation::along_f)); topology.add(cldnn::concatenation("result1", {"int1"}, cldnn::concatenation::along_f)); topology.add(cldnn::concatenation("result2", {"int1"}, cldnn::concatenation::along_f)); @@ -768,28 +764,28 @@ TEST(depth_concatenate_i32_gpu, optimize_data01) { auto outputs = network.execute(); for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); EXPECT_EQ(output_ptr[0], out_data[0]); } } TEST(depth_concatenate_i32_gpu, optimize_data02) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input1 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); - auto input2 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); - auto input3 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); - auto input4 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input2 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input3 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input4 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; topology.add( - input_layout("input1", input1.get_layout())); + input_layout("input1", input1->get_layout())); topology.add( - input_layout("input2", input2.get_layout())); + input_layout("input2", input2->get_layout())); topology.add( - input_layout("input3", input3.get_layout())); + input_layout("input3", input3->get_layout())); topology.add( - input_layout("input4", input4.get_layout())); + input_layout("input4", input4->get_layout())); topology.add(cldnn::concatenation("concat1", {"input1", "input2"}, cldnn::concatenation::along_x)); topology.add(cldnn::concatenation("concat2", {"input3", "input4"}, cldnn::concatenation::along_x)); @@ -833,7 +829,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { network.set_input_data("input4", input4); auto outputs = network.execute(); - auto output_concat6 = outputs.at("concat6").get_memory().pointer(); + cldnn::mem_lock output_concat6(outputs.at("concat6").get_memory(), get_test_stream()); for (size_t i = 0; i < output_concat6.size(); i++) { EXPECT_EQ(output_concat6[i], c6_data[i]); @@ -841,13 +837,13 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { } TEST(depth_concatenate_i32_gpu, optimize_data03) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input1 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; topology.add( - input_layout("input1", input1.get_layout())); + input_layout("input1", input1->get_layout())); topology.add(cldnn::concatenation("concat1", {"input1"}, 
cldnn::concatenation::along_x)); @@ -873,7 +869,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { auto outputs = network.execute(); for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { EXPECT_EQ(output_ptr[i], output_data[i]); } @@ -881,13 +877,13 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { } TEST(depth_concatenate_i32_gpu, optimize_data04) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input1 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; topology.add( - input_layout("input1", input1.get_layout())); + input_layout("input1", input1->get_layout())); topology.add(cldnn::concatenation("concat1", {"input1"}, cldnn::concatenation::along_x)); @@ -913,7 +909,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { auto outputs = network.execute(); for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { EXPECT_EQ(output_ptr[i], output_data[i]); } @@ -921,13 +917,13 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { } TEST(depth_concatenate_i32_gpu, optimize_data05) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; - auto input1 = memory::allocate(engine, {data_types::i32, format::bfyx, {1, 1, 2, 2}}); + auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; topology.add( - input_layout("input1", input1.get_layout())); + input_layout("input1", input1->get_layout())); topology.add(cldnn::concatenation("concat1", {"input1"}, cldnn::concatenation::along_x)); @@ -953,7 +949,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data05) { auto outputs = network.execute(); - auto output_concat5 = outputs.at("concat5").get_memory().pointer(); + cldnn::mem_lock output_concat5(outputs.at("concat5").get_memory(), get_test_stream()); for (size_t i = 0; i < output_concat5.size(); i++) { EXPECT_EQ(output_concat5[i], c5_data[i]); @@ -961,7 +957,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data05) { } TEST(depth_concatenate_f32_gpu, basic_bfwzyx_along_w) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int b = 2; const int f = 3; const int x = 2; @@ -970,16 +966,16 @@ TEST(depth_concatenate_f32_gpu, basic_bfwzyx_along_w) { const int w = 9; auto input1_layout = layout(data_types::f32, format::bfwzyx, tensor{batch(b), feature(f), spatial(x, y, z, w)}); - auto input1 = memory::allocate(engine, input1_layout); + auto input1 = engine.allocate_memory(input1_layout); auto output_layout = layout(data_types::f32, format::bfwzyx, tensor{batch(b), feature(f), spatial(x, y, z, w * 2)}); topology topology; - topology.add(input_layout("input1", input1.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); topology.add(concatenation("concat", {"input1", "input1"}, concatenation::along_w)); - auto input_data = generate_random_1d(input1.count(), -1, 1); + auto input_data = generate_random_1d(input1->count(), -1, 1); - auto expected_output = std::vector(input1.count() * 2); + auto expected_output = std::vector(input1->count() * 2); for (int bi = 0; bi < b; bi++) for (int fi 
= 0; fi < f; fi++) @@ -1002,7 +998,7 @@ TEST(depth_concatenate_f32_gpu, basic_bfwzyx_along_w) { auto outputs = network.execute(); - auto output_concat = outputs.at("concat").get_memory().pointer(); + cldnn::mem_lock output_concat(outputs.at("concat").get_memory(), get_test_stream()); ASSERT_EQ(output_concat.size(), expected_output.size()); for (size_t i = 0; i < output_concat.size(); i++) { @@ -1021,19 +1017,19 @@ static network setup_depth_concatatenate_network(const std::vector d assert(dts.size() == ts.size()); const size_t sz = ts.size(); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); topology topology; std::vector input_names; input_names.resize(sz); for (size_t i = 0; i < sz; ++i) { - auto input = memory::allocate(engine, {dts[i], fmt[i], ts[i]}); + auto input = engine.allocate_memory({dts[i], fmt[i], ts[i]}); input_names[i] = "input"; input_names[i] += std::to_string(i); - topology.add(input_layout(input_names[i], input.get_layout())); + topology.add(input_layout(input_names[i], input->get_layout())); } //TODO: ask Uzi if something tests cases where there's missing input_names (nodes not present in the topology, etc.) topology.add(concatenation("depth_concat_node", input_names, concatenation::along_f)); @@ -1194,44 +1190,44 @@ public: } template - memory generate_reference_typed(const std::vector& inputs) { + memory::ptr generate_reference_typed(const std::vector& inputs) { assert(!inputs.empty()); - const int in_b = inputs[0].get_layout().size.batch[0]; - const int in_h = inputs[0].get_layout().size.spatial[1]; - const int in_w = inputs[0].get_layout().size.spatial[0]; + const int in_b = inputs[0]->get_layout().size.batch[0]; + const int in_h = inputs[0]->get_layout().size.spatial[1]; + const int in_w = inputs[0]->get_layout().size.spatial[0]; int out_f = 0; - for (const memory& input : inputs) { - assert(input.get_layout().size.batch[0] == in_b); - assert(input.get_layout().size.spatial[1] == in_h); - assert(input.get_layout().size.spatial[0] == in_w); + for (const memory::ptr& input : inputs) { + assert(input->get_layout().size.batch[0] == in_b); + assert(input->get_layout().size.spatial[1] == in_h); + assert(input->get_layout().size.spatial[0] == in_w); - out_f += input.get_layout().size.feature[0]; + out_f += input->get_layout().size.feature[0]; - assert(input.get_layout().data_type == inputs[0].get_layout().data_type); - assert(input.get_layout().format.value == inputs[0].get_layout().format.value); + assert(input->get_layout().data_type == inputs[0]->get_layout().data_type); + assert(input->get_layout().format.value == inputs[0]->get_layout().format.value); } //Output is bfyx - auto output = memory::allocate(engine, cldnn::layout(inputs[0].get_layout().data_type, cldnn::format::bfyx, tensor(in_b, out_f, in_w, in_h))); - auto out_mem = output.pointer(); + auto output = engine.allocate_memory(cldnn::layout(inputs[0]->get_layout().data_type, cldnn::format::bfyx, tensor(in_b, out_f, in_w, in_h))); + cldnn::mem_lock out_mem(output, get_test_stream()); int out_f_off = 0; - for (const memory& input : inputs) { - const auto input_desc = get_linear_memory_desc(input.get_layout()); - const auto output_desc = get_linear_memory_desc(output.get_layout()); + for (const memory::ptr& input : inputs) { + const auto input_desc = get_linear_memory_desc(input->get_layout()); + const auto output_desc = get_linear_memory_desc(output->get_layout()); - const int in_f = input.get_layout().size.feature[0]; - const auto in_mem = input.pointer(); + const int in_f = 
input->get_layout().size.feature[0]; + cldnn::mem_lock in_mem(input, get_test_stream()); for (int n = 0; n < in_b; ++n) for (int f = 0; f < in_f; ++f) for (int y = 0; y < in_h; ++y) for (int x = 0; x < in_w; ++x) { - const size_t in_idx = get_linear_index(input.get_layout(), n, f, y, x, input_desc); - const size_t out_idx = get_linear_index(output.get_layout(), n, out_f_off + f, y, x, output_desc); + const size_t in_idx = get_linear_index(input->get_layout(), n, f, y, x, input_desc); + const size_t out_idx = get_linear_index(output->get_layout(), n, out_f_off + f, y, x, output_desc); out_mem[out_idx] = in_mem[in_idx]; } @@ -1242,7 +1238,7 @@ public: return output; } - virtual memory generate_reference(const std::vector& inputs) override { + virtual memory::ptr generate_reference(const std::vector& inputs) override { if (generic_params->data_type == data_types::f32) { return generate_reference_typed(inputs); } else { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp index 6f608e9efe5..8e67dfbb4eb 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp @@ -3,19 +3,16 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include -#include "api/permute.hpp" -#include "api/reorder.hpp" +#include "test_utils.h" + +#include +#include +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -26,9 +23,9 @@ TEST(depth_to_space_fp16_gpu, d1411_bs2) { // Output : 1x1x2x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 1, 1 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 4, 1, 1 } }); size_t block_size = 2; set_values(input1, { @@ -37,7 +34,7 @@ TEST(depth_to_space_fp16_gpu, d1411_bs2) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -49,7 +46,7 @@ TEST(depth_to_space_fp16_gpu, d1411_bs2) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f @@ -66,9 +63,9 @@ TEST(depth_to_space_fp16_gpu, d1421_bs2) { // Output : 1x1x4x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 1, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 4, 1, 2 } }); size_t block_size = 2; set_values(input1, { @@ -79,7 +76,7 @@ TEST(depth_to_space_fp16_gpu, d1421_bs2) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -91,7 +88,7 @@ TEST(depth_to_space_fp16_gpu, d1421_bs2) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - 
auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 2.0f, 4.0f, 6.0f, 1.0f, 3.0f, 5.0f, 7.0f @@ -108,9 +105,9 @@ TEST(depth_to_space_fp16_gpu, d1933_bs3) { // Output : 1x1x9x9 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 9, 3, 3 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 9, 3, 3 } }); size_t block_size = 3; set_values(input1, { @@ -134,7 +131,7 @@ TEST(depth_to_space_fp16_gpu, d1933_bs3) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -146,7 +143,7 @@ TEST(depth_to_space_fp16_gpu, d1933_bs3) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 9.0f, 18.0f, 1.0f, 10.0f, 19.0f, 2.0f, 11.0f, 20.0f, 27.0f, @@ -171,9 +168,9 @@ TEST(depth_to_space_fp32_gpu, d1411_bs2) { // Output : 1x1x2x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); size_t block_size = 2; set_values(input1, { @@ -181,7 +178,7 @@ TEST(depth_to_space_fp32_gpu, d1411_bs2) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -193,7 +190,7 @@ TEST(depth_to_space_fp32_gpu, d1411_bs2) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f @@ -210,9 +207,9 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) { // Output : 1x3x1920x1080 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 12, 960, 540 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 12, 960, 540 } }); size_t block_size = 2; auto random_input = generate_random_4d(1, 12, 540, 960, -1, 1); @@ -220,7 +217,7 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) { set_values(input1, input_rnd_vec); topology topology_act; - topology_act.add(input_layout("Input0", input1.get_layout())); + topology_act.add(input_layout("Input0", input1->get_layout())); topology_act.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -232,12 +229,12 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) { auto outputs = network_act.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr (output, get_test_stream()); std::vector perm = { 0,4,5,2,1,3 }; topology topology_ref; - topology_ref.add(input_layout("Input0", input1.get_layout())); + topology_ref.add(input_layout("Input0", input1->get_layout())); topology_ref.add(reorder("reorder1", "Input0", { 
data_types::f16, format::bfwzyx, tensor{ batch(1), feature(12), spatial(1, 1, 960, 540) } })); topology_ref.add( @@ -259,9 +256,9 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) { auto outputs_ref = network_ref.execute(); auto output_ref = outputs_ref.at("reshape2").get_memory(); - auto output_ptr_ref = output_ref.pointer(); + cldnn::mem_lock output_ptr_ref(output_ref, get_test_stream()); - for (size_t i = 0; i < output.get_layout().count(); ++i) { + for (size_t i = 0; i < output->get_layout().count(); ++i) { EXPECT_EQ(output_ptr_ref[i], output_ptr[i]); } } @@ -272,9 +269,9 @@ TEST(depth_to_space_fp32_gpu, d1933_bs3) { // Output : 1x1x9x9 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 9, 3, 3 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 9, 3, 3 } }); size_t block_size = 3; set_values(input1, { @@ -290,7 +287,7 @@ TEST(depth_to_space_fp32_gpu, d1933_bs3) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -302,7 +299,7 @@ TEST(depth_to_space_fp32_gpu, d1933_bs3) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 9.0f, 18.0f, 1.0f, 10.0f, 19.0f, 2.0f, 11.0f, 20.0f, 27.0f, @@ -328,9 +325,9 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_blocks_first) { // Output : 1x2x4x4 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 8, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 2, 2 } }); size_t block_size = 2; set_values(input1, { @@ -345,7 +342,7 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_blocks_first) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::blocks_first) ); @@ -357,7 +354,7 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_blocks_first) { auto outputs = network.execute(); auto output = outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 8.0f, 1.0f, 9.0f, 16.0f, 24.0f, 17.0f, 25.0f, @@ -378,9 +375,9 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_depth_first) { // Output : 1x2x4x4 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 8, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 2, 2 } }); size_t block_size = 2; set_values(input1, { @@ -395,7 +392,7 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_depth_first) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( depth_to_space("depth_to_space", "Input0", block_size, depth_to_space_mode::depth_first) ); @@ -407,7 +404,7 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_depth_first) { auto outputs = network.execute(); auto output = 
outputs.at("depth_to_space").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 4.0f, 1.0f, 5.0f, 8.0f, 12.0f, 9.0f, 13.0f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp index 2112f227ca2..5d85613c1c5 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp @@ -3,21 +3,18 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/detection_output.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" + +#include "test_utils.h" + +#include +#include namespace cldnn { template<> struct type_to_data_type { static const data_types value = data_types::f16; }; } using namespace cldnn; -using namespace tests; +using namespace ::tests; template class detection_output_test : public ::testing::Test { @@ -26,12 +23,12 @@ public: detection_output_test() : nms_threshold(0.1f) {} - void init_buffers(cldnn::memory prior_memory, cldnn::memory confidence_memory, cldnn::memory location_memory, + void init_buffers(cldnn::memory::ptr prior_memory, cldnn::memory::ptr confidence_memory, cldnn::memory::ptr location_memory, bool share_location, bool variance_encoded_in_target = false, int prior_info_size = 4, int prior_coordinates_offset = 0, bool prior_is_normalized = true) { - auto location_ptr = location_memory.pointer(); - auto confidence_ptr = confidence_memory.pointer(); - auto prior_box_ptr = prior_memory.pointer(); + cldnn::mem_lock location_ptr(location_memory, get_test_stream()); + cldnn::mem_lock confidence_ptr(confidence_memory, get_test_stream()); + cldnn::mem_lock prior_box_ptr(prior_memory, get_test_stream()); T* prior_data = prior_box_ptr.data(); T* confidence_data = confidence_ptr.data(); @@ -93,10 +90,10 @@ public: } } - void init_buffer_sort(cldnn::memory input_buff) { - auto input_data_ptr = input_buff.pointer(); + void init_buffer_sort(cldnn::memory::ptr input_buff) { + cldnn::mem_lock input_data_ptr(input_buff, get_test_stream()); - EXPECT_EQ((int)input_buff.count(), 128); + EXPECT_EQ((int)input_buff->count(), 128); T* input_data = input_data_ptr.data(); input_data[0] = 8; @@ -119,8 +116,8 @@ public: input_data[121] = -1; input_data[122] = 0; input_data[123] = 0; input_data[124] = 0; input_data[125] = 0; input_data[126] = 0; input_data[127] = 0; } - void check_results(const memory& output, const int num, const std::string values) { - assert(num < output.get_layout().size.spatial[1]); + void check_results(const memory::ptr output, const int num, const std::string values) { + assert(num < output->get_layout().size.spatial[1]); // Split values to vector of items. std::vector items; @@ -129,13 +126,13 @@ public: EXPECT_EQ((int)items.size(), 7); // Check data. 
- auto out_ptr = output.pointer(); + cldnn::mem_lock out_ptr(output, get_test_stream()); const T* data = out_ptr.data(); for (int i = 0; i < 2; ++i) { - EXPECT_EQ(static_cast((float)data[num * output.get_layout().size.spatial[0] + i]), atoi(items[i].c_str())); + EXPECT_EQ(static_cast((float)data[num * output->get_layout().size.spatial[0] + i]), atoi(items[i].c_str())); } for (int i = 2; i < 7; ++i) { - EXPECT_TRUE(floating_point_equal(data[num * output.get_layout().size.spatial[0] + i], (T)(float)atof(items[i].c_str()))); + EXPECT_TRUE(floating_point_equal(data[num * output->get_layout().size.spatial[0] + i], (T)(float)atof(items[i].c_str()))); } } @@ -144,15 +141,15 @@ public: const int num_loc_classes = share_location ? 1 : this->num_classes; const int keep_top_k = 150; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k)); @@ -167,10 +164,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); } void setup_two_layers() { @@ -178,15 +175,15 @@ public: const int num_loc_classes = share_location ? 
1 : this->num_classes; const int keep_top_k = 150; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output_1", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k)); topology.add(detection_output("detection_output_2", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k)); @@ -205,10 +202,10 @@ public: EXPECT_EQ(it->first, "detection_output_" + std::to_string(i)); - EXPECT_EQ(it->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(it->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(it->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(it->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(it->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(it->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(it->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(it->second.get_memory()->get_layout().size.spatial[0], 7); i++; } } @@ -219,17 +216,17 @@ public: const int keep_top_k = 4; const int background_label_id = 0; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 
this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); @@ -244,10 +241,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -267,17 +264,17 @@ public: const int keep_top_k = 1; const int background_label_id = 0; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + 
topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); @@ -292,10 +289,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -309,17 +306,17 @@ public: const int keep_top_k = 6; const int background_label_id = 0; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); @@ -334,10 +331,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - 
EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -362,17 +359,17 @@ public: const int top_k = 2; const int background_label_id = 0; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); @@ -387,10 +384,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -406,17 +403,17 @@ public: const int keep_top_k = 10; const int background_label_id = -1; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { 
type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); @@ -431,10 +428,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -467,17 +464,17 @@ public: const int background_label_id = -1; const int top_k = 2; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ 
type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); @@ -492,10 +489,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -515,17 +512,17 @@ public: const int keep_top_k = 5; const int background_label_id = 0; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + 
topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); @@ -540,10 +537,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -566,17 +563,17 @@ public: const int background_label_id = 0; const int top_k = 2; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); @@ -591,10 +588,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - 
EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -611,18 +608,18 @@ public: const int background_label_id = -1; const int top_k = 2; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); - topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } }))); - topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } }))); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); + topology.add(reorder("input_location_padded", "input_location", input_location->get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } }))); + topology.add(reorder("input_confidence_padded", "input_confidence", input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } }))); topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); @@ -637,10 +634,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], 
keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); @@ -668,20 +665,20 @@ public: const int32_t prior_coordinates_offset = 1; const bool prior_is_normalized = true; - const auto& engine = get_test_engine(); - cldnn::memory input_location = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); - cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); - cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 1, 1, this->num_priors * prior_info_size } }); + auto& engine = get_test_engine(); + cldnn::memory::ptr input_location = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } }); + cldnn::memory::ptr input_confidence = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } }); + cldnn::memory::ptr input_prior_box = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 1, 1, this->num_priors * prior_info_size } }); this->init_buffers(input_prior_box, input_confidence, input_location, share_location, variance_encoded_in_target, prior_info_size, prior_coordinates_offset, prior_is_normalized); topology topology; - topology.add(input_layout("input_location", input_location.get_layout())); - topology.add(input_layout("input_confidence", input_confidence.get_layout())); - topology.add(input_layout("input_prior_box", input_prior_box.get_layout())); - topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } }))); - topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } }))); + topology.add(input_layout("input_location", input_location->get_layout())); + topology.add(input_layout("input_confidence", input_confidence->get_layout())); + topology.add(input_layout("input_prior_box", input_prior_box->get_layout())); + topology.add(reorder("input_location_padded", "input_location", input_location->get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } }))); + topology.add(reorder("input_confidence_padded", "input_confidence", input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } }))); topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k, @@ -700,10 +697,10 @@ public: EXPECT_EQ(outputs.size(), size_t(1)); EXPECT_EQ(outputs.begin()->first, "detection_output"); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1); - 
EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images); - EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.batch[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.feature[0], 1); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[1], keep_top_k * this->num_of_images); + EXPECT_EQ(outputs.begin()->second.get_memory()->get_layout().size.spatial[0], 7); auto output_prim = outputs.begin()->second.get_memory(); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp index 32456c52da3..016f4b98c69 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp @@ -3,16 +3,13 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/eltwise.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" + +#include "test_utils.h" + +#include +#include +#include +#include namespace cldnn { @@ -20,7 +17,7 @@ namespace cldnn } using namespace cldnn; -using namespace tests; +using namespace ::tests; template T eltwise_execute(cldnn::eltwise_mode mode, T x, T y) { @@ -88,17 +85,17 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f VF input1_rnd_vec = flatten_4d(test_input_fmt, input1_rnd); VF input2_rnd_vec = flatten_4d(test_input_fmt, input2_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor( input_b, input_f, input_x, input_y ); - auto input1 = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); - auto input2 = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); + auto input1 = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); + auto input2 = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(reorder("reorder1", "input1", input1->get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 })); primitive_id out_id = "eltwise"; if (relu) @@ -114,8 +111,8 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f EXPECT_EQ(outputs.begin()->first, out_id); auto output_memory = outputs.at(out_id).get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VVVVF output_cpu = eltwise_reference(input1_rnd, input2_rnd, mode, relu, slope, 
input_padding_y, input_padding_x, output_padding_y, output_padding_x); EXPECT_EQ(output_layout.format.value, test_input_fmt.value); @@ -176,10 +173,10 @@ TEST(eltwise_gpu_f32, equal_in2_float_out1_int) { // 0, 0, 0, 0, // 0, 1, 0, 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -196,8 +193,8 @@ TEST(eltwise_gpu_f32, equal_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::eq)); network network(engine, topology); @@ -211,7 +208,7 @@ TEST(eltwise_gpu_f32, equal_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 1, 0, 1, 0, 0, 1, 0, @@ -246,10 +243,10 @@ TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) { // 1, 1, 1, 1, // 1, 0, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -266,8 +263,8 @@ TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ne)); network network(engine, topology); @@ -281,7 +278,7 @@ TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 0, 1, 0, 1, 1, 0, 1, @@ -316,10 +313,10 @@ TEST(eltwise_gpu_f32, less_in2_float_out1_int) { // 1, 1, 1, 0, // 0, 0, 0, 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -336,8 +333,8 @@ TEST(eltwise_gpu_f32, less_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", 
input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::lt)); network network(engine, topology); @@ -351,7 +348,7 @@ TEST(eltwise_gpu_f32, less_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 0, 0, 0, 1, 1, 0, 0, @@ -386,10 +383,10 @@ TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) { // 1, 1, 1, 0, // 0, 1, 0, 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -406,8 +403,8 @@ TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::le)); network network(engine, topology); @@ -421,7 +418,7 @@ TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 1, 0, 1, 1, 1, 1, 0, @@ -456,10 +453,10 @@ TEST(eltwise_gpu_f32, greater_in2_float_out1_int) { // 0, 0, 0, 1, // 1, 0, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -476,8 +473,8 @@ TEST(eltwise_gpu_f32, greater_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::gt)); network network(engine, topology); @@ -491,7 +488,7 @@ TEST(eltwise_gpu_f32, greater_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 0, 1, 0, 0, 0, 0, 1, @@ -526,10 +523,10 @@ TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) { // 0, 0, 0, 1, // 1, 1, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = 
memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -546,8 +543,8 @@ TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ge)); network network(engine, topology); @@ -561,7 +558,7 @@ TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 1, 1, 1, 0, 0, 1, 1, @@ -596,10 +593,10 @@ TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) { // 1, 1, 1, 1, // 1, 0, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -616,8 +613,8 @@ TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_and)); network network(engine, topology); @@ -631,7 +628,7 @@ TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 1, 1, 1, 1, 0, 1, 1, @@ -673,11 +670,11 @@ TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) { // 1, 1, 1, 1, // 1, 0, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -701,9 +698,9 @@ TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - 
topology.add(input_layout("input3", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_and)); network network(engine, topology); @@ -718,7 +715,7 @@ TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -753,10 +750,10 @@ TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) { // 1, 1, 1, 1, // 1, 0, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -773,8 +770,8 @@ TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_or)); network network(engine, topology); @@ -788,7 +785,7 @@ TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 1, 1, 1, 1, 1, 1, 1, @@ -830,11 +827,11 @@ TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) { // 1, 1, 1, 1, // 1, 1, 1, 1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -858,9 +855,9 @@ TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_or)); network network(engine, topology); @@ -875,7 +872,7 @@ TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = 
outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 1, 1, 1, 1, 1, 1, 1, @@ -910,10 +907,10 @@ TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) { // 0, 0, 0, 0, // 0, 0, 0, 0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); set_values(input1, { 1.f, 2.5f, 5.f, 1.5f, @@ -930,8 +927,8 @@ TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) { }); topology topology; - topology.add(input_layout("input", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_xor)); network network(engine, topology); @@ -945,7 +942,7 @@ TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) { EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output = outputs.at("eltwise").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 0, 0, 0, 0, 1, 0, 0, @@ -981,14 +978,14 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) { // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); set_values(input, { @@ -1020,7 +1017,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) { 18.f,17.5f, 15.f, 22.f, 2.f, 6.f, 7.5f, 5.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1029,14 +1026,14 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) { } TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, 
eltwise_mode::sum)); set_values(input, { @@ -1083,7 +1080,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) { 1.f, 7.f, 3.5f, -3.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1092,14 +1089,14 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) { } TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); set_values(input, { @@ -1152,7 +1149,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) { 2.f, 10.5f, -1.f, -3.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1161,14 +1158,14 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) { } TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); set_values(input, { @@ -1215,7 +1212,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) { 1.f, 7.f, 3.5f, -3.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1224,14 +1221,14 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) { } TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); set_values(input, { @@ -1280,7 +1277,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) { 2.f, 6.f, 3.5f, -3.f }; - auto output_ptr = 
output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1289,14 +1286,14 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) { } TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum)); set_values(input, { @@ -1339,7 +1336,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) { 0.f, 8.5f, 1.5f, -0.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1348,14 +1345,14 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) { } TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::pow)); set_values(input, { @@ -1396,7 +1393,7 @@ TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) { 169.f, 196.f, 225.f, 256.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1405,16 +1402,16 @@ TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) { } TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); 
topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum)); set_values(input, { @@ -1474,7 +1471,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) { -2.f, 6.5f, -0.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1483,16 +1480,16 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) { } TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum)); set_values(input, { @@ -1550,7 +1547,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) { 0.f, 7.5f, -0.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1581,14 +1578,14 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) { // f1: b0: 5 6 b1: 2.5 5.2 // f1: b0: 8 8 b1: 12 8 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::max)); set_values(input, { @@ -1621,7 +1618,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) { 15.f, 17.f, 8.f, 12.f, 6.f, 8.f, 8.f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1653,13 +1650,13 @@ TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) { // f1: b0: -1 8.5 b1: 3.5 -2.5 // - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 
2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sub)); set_values(input, { @@ -1692,7 +1689,7 @@ TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) { -12.f, -16.5f, -1.f, 3.5f, -2.f, -8.5f, 8.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1710,13 +1707,13 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) { { for (auto& mode : eltwise_ops_to_test) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } })); topology.add(reorder("input2_reorder", "input2", { data_type, format::yxfb,{ 2, 2, 2, 2 } })); topology.add(eltwise("eltwise", { "input_reorder", "input2_reorder" }, mode)); @@ -1747,7 +1744,7 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) { auto output = outputs.at("eltwise_reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1786,13 +1783,13 @@ TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) { { for (auto& mode : eltwise_ops_to_test) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } })); topology.add(eltwise("eltwise", { "input_reorder", "input2" }, mode)); topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } })); @@ -1822,7 +1819,7 @@ TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) { auto output = outputs.at("eltwise_reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1872,12 +1869,12 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) { // f1: b0: 119 80 b1: 96 -18.75 // - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, 
{ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::prod)); set_values(input, { @@ -1910,7 +1907,7 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) { 7.5f, 3.5f, 119.f, 96.0f, 10.0f, -2.0f, 80.f, -18.75f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1942,16 +1939,16 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) { // f1: b0: 5 6 b1: 2.5 5.2 // f1: b0: 8 8 b1: 12 8 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); - topology.add(reorder("reorder2", "input2", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(reorder("reorder2", "input2", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(eltwise("eltwise", {"reorder", "reorder2"}, eltwise_mode::max)); set_values(input, { @@ -1984,7 +1981,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) { 15.f, 17.f, 8.f, 12.f, 6.f, 8.f, 8.f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2016,14 +2013,14 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) { // f1: b0: 7.5 8.25 b1: 11 8.25 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum, {0.5f, 0.5f}, data_types::f32)); set_values(input, { @@ -2055,7 +2052,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) { 9.f, 8.75f, 
7.5f, 11.f, 1.f, 3.f, 3.75f, 2.75f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2064,16 +2061,16 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) { } TEST(eltwise_gpu_f32, coefficients_count_check) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); std::vector coeffs0 = {}; std::vector coeffs1 = {0.5f}; @@ -2124,16 +2121,16 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) { // f1: b0: 8.5 8.75 b1: 11 8.75 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum, {0.5f, 0.5f, 0.5f}, data_types::f32)); set_values(input, { @@ -2172,7 +2169,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) { 12.f, 8.75f, 8.5f, 11.f, 3.5f, 3.5f, 4.25f, 3.25f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2211,19 +2208,19 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) { // f1: b0: 5 6 b1: 2.5 7 // f1: b0: 9 8 b1: 12 8 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ 
data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("input3", input3.get_layout())); - topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); - topology.add(reorder("reorder2", "input2", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); - topology.add(reorder("reorder3", "input3", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("input3", input3->get_layout())); + topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(reorder("reorder2", "input2", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(reorder("reorder3", "input3", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(eltwise("eltwise", {"reorder", "reorder2", "reorder3"}, eltwise_mode::max)); set_values(input, { @@ -2263,7 +2260,7 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) { 15.f, 17.f, 9.f, 12.f, 6.f, 8.f, 8.f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2300,14 +2297,14 @@ TEST(eltwise_gpu_f32, stride_test_2x2) { // f1: b0: 33 35 b1: 49 51 // f1: b0: 41 43 b1: 57 59 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", "input", "input2", { {0,0,1,1}, {0,0,2,2} }, eltwise_mode::max)); set_values(input, { @@ -2352,7 +2349,7 @@ TEST(eltwise_gpu_f32, stride_test_2x2) { 9, 25, 41, 57, 11, 27, 43, 59 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2382,14 +2379,14 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) { // f1: b0: 15 16.5 b1: 22 16.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { 
"input", "input2" }, eltwise_mode::sum)); set_values(input, { @@ -2420,7 +2417,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) { 3.5f, 3.f, 7.5f, 14.5f, 4.5f, 2.f, 8.5f, 10.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -2452,23 +2449,23 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) FLOAT16(2211),FLOAT16(2212),FLOAT16(2221),FLOAT16(2222) }; - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; return; } - auto input1 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); - auto input2 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); + auto input1 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); + auto input2 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); set_values(input1, fp16_bfyx_2x2x2x2_input); set_values(input2, fp16_bfyx_2x2x2x2_input); // GOLDEN BFYX ELTWISE topology golden_topology; - golden_topology.add(input_layout("input1", input1.get_layout())); - golden_topology.add(input_layout("input2", input2.get_layout())); + golden_topology.add(input_layout("input1", input1->get_layout())); + golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum)); network golden_network(engine, golden_topology); @@ -2477,12 +2474,12 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) auto golden_outputs = golden_network.execute(); auto golden_output = golden_outputs.at("eltwise").get_memory(); - auto golden_ptr = golden_output.pointer(); + cldnn::mem_lock golden_ptr(golden_output, get_test_stream()); // GOLDEN BFYX ELTWISE - END // FS_B_YX_FSV32 ELTWISE topology FSV32_topology; - FSV32_topology.add(input_layout("input1", input1.get_layout())); - FSV32_topology.add(input_layout("input2", input2.get_layout())); + FSV32_topology.add(input_layout("input1", input1->get_layout())); + FSV32_topology.add(input_layout("input2", input2->get_layout())); FSV32_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); FSV32_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); FSV32_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum)); @@ -2494,13 +2491,12 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) auto FSV32_outputs = FSV32_network.execute(); auto FSV32_output = FSV32_outputs.at("reorderOutput").get_memory(); - auto FSV32_ptr = FSV32_output.pointer(); + cldnn::mem_lock FSV32_ptr(FSV32_output, get_test_stream()); // FS_B_YX_FSV32 ELTWISE - END ASSERT_EQ(golden_ptr.size(), FSV32_ptr.size()); - for (size_t i = 0; i < golden_ptr.size(); i++) - { + for (size_t i = 0; i < golden_ptr.size(); i++) { EXPECT_EQ(float(golden_ptr[i]), float(FSV32_ptr[i])); } } @@ -2510,14 +2506,14 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2x2) { // Input2 : 2x2x1x1x2 // Output : 2x2x2x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 2, 1 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 1, 1, 2 } }); + auto input = 
engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 2, 2, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 1, 1, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { "input", "input2" }, eltwise_mode::sum)); set_values(input, { @@ -2551,7 +2547,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2x2) { 8.f, 3.5f, 12.f, 12.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 32; i++) { @@ -2567,23 +2563,23 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) VVVVF input_rnd = generate_random_4d(32, 96, 20, 20, 1, 3); VF fp16_bfyx_32x96x2x2_input = flatten_4d(format::bfyx, input_rnd); - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; return; } - auto input1 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); - auto input2 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); + auto input1 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); + auto input2 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); set_values(input1, fp16_bfyx_32x96x2x2_input); set_values(input2, fp16_bfyx_32x96x2x2_input); // GOLDEN BFYX ELTWISE topology golden_topology; - golden_topology.add(input_layout("input1", input1.get_layout())); - golden_topology.add(input_layout("input2", input2.get_layout())); + golden_topology.add(input_layout("input1", input1->get_layout())); + golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum)); network golden_network(engine, golden_topology); @@ -2592,12 +2588,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) auto golden_outputs = golden_network.execute(); auto golden_output = golden_outputs.at("eltwise").get_memory(); - auto golden_ptr = golden_output.pointer(); + cldnn::mem_lock golden_ptr(golden_output, get_test_stream()); // GOLDEN BFYX ELTWISE - END // MIXED INPUT, FS_B_YX_FSV32 OUTPUT topology FS_B_YX_FSV32_OUTPUT_topology; - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::byxf, input_tensor))); FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum)); @@ -2609,12 +2605,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) auto FS_B_YX_FSV32_OUTPUT_outputs = FS_B_YX_FSV32_OUTPUT_network.execute(); auto FS_B_YX_FSV32_OUTPUT_output = FS_B_YX_FSV32_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto 
FS_B_YX_FSV32_OUTPUT_ptr = FS_B_YX_FSV32_OUTPUT_output.pointer(); + cldnn::mem_lock FS_B_YX_FSV32_OUTPUT_ptr(FS_B_YX_FSV32_OUTPUT_output, get_test_stream()); // MIXED INPUT, FS_B_YX_FSV32 OUTPUT - END // MIXED INPUT, BYXF OUTPUT topology BYXF_OUTPUT_topology; - BYXF_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - BYXF_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); BYXF_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::byxf, input_tensor))); BYXF_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); BYXF_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum)); @@ -2626,47 +2622,44 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) auto BYXF_OUTPUT_outputs = BYXF_OUTPUT_network.execute(); auto BYXF_OUTPUT_output = BYXF_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto BYXF_OUTPUT_ptr = BYXF_OUTPUT_output.pointer(); + cldnn::mem_lock BYXF_OUTPUT_ptr(BYXF_OUTPUT_output, get_test_stream()); // MIXED INPUT, BYXF OUTPUT - END ASSERT_EQ(golden_ptr.size(), FS_B_YX_FSV32_OUTPUT_ptr.size()); ASSERT_EQ(golden_ptr.size(), BYXF_OUTPUT_ptr.size()); - for (size_t i = 0; i < golden_ptr.size(); i++) - { + for (size_t i = 0; i < golden_ptr.size(); i++) { EXPECT_EQ(float(golden_ptr[i]), float(FS_B_YX_FSV32_OUTPUT_ptr[i])); } - for (size_t i = 0; i < golden_ptr.size(); i++) - { + for (size_t i = 0; i < golden_ptr.size(); i++) { EXPECT_EQ(float(golden_ptr[i]), float(BYXF_OUTPUT_ptr[i])); } } -TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) -{ +TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { // Inputs are 32x96x2x2 tensor input_tensor(32, 96, 20, 20); VVVVF input_rnd = generate_random_4d(32, 96, 20, 20, 1, 3); VF fp16_bfyx_32x96x2x2_input = flatten_4d(format::bfyx, input_rnd); - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; } - auto input1 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); - auto input2 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); + auto input1 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); + auto input2 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); set_values(input1, fp16_bfyx_32x96x2x2_input); set_values(input2, fp16_bfyx_32x96x2x2_input); // GOLDEN BFYX ELTWISE topology golden_topology; - golden_topology.add(input_layout("input1", input1.get_layout())); - golden_topology.add(input_layout("input2", input2.get_layout())); + golden_topology.add(input_layout("input1", input1->get_layout())); + golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 })); network golden_network(engine, golden_topology); @@ -2675,12 +2668,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) auto golden_outputs = golden_network.execute(); auto golden_output = golden_outputs.at("eltwise").get_memory(); - auto golden_ptr = golden_output.pointer(); + cldnn::mem_lock golden_ptr(golden_output, get_test_stream()); // GOLDEN BFYX ELTWISE - END // MIXED INPUT, FS_B_YX_FSV32 OUTPUT topology FS_B_YX_FSV32_OUTPUT_topology; - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::byxf, input_tensor))); FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 })); @@ -2693,12 +2686,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) auto FS_B_YX_FSV32_OUTPUT_outputs = FS_B_YX_FSV32_OUTPUT_network.execute(); auto FS_B_YX_FSV32_OUTPUT_output = FS_B_YX_FSV32_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto FS_B_YX_FSV32_OUTPUT_ptr = FS_B_YX_FSV32_OUTPUT_output.pointer(); + cldnn::mem_lock FS_B_YX_FSV32_OUTPUT_ptr(FS_B_YX_FSV32_OUTPUT_output, get_test_stream()); // MIXED INPUT, FS_B_YX_FSV32 OUTPUT - END // MIXED INPUT, BYXF OUTPUT topology BYXF_OUTPUT_topology; - BYXF_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - BYXF_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); BYXF_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::byxf, input_tensor))); BYXF_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); BYXF_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum, padding{ {0,0,5,10} , 0 })); @@ -2711,18 +2704,16 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) auto BYXF_OUTPUT_outputs = BYXF_OUTPUT_network.execute(); auto BYXF_OUTPUT_output = BYXF_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto BYXF_OUTPUT_ptr = BYXF_OUTPUT_output.pointer(); + cldnn::mem_lock 
BYXF_OUTPUT_ptr(BYXF_OUTPUT_output, get_test_stream()); // MIXED INPUT, BYXF OUTPUT - END ASSERT_EQ(golden_ptr.size(), FS_B_YX_FSV32_OUTPUT_ptr.size()); ASSERT_EQ(golden_ptr.size(), BYXF_OUTPUT_ptr.size()); - for (size_t i = 0; i < golden_ptr.size(); i++) - { + for (size_t i = 0; i < golden_ptr.size(); i++) { EXPECT_EQ(float(golden_ptr[i]), float(FS_B_YX_FSV32_OUTPUT_ptr[i])); } - for (size_t i = 0; i < golden_ptr.size(); i++) - { + for (size_t i = 0; i < golden_ptr.size(); i++) { EXPECT_EQ(float(golden_ptr[i]), float(BYXF_OUTPUT_ptr[i])); } } @@ -2735,23 +2726,23 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) VVVVF input_rnd = generate_random_4d(32, 96, 20, 20, 1, 3); VF fp16_bfyx_32x96x2x2_input = flatten_4d(format::bfyx, input_rnd); - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; return; } - auto input1 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); - auto input2 = memory::allocate(engine, { data_types::f16,format::bfyx, input_tensor }); + auto input1 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); + auto input2 = engine.allocate_memory({ data_types::f16,format::bfyx, input_tensor }); set_values(input1, fp16_bfyx_32x96x2x2_input); set_values(input2, fp16_bfyx_32x96x2x2_input); // GOLDEN BFYX ELTWISE topology golden_topology; - golden_topology.add(input_layout("input1", input1.get_layout())); - golden_topology.add(input_layout("input2", input2.get_layout())); + golden_topology.add(input_layout("input1", input1->get_layout())); + golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,10,15},0.0f }))); golden_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,7},0.0f }))); golden_topology.add(eltwise("eltwise", "input1", "input2", eltwise_mode::sum)); @@ -2762,12 +2753,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) auto golden_outputs = golden_network.execute(); auto golden_output = golden_outputs.at("eltwise").get_memory(); - auto golden_ptr = golden_output.pointer(); + cldnn::mem_lock golden_ptr(golden_output, get_test_stream()); // GOLDEN BFYX ELTWISE - END // MIXED INPUT, FS_B_YX_FSV32 OUTPUT topology FS_B_YX_FSV32_OUTPUT_topology; - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + FS_B_YX_FSV32_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0,0,10,15},0.0f }))); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::byxf, input_tensor, padding{ {0,0,5,7},0.0f }))); FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum)); @@ -2779,12 +2770,12 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) auto FS_B_YX_FSV32_OUTPUT_outputs = FS_B_YX_FSV32_OUTPUT_network.execute(); auto FS_B_YX_FSV32_OUTPUT_output = 
FS_B_YX_FSV32_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto FS_B_YX_FSV32_OUTPUT_ptr = FS_B_YX_FSV32_OUTPUT_output.pointer(); + cldnn::mem_lock FS_B_YX_FSV32_OUTPUT_ptr(FS_B_YX_FSV32_OUTPUT_output, get_test_stream()); // MIXED INPUT, FS_B_YX_FSV32 OUTPUT - END // MIXED INPUT, BYXF OUTPUT topology BYXF_OUTPUT_topology; - BYXF_OUTPUT_topology.add(input_layout("input1", input1.get_layout())); - BYXF_OUTPUT_topology.add(input_layout("input2", input2.get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input1", input1->get_layout())); + BYXF_OUTPUT_topology.add(input_layout("input2", input2->get_layout())); BYXF_OUTPUT_topology.add(reorder("reorder1", "input1", layout(data_types::f16, format::byxf, input_tensor, padding{ {0,0,10,15},0.0f }))); BYXF_OUTPUT_topology.add(reorder("reorder2", "input2", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0,0,5,7},0.0f }))); BYXF_OUTPUT_topology.add(eltwise("eltwise", "reorder1", "reorder2", eltwise_mode::sum)); @@ -2796,7 +2787,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) auto BYXF_OUTPUT_outputs = BYXF_OUTPUT_network.execute(); auto BYXF_OUTPUT_output = BYXF_OUTPUT_outputs.at("reorderOutput").get_memory(); - auto BYXF_OUTPUT_ptr = BYXF_OUTPUT_output.pointer(); + cldnn::mem_lock BYXF_OUTPUT_ptr(BYXF_OUTPUT_output, get_test_stream()); // MIXED INPUT, BYXF OUTPUT - END ASSERT_EQ(golden_ptr.size(), FS_B_YX_FSV32_OUTPUT_ptr.size()); @@ -2874,17 +2865,17 @@ void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int in VF input1_rnd_vec = flatten_4d(test_input_fmt, input1_rnd); VF input2_rnd_vec = flatten_4d(test_input_fmt, input2_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor( input_b, input_f, input_x, input_y ); - auto input1 = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); - auto input2 = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); + auto input1 = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); + auto input2 = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(reorder("reorder1", "input1", input1->get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 })); network network(engine, topology); @@ -2895,8 +2886,8 @@ void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int in EXPECT_EQ(outputs.begin()->first, "eltwise"); auto output_memory = outputs.at("eltwise").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VVVVF output_cpu = eltwise_bool_reference(input1_rnd, input2_rnd, mode, input_padding_y, input_padding_x, output_padding_y, output_padding_x); 
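    // Validate the GPU output layout and compare it against the CPU reference computed by eltwise_bool_reference above.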
EXPECT_EQ(output_layout.format.value, test_input_fmt.value); @@ -2978,8 +2969,8 @@ void run_eltwise_generic_test(cldnn::eltwise_mode mode) cldnn::format test_inputs_fmt = cldnn::format::bfyx; std::pair input_size = { 227, 227 }; - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; } @@ -3012,7 +3003,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { const int BATCH = 1; const int in_B = BATCH; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int in_F = 256; @@ -3027,26 +3018,17 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { // Mem initialization // This is user data, no kernels here - auto input1 = memory::allocate(engine, - { data_types::i8, - format::bfyx, - { in_B, in_F, in_X, in_Y } }); + auto input1 = engine.allocate_memory({ data_types::i8, format::bfyx, { in_B, in_F, in_X, in_Y } }); std::vector data_i1(DataGold); for (size_t i = 0; i < data_i1.size(); i++) data_i1[i] = data_i1[i] + 1; set_values(input1, std::move(data_i1)); - auto input2 = memory::allocate(engine, - { data_types::i8, - format::bfyx, - { in_B, in_F, in_X, in_Y } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::bfyx, { in_B, in_F, in_X, in_Y } }); std::vector data_i2(DataGold); for (size_t i = 0; i < data_i2.size(); i++) data_i2[i] = data_i2[i] + 2; set_values(input2, std::move(data_i2)); - auto input3 = memory::allocate(engine, - { data_types::i8, - format::bfyx, - { in_B, in_F, in_X, in_Y } }); + auto input3 = engine.allocate_memory({ data_types::i8, format::bfyx, { in_B, in_F, in_X, in_Y } }); std::vector data_i3(DataGold); for (size_t i = 0; i < data_i3.size(); i++) data_i3[i] = data_i3[i] + 3; set_values(input3, std::move(data_i3)); @@ -3070,9 +3052,9 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { auto actv = activation("eltw_GOLD", eltw, activation_func::relu); // Create a topology - topology.add(input_layout("input1", input1.get_layout()), - input_layout("input2", input2.get_layout()), - input_layout("input3", input3.get_layout()), + topology.add(input_layout("input1", input1->get_layout()), + input_layout("input2", input2->get_layout()), + input_layout("input3", input3->get_layout()), eltw, actv); // Network processing @@ -3086,7 +3068,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { auto searchC = outputs.find("eltw_GOLD"); EXPECT_NE(searchC, outputs.end()); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); vGoldOutput.reserve(output_ptr.size()); for (size_t i = 0; i < output_ptr.size(); i++) vGoldOutput.push_back(output_ptr[i]); @@ -3119,9 +3101,9 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { "reorder3_Swizzelled" }, mode[i]); auto actv = activation("eltw_IMAD", eltw, activation_func::relu); - topology.add(input_layout("input1", input1.get_layout()), - input_layout("input2", input2.get_layout()), - input_layout("input3", input3.get_layout()), + topology.add(input_layout("input1", input1->get_layout()), + input_layout("input2", input2->get_layout()), + input_layout("input3", input3->get_layout()), eltw, actv); // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling @@ -3142,7 +3124,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { auto searchC = outputs.find("reorder_UnSwizzelled"); 
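    // Gather the un-swizzled IMAD output into vTestOutput for comparison with the golden (vGoldOutput) results.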
EXPECT_NE(searchC, outputs.end()); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); vTestOutput.reserve(output_ptr.size()); for (size_t i = 0; i < output_ptr.size(); i++) vTestOutput.push_back(output_ptr[i]); @@ -3162,8 +3144,8 @@ TEST(DISABLED_eltwise_gpu, generic_random) { VF slopes = { 0.0f, -0.0f, -17.19f, 1028.8f, std::numeric_limits::infinity(), -std::numeric_limits::infinity() }; std::vector> input_sizes = { { 100, 100 },{ 227, 227 },{ 400, 600 } }; - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; } @@ -3207,21 +3189,21 @@ struct eltwise_same_input_test_params { struct eltwise_same_input_test : testing::TestWithParam { template - void fill_random_typed(memory& mem, int min, int max, int k) { - auto size = mem.get_layout().size; + void fill_random_typed(memory::ptr mem, int min, int max, int k) { + auto size = mem->get_layout().size; size_t b = size.batch[0]; size_t f = size.feature[0]; size_t x = size.spatial[0]; size_t y = size.spatial[1]; auto data = generate_random_4d(b, f, y, x, min, max, k); - auto ptr = mem.pointer(); + mem_lock ptr{mem, get_test_stream()}; for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[bi][fi][yi][xi]; } } @@ -3229,8 +3211,8 @@ struct eltwise_same_input_test : testing::TestWithParamget_layout().data_type; switch (dt) { case data_types::f32: fill_random_typed(mem, -127, 127, 2); @@ -3250,16 +3232,16 @@ struct eltwise_same_input_test : testing::TestWithParam - bool compare_outputs(const memory& out_ref, const memory& input_ref) { - auto output_lay = out_ref.get_layout(); - auto opt_output_lay = input_ref.get_layout(); + bool compare_outputs(const memory::ptr out_ref, const memory::ptr input_ref) { + auto output_lay = out_ref->get_layout(); + auto opt_output_lay = input_ref->get_layout(); size_t b = output_lay.size.batch[0]; size_t f = output_lay.size.feature[0]; size_t x = output_lay.size.spatial[0]; size_t y = output_lay.size.spatial[1]; - auto ref_ptr = out_ref.pointer(); - auto input_ptr = input_ref.pointer(); + mem_lock ref_ptr{out_ref, get_test_stream()}; + mem_lock input_ptr{input_ref, get_test_stream()}; for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { @@ -3282,22 +3264,22 @@ struct eltwise_same_input_test : testing::TestWithParamget_layout())); + topo.add(input_layout("input2", input->get_layout())); auto prim = eltwise("eltwise", {"input1", "input2"}, eltwise_mode::sum); topo.add(prim); auto build_ops = build_options(); build_ops.set_option(build_option::outputs({"eltwise"})); - auto net = network(eng, topo, build_ops); + auto net = network(engine, topo, build_ops); net.set_input_data("input1", input); net.set_input_data("input2", input); @@ -3437,21 +3419,21 @@ TEST_P(eltwise_test, fsv16) { VF input1_rnd_vec = flatten_6d(format::bfwzyx, input1_rnd); VF input2_rnd_vec = flatten_6d(format::bfwzyx, input2_rnd); - const auto& 
engine = get_test_engine(); + auto& engine = get_test_engine(); auto fmt_pln = input0_size.size() == 4 ? format::bfyx : format::bfzyx; auto fmt_fsv16 = input0_size.size() == 4 ? format::b_fs_yx_fsv16 : format::b_fs_zyx_fsv16; auto in0_size = tensor(fmt_pln, input0_size); auto in1_size = tensor(fmt_pln, input1_size); - auto input1 = memory::allocate(engine, { data_types::f32, fmt_pln, in0_size }); - auto input2 = memory::allocate(engine, { data_types::f32, fmt_pln, in1_size }); + auto input1 = engine.allocate_memory({ data_types::f32, fmt_pln, in0_size }); + auto input2 = engine.allocate_memory({ data_types::f32, fmt_pln, in1_size }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("reorder1", "input1", fmt_fsv16, dt)); topology.add(reorder("reorder2", "input2", fmt_fsv16, dt)); topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode)); @@ -3469,7 +3451,7 @@ TEST_P(eltwise_test, fsv16) { EXPECT_EQ(outputs.begin()->first, out_id); auto output_memory = outputs.at(out_id).get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VF output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode); for (size_t i = 0; i < output_cpu_vec.size(); ++i) { @@ -3549,15 +3531,15 @@ TEST_P(eltwise_test_6d, bfwzyx) { auto in0_size = tensor(format::bfwzyx, input0_size); auto in1_size = tensor(format::bfwzyx, input1_size); - const auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfwzyx, in0_size }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfwzyx, in1_size }); + auto& engine = get_test_engine(); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfwzyx, in0_size }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfwzyx, in1_size }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("reorder1", "input1", format::bfwzyx, dt)); topology.add(reorder("reorder2", "input2", format::bfwzyx, dt)); topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode)); @@ -3575,7 +3557,7 @@ TEST_P(eltwise_test_6d, bfwzyx) { EXPECT_EQ(outputs.begin()->first, out_id); auto output_memory = outputs.at(out_id).get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VF output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode); for (size_t i = 0; i < output_cpu_vec.size(); ++i) { @@ -3628,21 +3610,21 @@ TEST_P(eltwise_test_mixed_precision, fsv16) { VF input1_rnd_vec = flatten_6d(format::bfwzyx, input1_rnd); VF input2_rnd_vec = flatten_6d(format::bfwzyx, input2_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto fmt_pln = input0_size.size() == 4 ? format::bfyx : format::bfzyx; auto fmt_fsv16 = input0_size.size() == 4 ? 
format::b_fs_yx_fsv16 : format::b_fs_zyx_fsv16; auto in0_size = tensor(fmt_pln, input0_size); auto in1_size = tensor(fmt_pln, input1_size); - auto input1 = memory::allocate(engine, { data_types::f32, fmt_pln, in0_size }); - auto input2 = memory::allocate(engine, { data_types::i32, fmt_pln, in1_size }); + auto input1 = engine.allocate_memory({ data_types::f32, fmt_pln, in0_size }); + auto input2 = engine.allocate_memory({ data_types::i32, fmt_pln, in1_size }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("reorder1", "input1", fmt_fsv16, input0_dt)); topology.add(reorder("reorder2", "input2", fmt_fsv16, input1_dt)); topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode)); @@ -3660,7 +3642,7 @@ TEST_P(eltwise_test_mixed_precision, fsv16) { EXPECT_EQ(outputs.begin()->first, out_id); auto output_memory = outputs.at(out_id).get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VF output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode); for (size_t i = 0; i < output_cpu_vec.size(); ++i) { @@ -3728,18 +3710,18 @@ TEST_P(eltwise_test_mixed_layout, mixed_layout) { VF input1_rnd_vec = flatten_6d(format::bfwzyx, input1_rnd); VF input2_rnd_vec = flatten_6d(format::bfwzyx, input2_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto in0_size = tensor(format::bfyx, input0_size); auto in1_size = tensor(format::bfyx, input1_size); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, in0_size }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, in1_size }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, in0_size }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, in1_size }); set_values(input1, input1_rnd_vec); set_values(input2, input2_rnd_vec); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); topology.add(reorder("reorder1", "input1", format0, data_types::f32)); topology.add(reorder("reorder2", "input2", format1, data_types::f32)); topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode)); @@ -3757,7 +3739,7 @@ TEST_P(eltwise_test_mixed_layout, mixed_layout) { EXPECT_TRUE(network.get_primitive_info("eltwise").find(selected_kernel) != std::string::npos); auto output_memory = outputs.at(out_id).get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VF output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode); for (size_t i = 0; i < output_cpu_vec.size(); ++i) { @@ -3794,21 +3776,21 @@ struct eltwise_random_test_params { struct eltwise_random_test : testing::TestWithParam { template - void fill_random_typed(memory& mem, int min, int max, int k) { - auto size = mem.get_layout().size; + void fill_random_typed(memory::ptr mem, int min, int max, int k) { + auto size = mem->get_layout().size; size_t b = size.batch[0]; size_t f = size.feature[0]; size_t x = size.spatial[0]; 
size_t y = size.spatial[1]; auto data = generate_random_4d(b, f, y, x, min, max, k); - auto ptr = mem.pointer(); + mem_lock ptr{mem, get_test_stream()}; for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[bi][fi][yi][xi]; } } @@ -3816,8 +3798,8 @@ struct eltwise_random_test : testing::TestWithParam } } - void fill_random(memory& mem) { - auto dt = mem.get_layout().data_type; + void fill_random(memory::ptr mem) { + auto dt = mem->get_layout().data_type; switch (dt) { case data_types::f32: fill_random_typed(mem, -127, 127, 2); @@ -3837,16 +3819,16 @@ struct eltwise_random_test : testing::TestWithParam } template - bool compare_outputs(const memory& out_ref, const memory& out_opt) { - auto output_lay = out_ref.get_layout(); - auto opt_output_lay = out_opt.get_layout(); + bool compare_outputs(const memory::ptr out_ref, const memory::ptr out_opt) { + auto output_lay = out_ref->get_layout(); + auto opt_output_lay = out_opt->get_layout(); size_t b = output_lay.size.batch[0]; size_t f = output_lay.size.feature[0]; size_t x = output_lay.size.spatial[0]; size_t y = output_lay.size.spatial[1]; - auto ref_ptr = out_ref.pointer(); - auto opt_ptr = out_opt.pointer(); + mem_lock ref_ptr{out_ref, get_test_stream()}; + mem_lock opt_ptr{out_opt, get_test_stream()}; for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { @@ -3869,18 +3851,18 @@ struct eltwise_random_test : testing::TestWithParam } void execute_compare(const eltwise_random_test_params& params, bool check_result) { - auto eng = cldnn::engine(); + auto& engine = get_test_engine(); auto in_layout1 = layout(params.input_type, params.in_format, params.first_input_size); auto in_layout2 = layout(params.input_type, params.in_format_second, params.second_input_size); - auto input1 = memory::allocate(eng, in_layout1); - auto input2 = memory::allocate(eng, in_layout2); + auto input1 = engine.allocate_memory(in_layout1); + auto input2 = engine.allocate_memory(in_layout2); fill_random(input1); fill_random(input2); cldnn::topology topo; - topo.add(input_layout("input1", input1.get_layout())); - topo.add(input_layout("input2", input2.get_layout())); + topo.add(input_layout("input1", input1->get_layout())); + topo.add(input_layout("input2", input2->get_layout())); auto prim = eltwise("eltwise", {"input1", "input2"}, params.mode); topo.add(prim); @@ -3888,26 +3870,23 @@ struct eltwise_random_test : testing::TestWithParam build_ops.set_option(build_option::outputs({"eltwise"})); build_ops.set_option(build_option::force_implementations({ {"eltwise", {params.in_format, "generic_eltwise_ref"}} })); - auto net = network(eng, topo, build_ops); + auto net = network(engine, topo, build_ops); net.set_input_data("input1", input1); net.set_input_data("input2", input2); auto result = net.execute(); auto output = result.at("eltwise").get_memory(); - // Execute optimized eltwise 'eltwise_opt' - auto eng_opt = cldnn::engine(); - cldnn::topology topo_opt; - topo_opt.add(input_layout("input1", input1.get_layout())); - topo_opt.add(input_layout("input2", input2.get_layout())); + topo_opt.add(input_layout("input1", input1->get_layout())); + topo_opt.add(input_layout("input2", input2->get_layout())); auto prim_opt = 
eltwise("eltwise_opt", {"input1", "input2"}, params.mode); topo_opt.add(prim_opt); auto buildops_opt = build_options(); buildops_opt.set_option(build_option::outputs({"eltwise_opt"})); - auto net_opt = network(eng_opt, topo_opt, buildops_opt); + auto net_opt = network(engine, topo_opt, buildops_opt); net_opt.set_input_data("input1", input1); net_opt.set_input_data("input2", input2); @@ -3970,4 +3949,4 @@ INSTANTIATE_TEST_CASE_P(eltwise_smoke_fsv4, .simple_params(data_types::f16, format::b_fs_yx_fsv4, format::b_fs_yx_fsv4) .simple_params(data_types::i8, format::b_fs_yx_fsv4, format::b_fs_yx_fsv4) .simple_params(data_types::u8, format::b_fs_yx_fsv4, format::b_fs_yx_fsv4) - ), ); \ No newline at end of file + ), ); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/embedding_bag_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/embedding_bag_gpu_test.cpp index 531e290461d..b5be9527e60 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/embedding_bag_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/embedding_bag_gpu_test.cpp @@ -3,17 +3,14 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -24,11 +21,11 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) { // per_sample_weights : 3x2 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -51,8 +48,8 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) { auto type = embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add(data("Input2", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) @@ -66,7 +63,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -84,10 +81,10 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) { // indices : 3x2 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ 
data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -105,8 +102,8 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) { auto type = embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1"}, type, output_shape) ); @@ -119,7 +116,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -2.1f, -2.4f, @@ -139,11 +136,11 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) { // Output : 3x2x2 // Input values in fp16 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 2, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 2, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 2, 1}; /* @@ -185,8 +182,8 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) { auto type = embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add(data("Input2", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) @@ -200,7 +197,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); /* * [ 3 @@ -232,11 +229,11 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) { // per_sample_weights : 3x2 // Output : 3x2x3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 3, 2}; /* @@ -308,8 +305,8 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) { auto type = 
embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add(data("Input2", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) @@ -323,7 +320,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); /* * [ 3 @@ -375,12 +372,12 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto offsets = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto offsets = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -402,9 +399,9 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) { auto type = embedding_bag::offsets_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", offsets.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", offsets->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 0) @@ -418,7 +415,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -439,12 +436,12 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto offsets = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto offsets = 
engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -466,9 +463,9 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) { auto type = embedding_bag::offsets_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", offsets.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", offsets->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 2) @@ -483,7 +480,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.9f, -1.8f, @@ -504,12 +501,12 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto offsets = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto offsets = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -531,9 +528,9 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) { auto type = embedding_bag::offsets_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", offsets.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", offsets->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 2) @@ -548,7 +545,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -567,11 +564,11 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) { // offsets : 3x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, 
{ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto offsets = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto offsets = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -590,9 +587,9 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) { auto type = embedding_bag::offsets_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", offsets.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", offsets->get_layout())); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) ); @@ -606,7 +603,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -2.1f, -2.4f, @@ -627,12 +624,12 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) { // default_index : 1x1 // Output : 3x2x3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto offsets = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto offsets = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 3, 2}; /* @@ -704,9 +701,9 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) { auto type = embedding_bag::offsets_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", offsets.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", offsets->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 0) @@ -721,7 +718,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); /* * [ 3 @@ -773,12 +770,12 @@ TEST(embedding_bag_fp16_gpu, 
segments_sum_basic) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -800,9 +797,9 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) { auto type = embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", segment_ids->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 0) @@ -817,7 +814,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -838,12 +835,12 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -865,9 +862,9 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) { auto type = embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", 
segment_ids->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 2) @@ -882,7 +879,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.9f, -1.8f, @@ -903,12 +900,12 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) { // default_index : 1x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -930,9 +927,9 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) { auto type = embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", segment_ids->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 2) @@ -947,7 +944,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -966,11 +963,11 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) { // segment_ids : 4x1 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -989,9 +986,9 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) { auto type = 
embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", segment_ids->get_layout())); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) ); @@ -1005,7 +1002,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -2.1f, -2.4f, @@ -1026,12 +1023,12 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) { // default_index : 1x1 // Output : 3x2x3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f16, format::bfyx, { 5, 2, 3, 2 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); tensor output_shape = {3, 2, 3, 2}; /* @@ -1103,9 +1100,9 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) { auto type = embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", segment_ids->get_layout())); topology.add(data("Input3", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2", "Input3"}, type, output_shape, 0) @@ -1120,7 +1117,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); /* * [ 3 @@ -1170,11 +1167,11 @@ TEST(embedding_bag_fp32_gpu, packed_sum_basic) { // per_sample_weights : 3x2 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f32, format::bfyx, { 5, 2, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f32, format::bfyx, { 5, 2, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto 
per_sample_weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 1, 1}; set_values(emb_table, { @@ -1197,8 +1194,8 @@ TEST(embedding_bag_fp32_gpu, packed_sum_basic) { auto type = embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add(data("Input2", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) @@ -1212,7 +1209,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_basic) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { -1.05f, -1.2f, @@ -1231,11 +1228,11 @@ TEST(embedding_bag_fp32_gpu, packed_sum_dim3) { // per_sample_weights : 3x2 // Output : 3x2x3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f32, format::bfyx, { 5, 2, 3, 2 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); - auto per_sample_weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f32, format::bfyx, { 5, 2, 3, 2 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 1 } }); + auto per_sample_weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); tensor output_shape = {3, 2, 3, 2}; /* @@ -1307,8 +1304,8 @@ TEST(embedding_bag_fp32_gpu, packed_sum_dim3) { auto type = embedding_bag::packed_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); topology.add(data("Input2", per_sample_weights)); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) @@ -1322,7 +1319,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_dim3) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); /* * [ 3 @@ -1372,11 +1369,11 @@ TEST(embedding_bag_fp32_gpu, extended5_6) { // per_sample_weights : 3x2 // Output : 3x2 // Input values in fp16 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto emb_table = memory::allocate(engine, { data_types::f32, format::bfyx, { 5, 6, 1, 1 } }); - auto indices = memory::allocate(engine, { data_types::i32, format::bfyx, { 5, 1, 1, 1 } }); - auto segment_ids = memory::allocate(engine, { data_types::i32, format::bfyx, { 5, 1, 1, 1 } }); + auto emb_table = engine.allocate_memory({ data_types::f32, format::bfyx, { 5, 6, 1, 1 } }); + auto indices = engine.allocate_memory({ data_types::i32, format::bfyx, { 5, 1, 1, 1 } }); + auto segment_ids = engine.allocate_memory({ data_types::i32, format::bfyx, { 5, 1, 1, 1 } }); tensor output_shape = {5, 6, 1, 1}; set_values(emb_table, { @@ -1391,9 +1388,9 @@ TEST(embedding_bag_fp32_gpu, extended5_6) { auto type = 
embedding_bag::segments_sum; topology topology; - topology.add(input_layout("Input0", emb_table.get_layout())); - topology.add(input_layout("Input1", indices.get_layout())); - topology.add(input_layout("Input2", segment_ids.get_layout())); + topology.add(input_layout("Input0", emb_table->get_layout())); + topology.add(input_layout("Input1", indices->get_layout())); + topology.add(input_layout("Input2", segment_ids->get_layout())); topology.add( embedding_bag("embedding_bag", {"Input0", "Input1", "Input2"}, type, output_shape) ); @@ -1407,7 +1404,7 @@ TEST(embedding_bag_fp32_gpu, extended5_6) { auto outputs = network.execute(); auto output = outputs.at("embedding_bag").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0, 8, 15, 15, 9, 7, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp index ccbe6f5a769..c627b088d10 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/extract_image_patches_gpu_test.cpp @@ -3,26 +3,23 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include +#include -#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(extract_image_patches_gpu, basic) { // Input : 1x1x10x10 // Output : 1x9x2x2 tensor output_shape = {1, 9, 2, 2}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -32,7 +29,7 @@ TEST(extract_image_patches_gpu, basic) { std::vector rates = {1, 1}; std::string auto_pad = "valid"; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -43,7 +40,7 @@ TEST(extract_image_patches_gpu, basic) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -54,7 +51,7 @@ TEST(extract_image_patches_gpu, basic) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 6, @@ -95,7 +92,7 @@ TEST(extract_image_patches_gpu, basic2) { // Input : 1x1x10x10 // Output : 1x16x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -106,7 +103,7 @@ TEST(extract_image_patches_gpu, basic2) { std::string auto_pad = "valid"; tensor output_shape = {1, 16, 1, 1}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } 
}); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -117,7 +114,7 @@ TEST(extract_image_patches_gpu, basic2) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -128,7 +125,7 @@ TEST(extract_image_patches_gpu, basic2) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, @@ -159,7 +156,7 @@ TEST(extract_image_patches_gpu, basic3) { // Input : 1x1x10x10 // Output : 1x16x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -170,7 +167,7 @@ TEST(extract_image_patches_gpu, basic3) { std::string auto_pad = "same_upper"; tensor output_shape = {1, 16, 2, 2}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -181,7 +178,7 @@ TEST(extract_image_patches_gpu, basic3) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -192,7 +189,7 @@ TEST(extract_image_patches_gpu, basic3) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 0, @@ -254,7 +251,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { // Input : 1x1x10x10 // Output : 1x16x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -265,7 +262,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { std::string auto_pad = "same_lower"; tensor output_shape = {1, 16, 2, 2}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -276,7 +273,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -287,7 +284,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto 
output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0, 0, @@ -349,7 +346,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { // Input : 1x1x10x10 // Output : 1x9x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -360,7 +357,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { std::string auto_pad = "same_upper"; tensor output_shape = {1, 9, 2, 2}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -371,7 +368,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -382,7 +379,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 8, @@ -423,7 +420,7 @@ TEST(extract_image_patches_gpu, basic4) { // Input : 1x1x10x10 // Output : 1x9x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 1; auto in_rows = 10; @@ -434,7 +431,7 @@ TEST(extract_image_patches_gpu, basic4) { std::string auto_pad = "valid"; tensor output_shape = {1, 9, 2, 2}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -445,7 +442,7 @@ TEST(extract_image_patches_gpu, basic4) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -456,7 +453,7 @@ TEST(extract_image_patches_gpu, basic4) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 6, @@ -497,7 +494,7 @@ TEST(extract_image_patches_gpu, basic5) { // Input : 1x2x5x5 // Output : 1x8x2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch = 1; auto depth = 2; auto in_rows = 5; @@ -508,7 +505,7 @@ TEST(extract_image_patches_gpu, basic5) { std::string auto_pad = "valid"; tensor output_shape = {1, 8, 2, 2}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { batch, depth, in_cols, in_rows } }); 
std::vector inputVals(batch * depth * in_rows * in_cols); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -519,7 +516,7 @@ TEST(extract_image_patches_gpu, basic5) { set_values(input, inputVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", "Input0", sizes, strides, rates, auto_pad, output_shape)); network network(engine, topology); @@ -530,7 +527,7 @@ TEST(extract_image_patches_gpu, basic5) { EXPECT_EQ(outputs.begin()->first, "extract_image_patches"); auto output = outputs.at("extract_image_patches").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 4, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp index 21f51050aa8..b96ca63bb2e 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp @@ -3,20 +3,14 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/fully_connected.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include "instrumentation.h" -#include "test_utils/network_test.h" + +#include "test_utils.h" +#include "network_test.h" + +#include +#include "cldnn/primitives/fully_connected.hpp" +#include +#include #include @@ -26,7 +20,7 @@ namespace cldnn } using namespace cldnn; -using namespace tests; +using namespace ::tests; cldnn::format::type layout_4d(cldnn::format f) { switch (f.value) { @@ -75,19 +69,19 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te VF input_rnd_vec = flatten_4d(test_input_fmt, input_rnd); VF weights_rnd_vec = flatten_4d(test_weights_fmt, weights_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(input_b, f, x, y); tensor weights_tensor(output_f, f, x, y); - auto input = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); - auto weights = memory::allocate(engine, { type_to_data_type::value, test_weights_fmt, weights_tensor }); - auto bias = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1,1,output_f,1 } }); + auto input = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); + auto weights = engine.allocate_memory({ type_to_data_type::value, test_weights_fmt, weights_tensor }); + auto bias = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1,1,output_f,1 } }); set_values(input, input_rnd_vec); set_values(weights, weights_rnd_vec); set_values(bias, bias_rnd_vec); primitive_id out_id = "fully_connected"; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), data("bias", bias), fully_connected(out_id, "input", "weights", "bias") @@ -105,8 +99,8 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te EXPECT_EQ(outputs.begin()->first, out_id); auto output_memory = outputs.at(out_id).get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto 
output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); //EXPECT_EQ(output_layout.format.value, test_input_fmt); tensor output_tensor = output_layout.size; @@ -150,8 +144,8 @@ TEST(DISABLED_fully_connected_gpu, generic_random_short) { std::vector> input_sizes = { {28, 28}, {64, 64}, {100, 100}, {227, 227}, {1000, 1}, {1, 4096} }; VF outputs_x = { 5, 16 }; - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; } @@ -199,15 +193,15 @@ TEST(fully_connected_gpu, no_biases) { const int32_t input_x = 3, input_b = 1, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1} }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::yxfb,{ input_b, 1, input_x, 1} }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); - auto input = input_layout("input", input_prim.get_layout()); + auto input = input_layout("input", input_prim->get_layout()); auto w_data = data("weights", weights_prim); auto fc = fully_connected("full_con_prim", "input", "weights"); topology topology; @@ -224,7 +218,7 @@ TEST(fully_connected_gpu, no_biases) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(1.5f, output_ptr[0]); EXPECT_EQ(0.75f, output_ptr[1]); @@ -256,15 +250,15 @@ TEST(fully_connected_gpu, no_biases_int8) { const int32_t input_x = 3, input_b = 1, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ input_b, 1, input_x, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::i8,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ input_b, 1, input_x, 1 } }); + auto weights_prim = engine.allocate_memory({ data_types::i8,format::bfyx,{ weight_b, 1, weight_x, 1 } }); set_values(input_prim, { 8.4f, 2.3f, -4.49f }); set_values(weights_prim, { 2, 1, 0, -3, -2, 1, 0, -2, -4, -5, 10, 8 }); - auto input = input_layout("input", input_prim.get_layout()); + auto input = input_layout("input", input_prim->get_layout()); auto w_data = data("weights", weights_prim); auto ri = reorder("reorder_to_int", "input", { data_types::i8,format::bfyx,{ input_b, 1, input_x, 1 } }); auto fc = fully_connected("full_con_prim", "reorder_to_int", "weights"); @@ -284,7 +278,7 @@ TEST(fully_connected_gpu, no_biases_int8) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(18.0f, 
output_ptr[0]); EXPECT_EQ(-32.0f, output_ptr[1]); @@ -317,18 +311,18 @@ TEST(fully_connected_gpu, xb_f32_batch_1) { input_x = 3, input_b = 1, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate( engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,output_f, 1} }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx, { 1,1,output_f, 1} }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias") @@ -343,7 +337,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.5f, output_ptr[0]); EXPECT_EQ(2.75f, output_ptr[1]); @@ -377,18 +371,18 @@ TEST(fully_connected_gpu, xb_f32_batch_2) { input_x = 3, input_b = 2, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b,1,input_x, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::yxfb,{ input_b,1,input_x, 1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias") @@ -403,7 +397,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(4.00f, output_ptr[1]); @@ -438,19 +432,19 @@ TEST(fully_connected_gpu, x_f32) { input_x = 3, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto 
input_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,input_x,1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx, { 1,1,input_x,1 } }); //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias") @@ -465,7 +459,7 @@ TEST(fully_connected_gpu, x_f32) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(2.75f, output_ptr[1]); @@ -494,19 +488,19 @@ TEST(fully_connected_gpu, yxfn_f32) { // Output: // 10 -28.5 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 2, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 2, 1 } }); //auto output_prim = memory::allocate({ memory::format::xb_f32,{ 2 ,{ { 1 } }, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 1 } }); set_values(input_prim, { 1.f, 3.f, -2.f, -4.f }); set_values(weights_prim, { 1.f, -1.f, 2.0f, 0.f, 3.0f, 4.0f, 0.5f, 5.0f }); set_values(bias_prim, { 1.0f, -5.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias") @@ -521,7 +515,7 @@ TEST(fully_connected_gpu, yxfn_f32) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(10, output_ptr[0]); EXPECT_EQ(-28.5, output_ptr[1]); @@ -552,19 +546,19 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) { input_x = 3, input_b = 1, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } }); //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } }); - auto weights_prim = 
memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f, 1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f, 1 } }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias"), @@ -580,7 +574,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(0.00f, output_ptr[1]); @@ -614,19 +608,19 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) { input_x = 3, input_b = 2, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } }); //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias"), @@ -642,7 +636,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(4.00f, output_ptr[1]); @@ -677,19 +671,19 @@ TEST(fully_connected_gpu, x_f32_relu) { input_x = 3, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,input_x,1 } }); //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1 ,{ { output_f } }, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = 
memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias"), @@ -705,7 +699,7 @@ TEST(fully_connected_gpu, x_f32_relu) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(0.00f, output_ptr[1]); @@ -737,19 +731,19 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { input_x = 3, // size of whole input buffer weight_b = 4, weight_x = 3; // size of whole weights buffer - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,input_x,1 } }); //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1 ,{ { output_f } }, 1 } }); - auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); - auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); + auto weights_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } }); + auto bias_prim = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1,1,output_f,1 } }); set_values(input_prim, { -0.5f, 2.0f, 0.5f }); set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f }); set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f }); topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), fully_connected("full_con_prim", "input", "weights", "bias"), @@ -765,7 +759,7 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.50f, output_ptr[0]); EXPECT_EQ(-0.125f, output_ptr[1]); @@ -775,7 +769,7 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { TEST(fully_connected_gpu, b_fs_yx_fsv4) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int in_B = 2; const int in_F = 2048; @@ -791,11 +785,11 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) std::vector Data(in_F * in_B); // in_X=in_Y=1 int i = 0; std::generate(Data.begin(), Data.end(), [i]() mutable { return i++ % 9; }); - auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}}); + auto input = engine.allocate_memory({data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}}); set_values(input, std::move(Data)); // Create a topology - topology topology(input_layout("input", input.get_layout())); + topology topology(input_layout("input", 
input->get_layout())); // Reorder topology.add(reorder("reorder_in", @@ -809,15 +803,19 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) return i % 2 ? -(i++) / W_F - 1 : (i++) / W_F + 1; }); auto weights_gold = - memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}}); + engine.allocate_memory({data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}}); auto weights_imad = - memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}}); + engine.allocate_memory({data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}}); set_values(weights_gold, Weights); set_values(weights_imad, std::move(Weights)); topology.add(data("weights_gold", weights_gold), data("weights_imad", weights_imad)); - auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}}); - auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}}); + auto bias_gold = engine.allocate_memory({data_types::f32, format::bfyx, {1, in_F, 1, 1}}); + auto bias_imad = engine.allocate_memory({data_types::f32, format::bfyx, {1, in_F, 1, 1}}); + + std::vector bias_data(in_F, 0); + set_values(bias_gold, bias_data); + set_values(bias_imad, bias_data); topology.add(data("bias_gold", bias_gold)); topology.add(data("bias_imad", bias_imad)); @@ -830,10 +828,10 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) topology.add(fullc_gold, fullc_imad); - auto input_low_mem = memory::allocate(engine, { data_types::f32, format::bfyx, {1, W_B, 1, 1} }); - auto input_high_mem = memory::allocate(engine, { data_types::f32, format::bfyx, {1, W_B, 1, 1} }); - auto output_low_mem = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 1, 1, 1} }); - auto output_high_mem = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 1, 1, 1} }); + auto input_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, W_B, 1, 1} }); + auto input_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, W_B, 1, 1} }); + auto output_low_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 1, 1, 1} }); + auto output_high_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 1, 1, 1} }); set_values(input_low_mem, generate_random_1d(W_B, -200, 0)); set_values(input_high_mem, generate_random_1d(W_B, 1, 200)); set_values(output_low_mem, {-127.0f}); @@ -868,21 +866,19 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) ASSERT_NE(out_gold, outputs.end()); ASSERT_NE(out_test, outputs.end()); - auto gold_ptr = out_gold->second.get_memory().pointer(); - auto test_ptr = out_test->second.get_memory().pointer(); + cldnn::mem_lock gold_ptr(out_gold->second.get_memory(), get_test_stream()); + cldnn::mem_lock test_ptr(out_test->second.get_memory(), get_test_stream()); ASSERT_EQ(gold_ptr.size(), test_ptr.size()); - for (size_t i = 0; i < gold_ptr.size(); i++) - { + for (size_t i = 0; i < gold_ptr.size(); i++) { ASSERT_EQ(gold_ptr[i], test_ptr[i]); } } -TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) -{ - const auto& engine = get_test_engine(); +TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) { + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); @@ -896,9 +892,9 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) const int input_f = 64; // Allocate memory - auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, { batch_num, input_f, input_y, input_x } }); - auto weights_prim = memory::allocate(engine, { data_types::f16,format::bfyx,{ output_f, input_f, input_y, input_x } }); - auto bias_prim = memory::allocate(engine, { data_types::f16,format::bfyx,{ 1, 1, output_f, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, { batch_num, input_f, input_y, input_x } }); + auto weights_prim = engine.allocate_memory({ data_types::f16,format::bfyx,{ output_f, input_f, input_y, input_x } }); + auto bias_prim = engine.allocate_memory({ data_types::f16,format::bfyx,{ 1, 1, output_f, 1 } }); // Generate random input data and set values auto input_data = generate_random_4d(batch_num, input_f, input_y, input_x, -1, 1); @@ -917,7 +913,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) // Create topology to test topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), reorder("input_fsv", "input", {data_types::f16, format::fs_b_yx_fsv32, { batch_num, input_f, input_y, input_x } }), @@ -935,7 +931,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) auto outputs = network.execute(); auto output_prim = outputs.at("out").get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t bi = 0; bi < batch_num; ++bi) { @@ -956,9 +952,9 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); @@ -972,9 +968,9 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) const int input_f = 64; // Allocate memory - auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, { batch_num, input_f, input_y, input_x } }); - auto weights_prim = memory::allocate(engine, { data_types::f16,format::bfyx,{ output_f, input_f, input_y, input_x } }); - auto bias_prim = memory::allocate(engine, { data_types::f16,format::bfyx,{ 1, 1, output_f, 1 } }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, { batch_num, input_f, input_y, input_x } }); + auto weights_prim = engine.allocate_memory({ data_types::f16,format::bfyx,{ output_f, input_f, input_y, input_x } }); + auto bias_prim = engine.allocate_memory({ data_types::f16,format::bfyx,{ 1, 1, output_f, 1 } }); // Generate random input data and set values auto input_data = generate_random_4d(batch_num, input_f, input_y, input_x, -1, 1); @@ -993,7 +989,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) // Create topology to test topology topology( - input_layout("input", input_prim.get_layout()), + input_layout("input", input_prim->get_layout()), data("weights", weights_prim), data("bias", bias_prim), reorder("input_fsv", "input", { data_types::f16, format::fs_b_yx_fsv32, { batch_num, input_f, input_y, input_x } }), @@ -1011,7 +1007,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) auto outputs = network.execute(); auto output_prim = outputs.at("out").get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t bi = 0; bi < batch_num; ++bi) { @@ -1054,7 +1050,7 @@ struct fully_connected_random_test : ::testing::TestWithParam(output_f, input_f, input_y, input_x); auto bias_data = generate_smart_random_2d(1, output_f); - auto eng = get_test_engine(); + auto& eng = get_test_engine(); auto net = network_test(eng); auto input = net.add_input_layout("input", input_format, std::move(input_data)); auto weights = net.add_data("weights", format::oiyx, std::move(weights_data)); @@ -1152,7 +1148,7 @@ struct fully_connected_random_test_3d : ::testing::TestWithParam(output_y, input_y, 1, 1); auto bias_data = generate_smart_random_2d(1, output_y); - auto eng = get_test_engine(); + auto& eng = get_test_engine(); auto net = network_test(eng); auto input = net.add_input_layout("input", input_format, std::move(input_data)); auto weights = net.add_data("weights", format::oiyx, std::move(weights_data)); @@ -1314,14 +1310,14 @@ public: auto input_size = tensor(TensorValue(batch_num()), TensorValue(input_f()), TensorValue(input_x()), TensorValue(input_y())); auto weights_size = tensor(TensorValue(output_f()), TensorValue(input_f()), TensorValue(input_x()), TensorValue(input_y())); - auto input_prim = memory::allocate(engine, { input_data_type(), _fmt, input_size }); - auto weights_prim = memory::allocate(engine, { data_types::i8, format::bfyx, weights_size }); - auto quantization_input_low = memory::allocate(engine, { data_types::f32, format::bfyx, tensor(feature(output_f())) }); - auto quantization_input_high = memory::allocate(engine, { data_types::f32, format::bfyx, tensor(feature(output_f())) }); - auto quantization_output_low = memory::allocate(engine, { data_types::f32, format::bfyx, tensor(feature(1)) }); - auto quantization_output_high = memory::allocate(engine, { data_types::f32, format::bfyx, tensor(feature(1)) }); + auto input_prim = engine.allocate_memory({ input_data_type(), _fmt, input_size }); + auto 
weights_prim = engine.allocate_memory({ data_types::i8, format::bfyx, weights_size }); + auto quantization_input_low = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(output_f())) }); + auto quantization_input_high = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(output_f())) }); + auto quantization_output_low = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(1)) }); + auto quantization_output_high = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(1)) }); - VF input_flattened(input_prim.get_layout().get_linear_size()); + VF input_flattened(input_prim->get_layout().get_linear_size()); for (size_t bi = 0; bi < batch_num(); ++bi) for (size_t fi = 0; fi < input_f(); ++fi) for (size_t yi = 0; yi < input_y(); ++yi) @@ -1338,14 +1334,14 @@ public: set_values(quantization_output_low, { _quantization.output_low }); set_values(quantization_output_high, { _quantization.output_high }); - auto bias_prim = memory::allocate(engine, { data_types::i32, format::bfyx, tensor(feature(output_f())) }); + auto bias_prim = engine.allocate_memory({ data_types::i32, format::bfyx, tensor(feature(output_f())) }); set_values(bias_prim, _bias); topology topo; topo.add(data("weights", weights_prim)); topo.add(data("bias", bias_prim)); - topo.add(input_layout("input", input_prim.get_layout())); + topo.add(input_layout("input", input_prim->get_layout())); auto fc_prim = fully_connected("fc", "input", "weights", "bias"); fc_prim.output_data_type = type_to_data_type::value; topo.add(fc_prim); @@ -1374,7 +1370,7 @@ public: auto output = net.execute(); auto out_mem = output.at("output").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); for (size_t bi = 0; bi < batch_num(); ++bi) { for (size_t fi = 0; fi < output_f(); ++fi) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp index 237c7aa12eb..bcca8e4dd4f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp @@ -3,36 +3,31 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/convolution.hpp" -#include "api/eltwise.hpp" -#include "api/reorder.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include #include #include -#include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace testing; TEST(fused_conv_eltwise, basic_0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f, @@ -40,7 +35,7 @@ TEST(fused_conv_eltwise, basic_0) }); topology topology( - input_layout("input", input.get_layout()), + 
input_layout("input", input->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }), eltwise("eltwise", "input", "conv", eltwise_mode::sum), @@ -56,7 +51,7 @@ TEST(fused_conv_eltwise, basic_0) EXPECT_EQ(outputs.begin()->first, "out"); auto output = outputs.begin()->second.get_memory(); - auto&& out_layout = output.get_layout(); + auto&& out_layout = output->get_layout(); EXPECT_EQ(out_layout.format, format::bfyx); EXPECT_EQ(out_layout.size.batch[0], 1); @@ -67,11 +62,11 @@ TEST(fused_conv_eltwise, basic_0) TEST(fused_conv_eltwise, basic_image2d) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 128, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 3, 256, 4 } }); - auto weights = memory::allocate(engine, { data_types::f16, format::bfyx, { 12, 4, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 4, 128, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 3, 256, 4 } }); + auto weights = engine.allocate_memory({ data_types::f16, format::bfyx, { 12, 4, 1, 1 } }); auto input_data1 = generate_random_4d(1, 4, 2, 128, -1, 1); auto input_data1_bfyx = flatten_4d(format::bfyx, input_data1); @@ -86,8 +81,8 @@ TEST(fused_conv_eltwise, basic_image2d) set_values(weights, weights_data_bfyx); topology topology_act( - input_layout("input", input.get_layout()), - input_layout("input2", input2.get_layout()), + input_layout("input", input->get_layout()), + input_layout("input2", input2->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }), depth_to_space("depth_to_space", "conv", 2, depth_to_space_mode::blocks_first), @@ -105,11 +100,11 @@ TEST(fused_conv_eltwise, basic_image2d) EXPECT_EQ(outputs_act.begin()->first, "eltwise"); auto output_act = outputs_act.begin()->second.get_memory(); - auto out_act_ptr = output_act.pointer(); + cldnn::mem_lock out_act_ptr(output_act, get_test_stream()); topology topology_ref( - input_layout("input", input.get_layout()), - input_layout("input2", input2.get_layout()), + input_layout("input", input->get_layout()), + input_layout("input2", input2->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }), depth_to_space("depth_to_space", "conv", 2, depth_to_space_mode::blocks_first), @@ -127,7 +122,7 @@ TEST(fused_conv_eltwise, basic_image2d) EXPECT_EQ(outputs_ref.begin()->first, "out"); auto output_ref = outputs_ref.begin()->second.get_memory(); - auto out_ref_ptr = output_ref.pointer(); + cldnn::mem_lock out_ref_ptr(output_ref, get_test_stream()); for (int i = 0;i < 3 * 256 * 4;i++) { EXPECT_EQ(out_act_ptr[i], out_ref_ptr[i]); @@ -136,10 +131,10 @@ TEST(fused_conv_eltwise, basic_image2d) TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 5 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f, @@ -147,7 +142,7 @@ TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs) }); topology topology( - 
input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }), eltwise("out", "input", "conv", eltwise_mode::sum)); @@ -162,7 +157,7 @@ TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs) EXPECT_EQ(outputs.begin()->first, "out"); auto output = outputs.begin()->second.get_memory(); - auto&& out_layout = output.get_layout(); + auto&& out_layout = output->get_layout(); EXPECT_EQ(out_layout.format, format::bfyx); EXPECT_EQ(out_layout.size.batch[0], 1); @@ -211,7 +206,7 @@ protected: void do_test(const fused_conv_eltwise& fused_prim) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int n_features = static_cast(biases_values.size()); @@ -220,19 +215,11 @@ protected: auto biases_shape = tensor(1, n_features, 1, 1); auto sum_input_shape = tensor(1, n_features, 2, 1); - auto input = memory::allocate( - engine, - {type_to_data_type::value, format::bfyx, input_shape}); - auto weights = memory::allocate( - engine, - {type_to_data_type::value, format::bfyx, weights_shape}); + auto input = engine.allocate_memory({type_to_data_type::value, format::bfyx, input_shape}); + auto weights = engine.allocate_memory({type_to_data_type::value, format::bfyx, weights_shape}); - auto biases = memory::allocate( - engine, - {type_to_data_type::value, format::bfyx, biases_shape}); - auto sum_input = memory::allocate( - engine, - {type_to_data_type::value, format::bfyx, sum_input_shape}); + auto biases = engine.allocate_memory({type_to_data_type::value, format::bfyx, biases_shape}); + auto sum_input = engine.allocate_memory({type_to_data_type::value, format::bfyx, sum_input_shape}); set_values(input, input_values); std::vector post_processed_weights_values(n_features @@ -254,7 +241,7 @@ protected: set_values(biases, biases_values); set_values(sum_input, non_conv_input_values); - the_topology.add(input_layout("input", input.get_layout())); + the_topology.add(input_layout("input", input->get_layout())); the_topology.add(data("weights", weights)); the_topology.add(data("biases", biases)); the_topology.add(data("sum_input", sum_input)); @@ -269,8 +256,8 @@ protected: auto outputs = network.execute(); auto output_memory = outputs.at("fused_conv").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; int f_size = output_layout.size.feature[0]; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 2b9ddcbd3f4..a3a88b7b2e8 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -2,43 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include "api/input_layout.hpp" -#include "api/convolution.hpp" -#include "api/quantize.hpp" -#include "api/topology.hpp" -#include "api/tensor.hpp" -#include "api/network.hpp" -#include "api/eltwise.hpp" -#include "api/fully_connected.hpp" -#include "api/gemm.hpp" -#include "api/binary_convolution.hpp" -#include "api/engine.hpp" -#include "api/data.hpp" 
-#include "api/resample.hpp" -#include "api/mvn.hpp" -#include "api/deconvolution.hpp" -#include "api/permute.hpp" -#include "api/gather.hpp" -#include "api/gather_nd.hpp" -#include "api/scatter_update.hpp" -#include "api/scatter_nd_update.hpp" -#include "api/scatter_elements_update.hpp" -#include "api/depth_to_space.hpp" -#include "api/space_to_depth.hpp" -#include "api/batch_to_space.hpp" -#include "api/space_to_batch.hpp" -#include "api/reduce.hpp" +#include "test_utils.h" - -#include "test_utils/test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; struct resample_test_params { tensor in_shape; @@ -137,7 +129,7 @@ struct normalize_test_params { template class BaseFusingTest : public ::testing::TestWithParam { public: - cldnn::engine engine; + cldnn::engine& engine = get_test_engine(); cldnn::topology topology_fused; cldnn::topology topology_non_fused; cldnn::build_options bo_fused; @@ -162,7 +154,7 @@ public: for (auto& pi : net.get_primitives_info()) { if (pi.type_id == "reorder") { auto exec_prims = net.get_executed_primitives(); - auto it = std::find_if(exec_prims.begin(), exec_prims.end(), [&](const std::pair& e) -> bool { + auto it = std::find_if(exec_prims.begin(), exec_prims.end(), [&](const std::pair& e) -> bool { return e.first == pi.original_id; }); // We count executed reorders only @@ -194,23 +186,23 @@ public: auto output_not_fused_prim = outputs_ref.begin()->second.get_memory(); auto output_fused_prim = outputs_fused.begin()->second.get_memory(); - if (output_not_fused_prim.get_layout().data_type == data_types::f32) { - auto ref = output_not_fused_prim.pointer(); - auto output_ptr = output_fused_prim.pointer(); - for (size_t i = 0; i < output_fused_prim.get_layout().count(); i++) { + if (output_not_fused_prim->get_layout().data_type == data_types::f32) { + cldnn::mem_lock ref(output_not_fused_prim, get_test_stream()); + cldnn::mem_lock output_ptr(output_fused_prim, get_test_stream()); + for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) { ASSERT_NEAR(ref[i], output_ptr[i], tolerance) << "i = " << i; } } else { - auto ref = output_not_fused_prim.pointer(); - auto output_ptr = output_fused_prim.pointer(); - for (size_t i = 0; i < output_fused_prim.get_layout().count(); i++) { + cldnn::mem_lock ref(output_not_fused_prim, get_test_stream()); + cldnn::mem_lock output_ptr(output_fused_prim, get_test_stream()); + for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) { ASSERT_NEAR(float16_to_float32(ref[i]), float16_to_float32(output_ptr[i]), tolerance) << "i = " << i; } } } - cldnn::memory get_mem(cldnn::layout l) { - auto prim = memory::allocate(engine, l); + cldnn::memory::ptr get_mem(cldnn::layout l) { + auto prim = engine.allocate_memory(l); tensor s = l.size; if (l.data_type == data_types::bin) { VF rnd_vec = generate_random_1d(s.count() / 32, min_random, max_random); @@ -229,8 +221,8 @@ public: return prim; } - cldnn::memory get_mem(cldnn::layout l, float fill_value) { - auto prim = memory::allocate(engine, l); + cldnn::memory::ptr get_mem(cldnn::layout l, float fill_value) { + auto prim = engine.allocate_memory(l); tensor s = l.size; if (l.data_type == data_types::bin) { VF rnd_vec(s.count() / 32, static_cast(fill_value)); @@ -238,16 +230,18 @@ public: } else if (l.data_type == 
data_types::f16) { VF rnd_vec(s.count(), float32_to_float16(fill_value)); set_values(prim, rnd_vec); - } else { + } else if (l.data_type == data_types::f32) { VF rnd_vec(s.count(), fill_value); set_values(prim, rnd_vec); + } else { + throw std::runtime_error("get_mem: Unsupported precision"); } return prim; } - cldnn::memory get_repeatless_mem(cldnn::layout l, int min, int max) { - auto prim = memory::allocate(engine, l); + cldnn::memory::ptr get_repeatless_mem(cldnn::layout l, int min, int max) { + auto prim = engine.allocate_memory(l); tensor s = l.size; if (l.data_type == data_types::f32) { VF rnd_vec = generate_random_norepetitions_1d(s.count(), min, max); @@ -267,8 +261,8 @@ public: return prim; } - cldnn::memory get_mem(cldnn::layout l, int min, int max) { - auto prim = memory::allocate(engine, l); + cldnn::memory::ptr get_mem(cldnn::layout l, int min, int max) { + auto prim = engine.allocate_memory(l); tensor s = l.size; if (l.data_type == data_types::f32) { VF rnd_vec = generate_random_1d(s.count(), min, max); @@ -292,7 +286,7 @@ public: return layout{ p.data_type, p.input_format, p.out_shape }; } - layout get_weights_layout(T& p, const int32_t split = 1) { + layout get_weights_layout(T& p, const int32_t /* split */ = 1) { cldnn::tensor weights_tensor; if (p.groups == 1) { weights_tensor = cldnn::tensor(batch(p.out_shape.feature[0]), feature(p.in_shape.feature[0]), @@ -304,7 +298,7 @@ public: return layout{p.weights_type, p.weights_format, weights_tensor}; } - layout get_weights_layout(T& p, const int32_t split, cldnn::format f) { + layout get_weights_layout(T& p, const int32_t /* split */, cldnn::format f) { cldnn::tensor weights_tensor; weights_tensor = cldnn::tensor(batch(p.out_shape.feature[0]), feature(static_cast(p.in_shape.feature[0] / p.groups)), spatial(p.kernel.spatial[0], p.kernel.spatial[1], p.kernel.spatial[2])); @@ -836,7 +830,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_prelu_eltwise, class conv_fp32_multi_eltwise_2 : public ConvFusingTest {}; TEST_P(conv_fp32_multi_eltwise_2, basic) { - if (engine.get_info().supports_immad) { + if (engine.get_device_info().supports_immad) { return; } @@ -871,7 +865,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_2, class conv_fp32_multi_eltwise_2_clamp : public ConvFusingTest {}; TEST_P(conv_fp32_multi_eltwise_2_clamp, basic) { - if (engine.get_info().supports_immad) { + if (engine.get_device_info().supports_immad) { return; } @@ -907,7 +901,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_2_clamp, class conv_fp32_multi_eltwise_4_clamp : public ConvFusingTest {}; TEST_P(conv_fp32_multi_eltwise_4_clamp, basic) { - if (engine.get_info().supports_immad) { + if (engine.get_device_info().supports_immad) { return; } @@ -947,7 +941,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_multi_eltwise_4_clamp, class conv_fp32_multi_eltwise_3_fusing : public ConvFusingTest {}; TEST_P(conv_fp32_multi_eltwise_3_fusing, basic) { - if (engine.get_info().supports_immad) { + if (engine.get_device_info().supports_immad) { return; } @@ -8419,4 +8413,3 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, gather_nd_activation_scale_eltwise, gather_nd_test_params{ CASE_GATHER_ND_FP32_6D_3, 2, 5 }, gather_nd_test_params{ CASE_GATHER_ND_FP32_6D_4, 2, 5 }, }), ); - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp index fbfe62957f6..c0387763873 100644 --- 
a/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp @@ -2,23 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { - // Dictionary : 2x2x2x2x2x2 + // Dictionary : 2x2x2x2x2x2 // Indexes : 2x2x2x1 // Axis : 5 // batch_dim : -1 @@ -35,19 +30,19 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { // 55.f 56.f 57.f 58.f 59.f 60.f 61.f 62.f 63.f 64.f // // Output: - // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, - // 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, - // 17.f, 17.f, 19.f, 19.f, 21.f, 21.f, 23.f, 23.f, - // 25.f, 25.f, 27.f, 27.f, 29.f, 29.f, 31.f, 31.f, - // 33.f, 33.f, 35.f, 35.f, 37.f, 37.f, 39.f, 39.f, - // 41.f, 41.f, 43.f, 43.f, 45.f, 45.f, 47.f, 47.f, - // 50.f, 49.f, 52.f, 51.f, 54.f, 53.f, 56.f, 55.f, + // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, + // 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, + // 17.f, 17.f, 19.f, 19.f, 21.f, 21.f, 23.f, 23.f, + // 25.f, 25.f, 27.f, 27.f, 29.f, 29.f, 31.f, 31.f, + // 33.f, 33.f, 35.f, 35.f, 37.f, 37.f, 39.f, 39.f, + // 41.f, 41.f, 43.f, 43.f, 45.f, 45.f, 47.f, 47.f, + // 50.f, 49.f, 52.f, 51.f, 54.f, 53.f, 56.f, 55.f, // 58.f, 57.f, 60.f, 59.f, 62.f, 61.f, 64.f, 63.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2} }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2} }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_x; int64_t batch_dim = -1; @@ -68,14 +63,14 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { set_values(input2, { 0.f, 1.f, 0.f, 0.f, - + 0.f, 0.f, 1.f, 0.f }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfwzyx, tensor(2, 2, 2, 2, 2, 2), batch_dim) ); @@ -88,16 +83,16 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, - 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, - 17.f, 17.f, 19.f, 19.f, 21.f, 21.f, 23.f, 23.f, - 25.f, 25.f, 27.f, 27.f, 29.f, 29.f, 31.f, 31.f, - 33.f, 33.f, 35.f, 35.f, 37.f, 37.f, 39.f, 39.f, - 41.f, 41.f, 43.f, 43.f, 45.f, 45.f, 47.f, 47.f, - 50.f, 49.f, 52.f, 51.f, 54.f, 53.f, 56.f, 55.f, + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, + 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, + 17.f, 17.f, 19.f, 19.f, 21.f, 21.f, 23.f, 23.f, + 25.f, 25.f, 27.f, 27.f, 29.f, 29.f, 31.f, 31.f, + 33.f, 33.f, 35.f, 35.f, 37.f, 37.f, 39.f, 39.f, + 41.f, 41.f, 43.f, 43.f, 45.f, 45.f, 47.f, 
47.f, + 50.f, 49.f, 52.f, 51.f, 54.f, 53.f, 56.f, 55.f, 58.f, 57.f, 60.f, 59.f, 62.f, 61.f, 64.f, 63.f }; @@ -107,7 +102,7 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { } TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { - // Dictionary : 3x2x3x4x2 + // Dictionary : 3x2x3x4x2 // Indexes : 3x2x3x1 // Axis : 3 // batch_dim : -1 @@ -138,10 +133,10 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { // 119.f 120.f 115.f 116.f 113.f 114.f 125.f 126.f 121.f 122.f 123.f 124.f // 133.f 134.f 129.f 130.f 131.f 132.f 141.f 142.f 137.f 138.f 139.f 140.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 2, 4, 3} }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 3 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 4, 3} }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 3 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_y; int64_t batch_dim = -1; @@ -176,17 +171,17 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { set_values(input2, { 0.f, 0.f, 0.f, 3.f, 1.f, 0.f, - + 1.f, 1.f, 1.f, 2.f, 0.f, 3.f, - + 3.f, 1.f, 0.f, 2.f, 0.f, 1.f }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfzyx, tensor(3, 2, 2, 3, 3), batch_dim) ); @@ -199,7 +194,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 1.f, 2.f, 1.f, 2.f, @@ -218,7 +213,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { 77.f, 78.f, 73.f, 74.f, 79.f, 80.f, 85.f, 86.f, 81.f, 82.f, 87.f, 88.f, 93.f, 94.f, 89.f, 90.f, 95.f, 96.f, - + 103.f, 104.f, 99.f, 100.f, 97.f, 98.f, 111.f, 112.f, 107.f, 108.f, 105.f, 106.f, @@ -256,28 +251,28 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { // 29.f 16.f 75.f 74.f 74.f 8.f 29.f 29.f 7.f 18.f 54.f 54.f 38.f 16.f 40.f 40.f 74.f 24.f // 74.f 25.f 82.f 74.f 71.f 9.f 92.f 71.f 80.f 64.f 27.f 80.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 3, 1, 5 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 4, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 3, 1, 5 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 4, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_y; int64_t batch_dim = 1; set_values(input1, { FLOAT16(84.f), FLOAT16( 7.f), FLOAT16(10.f), FLOAT16(69.f), FLOAT16(13.f), - FLOAT16(47.f), FLOAT16(75.f), FLOAT16( 8.f), FLOAT16(65.f), FLOAT16(28.f), - FLOAT16( 5.f), FLOAT16(12.f), FLOAT16(56.f), FLOAT16(54.f), FLOAT16( 9.f), - - FLOAT16(31.f), FLOAT16(12.f), FLOAT16(71.f), FLOAT16(55.f), FLOAT16( 8.f), - FLOAT16(73.f), FLOAT16(16.f), FLOAT16(29.f), FLOAT16(81.f), FLOAT16(81.f), - FLOAT16(75.f), FLOAT16( 8.f), FLOAT16(74.f), FLOAT16(75.f), FLOAT16(51.f), + FLOAT16(47.f), FLOAT16(75.f), FLOAT16( 8.f), 
FLOAT16(65.f), FLOAT16(28.f), + FLOAT16( 5.f), FLOAT16(12.f), FLOAT16(56.f), FLOAT16(54.f), FLOAT16( 9.f), - FLOAT16( 7.f), FLOAT16(29.f), FLOAT16( 6.f), FLOAT16(72.f), FLOAT16(18.f), - FLOAT16(38.f), FLOAT16(54.f), FLOAT16(19.f), FLOAT16(70.f), FLOAT16(16.f), - FLOAT16(74.f), FLOAT16(40.f), FLOAT16(72.f), FLOAT16(88.f), FLOAT16(24.f), - - FLOAT16(14.f), FLOAT16(75.f), FLOAT16(74.f), FLOAT16(82.f), FLOAT16(25.f), - FLOAT16(48.f), FLOAT16(13.f), FLOAT16(71.f), FLOAT16(92.f), FLOAT16( 9.f), + FLOAT16(31.f), FLOAT16(12.f), FLOAT16(71.f), FLOAT16(55.f), FLOAT16( 8.f), + FLOAT16(73.f), FLOAT16(16.f), FLOAT16(29.f), FLOAT16(81.f), FLOAT16(81.f), + FLOAT16(75.f), FLOAT16( 8.f), FLOAT16(74.f), FLOAT16(75.f), FLOAT16(51.f), + + FLOAT16( 7.f), FLOAT16(29.f), FLOAT16( 6.f), FLOAT16(72.f), FLOAT16(18.f), + FLOAT16(38.f), FLOAT16(54.f), FLOAT16(19.f), FLOAT16(70.f), FLOAT16(16.f), + FLOAT16(74.f), FLOAT16(40.f), FLOAT16(72.f), FLOAT16(88.f), FLOAT16(24.f), + + FLOAT16(14.f), FLOAT16(75.f), FLOAT16(74.f), FLOAT16(82.f), FLOAT16(25.f), + FLOAT16(48.f), FLOAT16(13.f), FLOAT16(71.f), FLOAT16(92.f), FLOAT16( 9.f), FLOAT16(73.f), FLOAT16( 8.f), FLOAT16(80.f), FLOAT16(27.f), FLOAT16(64.f) }); @@ -289,8 +284,8 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(4, 3, 1, 4), batch_dim) ); @@ -303,23 +298,23 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 69.f, 10.f, 69.f, 13.f, - 65.f, 8.f, 65.f, 28.f, - 54.f, 56.f, 54.f, 9.f, + 69.f, 10.f, 69.f, 13.f, + 65.f, 8.f, 65.f, 28.f, + 54.f, 56.f, 54.f, 9.f, - 55.f, 71.f, 71.f, 12.f, - 81.f, 29.f, 29.f, 16.f, + 55.f, 71.f, 71.f, 12.f, + 81.f, 29.f, 29.f, 16.f, 75.f, 74.f, 74.f, 8.f, - 29.f, 29.f, 7.f, 18.f, - 54.f, 54.f, 38.f, 16.f, + 29.f, 29.f, 7.f, 18.f, + 54.f, 54.f, 38.f, 16.f, 40.f, 40.f, 74.f, 24.f, - 74.f, 25.f, 82.f, 74.f, - 71.f, 9.f, 92.f, 71.f, + 74.f, 25.f, 82.f, 74.f, + 71.f, 9.f, 92.f, 71.f, 80.f, 64.f, 27.f, 80.f }; @@ -345,10 +340,10 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { // Output: // 1.f 1.f 4.f 3.f 5.f 5.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; size_t batch_dim = -1; @@ -364,8 +359,8 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, 
format::bfyx, tensor(3, 2, 1, 1), batch_dim) ); @@ -378,10 +373,10 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 1.f, 1.f, + 1.f, 1.f, 4.f, 3.f, 5.f, 5.f, }; @@ -408,10 +403,10 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { // Output: // 1.f 1.f 4.f 3.f 5.f 5.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; int64_t batch_dim = 1; @@ -427,8 +422,8 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(3, 2, 1, 1), batch_dim) ); @@ -441,7 +436,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 1.f, 4.f, @@ -470,10 +465,10 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { // Output: // 1.f 1.f 4.f 3.f 5.f 5.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; size_t batch_dim = 0; @@ -489,8 +484,8 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(3, 3, 1, 2), batch_dim) ); @@ -503,18 +498,18 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 1.f, 1.f, + 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, - 3.f, 3.f, + 3.f, 3.f, 4.f, 3.f, 3.f, 3.f, - 5.f, 5.f, + 5.f, 5.f, 6.f, 5.f, 5.f, 5.f }; @@ -540,10 +535,10 @@ TEST(gather_gpu_fp16, d14_axisB) { // Output: // 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } 
}); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -557,8 +552,8 @@ TEST(gather_gpu_fp16, d14_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(1, 4, 1, 2)) ); @@ -571,7 +566,7 @@ TEST(gather_gpu_fp16, d14_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f @@ -599,10 +594,10 @@ TEST(gather_gpu_fp16, d222_axisB) { // Output: // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -619,8 +614,8 @@ TEST(gather_gpu_fp16, d222_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -633,7 +628,7 @@ TEST(gather_gpu_fp16, d222_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f @@ -661,10 +656,10 @@ TEST(gather_gpu_fp16, d22_axisY) { // Output: // 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_y; set_values(input1, { @@ -680,8 +675,8 @@ TEST(gather_gpu_fp16, d22_axisY) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", 
input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -694,7 +689,7 @@ TEST(gather_gpu_fp16, d22_axisY) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f @@ -722,10 +717,10 @@ TEST(gather_gpu_fp16, d22_axisF) { // Output: // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; set_values(input1, { @@ -741,8 +736,8 @@ TEST(gather_gpu_fp16, d22_axisF) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -755,7 +750,7 @@ TEST(gather_gpu_fp16, d22_axisF) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f @@ -782,10 +777,10 @@ TEST(gather_gpu_fp32, d14_axisB) { // Output: // 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -799,8 +794,8 @@ TEST(gather_gpu_fp32, d14_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(1, 4, 1, 2)) ); @@ -813,7 +808,7 @@ TEST(gather_gpu_fp32, d14_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f @@ -841,10 +836,10 @@ 
TEST(gather_gpu_fp32, d222_axisB) { // Output: // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -860,8 +855,8 @@ TEST(gather_gpu_fp32, d222_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -874,7 +869,7 @@ TEST(gather_gpu_fp32, d222_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f @@ -902,10 +897,10 @@ TEST(gather_gpu_fp32, d22_axisY) { // Output: // 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_y; set_values(input1, { @@ -921,8 +916,8 @@ TEST(gather_gpu_fp32, d22_axisY) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -935,7 +930,7 @@ TEST(gather_gpu_fp32, d22_axisY) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f @@ -963,10 +958,10 @@ TEST(gather_gpu_fp32, d22_axisF) { // Output: // 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ 
data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; set_values(input1, { @@ -982,8 +977,8 @@ TEST(gather_gpu_fp32, d22_axisF) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -996,7 +991,7 @@ TEST(gather_gpu_fp32, d22_axisF) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f @@ -1024,10 +1019,10 @@ TEST(gather_gpu_int32, d22_axisF) { // Output: // 1, 2, 3, 4, 5, 6, 3, 4, 7, 8, 9, 10, 11, 12, 9, 10 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; set_values(input1, { @@ -1043,8 +1038,8 @@ TEST(gather_gpu_int32, d22_axisF) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -1057,7 +1052,7 @@ TEST(gather_gpu_int32, d22_axisF) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 2, 3, 4, 5, 6, 3, 4, 7, 8, 9, 10, 11, 12, 9, 10 @@ -1084,10 +1079,10 @@ TEST(gather_gpu_int32, d14_axisB) { // Output: // 1, 2, 3, 4, 3, 4, 1, 2 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -1101,8 +1096,8 @@ TEST(gather_gpu_int32, d14_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(1, 4, 1, 2)) ); @@ -1115,7 +1110,7 @@ TEST(gather_gpu_int32, d14_axisB) { auto outputs = 
network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 2, 3, 4, 3, 4, 1, 2 @@ -1143,10 +1138,10 @@ TEST(gather_gpu_int32, d222_axisB) { // Output: // 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, 6, 7, 8 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -1162,8 +1157,8 @@ TEST(gather_gpu_int32, d222_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -1176,7 +1171,7 @@ TEST(gather_gpu_int32, d222_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 5, 6, 7, 8 @@ -1204,10 +1199,10 @@ TEST(gather_gpu_int32, d22_axisY) { // Output: // 1, 2, 3, 2, 4, 5, 6, 5, 7, 8, 9, 8, 10, 11, 12, 11 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_y; set_values(input1, { @@ -1223,8 +1218,8 @@ TEST(gather_gpu_int32, d22_axisY) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 2)) ); @@ -1237,7 +1232,7 @@ TEST(gather_gpu_int32, d22_axisY) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 2, 3, 2, 4, 5, 6, 5, 7, 8, 9, 8, 10, 11, 12, 11 @@ -1268,10 +1263,10 @@ TEST(gather_gpu_fp32, d41_axisB) { // 7, 8, 9, 10, 11, 12 // 1, 2, 3, 4, 5, 6, - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 
1, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_b; set_values(input1, { @@ -1287,8 +1282,8 @@ TEST(gather_gpu_fp32, d41_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(4, 1, 3, 2)) ); @@ -1301,7 +1296,7 @@ TEST(gather_gpu_fp32, d41_axisB) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, @@ -1334,10 +1329,10 @@ TEST(gather_gpu_fp32, d41_axisF) { // 3, 4, 1, 2, 3, 4, 5, 6, // 9, 10, 7, 8, 9, 10, 11, 12 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; set_values(input1, { @@ -1350,8 +1345,8 @@ TEST(gather_gpu_fp32, d41_axisF) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 4, 2, 1)) ); @@ -1364,7 +1359,7 @@ TEST(gather_gpu_fp32, d41_axisF) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 3.f, 4.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, @@ -1393,10 +1388,10 @@ TEST(gather_gpu_fp32, d2_axisX) { // Output: // 1, 1, 2, 2, 3, 3, 4, 4 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_x; set_values(input1, { @@ -1409,8 +1404,8 @@ TEST(gather_gpu_fp32, d2_axisX) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(2, 2, 2, 1)) ); @@ -1423,7 +1418,7 @@ TEST(gather_gpu_fp32, d2_axisX) { auto outputs = network.execute(); 
auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 1.f, 2.f, 2.f, @@ -1443,10 +1438,10 @@ TEST(gather_gpu_fp32, 322_axisF) { // Output : 3x2x2x1 // Input values in i32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 3, 1, 1 } }); // data - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 3, 1, 1 } }); // data + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes auto axis = cldnn::gather::gather_axis::along_f; set_values(input1, { @@ -1459,8 +1454,8 @@ TEST(gather_gpu_fp32, 322_axisF) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); topology.add( gather("gather", "InputDictionary", "InputText", axis, format::bfyx, tensor(3, 2, 1, 2)) ); @@ -1473,7 +1468,7 @@ TEST(gather_gpu_fp32, 322_axisF) { auto outputs = network.execute(); auto output = outputs.at("gather").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 0, 2, 1, 11, 10, 12, 11, 21, 20, 22, 21 diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/gather_nd_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_nd_gpu_test.cpp index 29999699287..9be5392902a 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/gather_nd_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_nd_gpu_test.cpp @@ -1,41 +1,25 @@ -// Copyright (c) 2021 Intel Corporation +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include +#include +#include -#include -#include using namespace cldnn; using namespace ::tests; -inline void DoTest(const engine& engine, - const cldnn::memory& input0, - const cldnn::memory& input1, +inline void DoTest(engine& engine, + const cldnn::memory::ptr input0, + const cldnn::memory::ptr input1, const std::vector& expected_results, const int indices_rank, const int batch_dims) { topology topology; - topology.add(input_layout("InputData", input0.get_layout())); - topology.add(input_layout("InputIndices", input1.get_layout())); + topology.add(input_layout("InputData", input0->get_layout())); + topology.add(input_layout("InputIndices", input1->get_layout())); topology.add( gather_nd("gather_nd", "InputData", "InputIndices", indices_rank, batch_dims) ); @@ -46,7 +30,7 @@ inline void DoTest(const engine& engine, network.set_input_data("InputIndices", input1); auto outputs = network.execute(); auto output = outputs.at("gather_nd").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -54,12 +38,12 @@ inline void DoTest(const engine& engine, } TEST(gather_nd_gpu_fp16, d23322_i231312_ir6_batch2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 6; const int batch_dims = 2; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 3, 2, 1, 3, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 2, 1, 3, 1 } }); // indices // expected output dim: {6,1,3,1,2} set_values(input0, { @@ -96,12 +80,12 @@ TEST(gather_nd_gpu_fp16, d23322_i231312_ir6_batch2) { } TEST(gather_nd_gpu_fp16, d231322_i231321_ir6_batch5) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 6; const int batch_dims = 5; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 3, 2, 2, 3, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 3, 1, 2, 3, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 2, 2, 3, 1 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 1, 2, 3, 1 } }); // indices // expected output dim: {36} set_values(input0, { @@ -138,12 +122,12 @@ TEST(gather_nd_gpu_fp16, d231322_i231321_ir6_batch5) { } TEST(gather_nd_gpu_fp16, d23322_i23321_ir5_batch4) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 5; const int batch_dims = 4; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 1, 2, 3 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 2, 2, 3 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 1, 2, 3 } }); // indices // expected output dim: 
{36} set_values(input0, { @@ -180,12 +164,12 @@ TEST(gather_nd_gpu_fp16, d23322_i23321_ir5_batch4) { } TEST(gather_nd_gpu_fp16, d23223_i2321_ir4_batch3) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 3; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 3, 2, 2 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 3, 2, 2 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // indices // expected output dim: {2*3*2,3} set_values(input0, { @@ -222,12 +206,12 @@ TEST(gather_nd_gpu_fp16, d23223_i2321_ir4_batch3) { } TEST(gather_nd_gpu_fp16, d2342_i2312_ir4_batch2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 2; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 2, 4 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 2, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 2, 4 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 2, 1 } }); // indices // expected output dim: {6,1} set_values(input0, { @@ -264,12 +248,12 @@ TEST(gather_nd_gpu_fp16, d2342_i2312_ir4_batch2) { } TEST(gather_nd_gpu_fp16, d234_i2311_ir4_batch2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 2; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // indices // expected output dim: {6,1,1} set_values(input0, { @@ -307,12 +291,12 @@ TEST(gather_nd_gpu_fp16, d234_i2311_ir4_batch2) { } TEST(gather_nd_gpu_fp16, d234_i21_ir2_batch1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 2; const int batch_dims = 1; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices // expected output dim: {2,4} set_values(input0, { @@ -340,12 +324,12 @@ TEST(gather_nd_gpu_fp16, d234_i21_ir2_batch1) { } TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 2; const int batch_dims = 1; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // 
indices // expected output dim: 2 set_values(input0, { @@ -367,12 +351,12 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch1) { } TEST(gather_nd_gpu_fp16, d3223_i321113_ir6_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 6; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 3, 2, 3, 1, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 2, 3, 1, 1, 1 } }); // indices // expected output dim: 321113 set_values(input0, { @@ -412,12 +396,12 @@ TEST(gather_nd_gpu_fp16, d3223_i321113_ir6_batch0) { } TEST(gather_nd_gpu_fp16, d3221_i32312_ir3_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 3; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices // expected output dim: 32312 set_values(input0, { @@ -457,12 +441,12 @@ TEST(gather_nd_gpu_fp16, d3221_i32312_ir3_batch0) { } TEST(gather_nd_gpu_fp16, d3231_i32312_ir3_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 3; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 3 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 2, 1, 3 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 3 } }); // indices // expected output dim: {3,2,1,2} set_values(input0, { @@ -502,12 +486,12 @@ TEST(gather_nd_gpu_fp16, d3231_i32312_ir3_batch0) { } TEST(gather_nd_gpu_fp16, d3112_i3221_ir4_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 1, 2, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 1, 2, 1 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices // expected output dim: {3,2,2,1,1,2} set_values(input0, { @@ -542,12 +526,12 @@ TEST(gather_nd_gpu_fp16, d3112_i3221_ir4_batch0) { } TEST(gather_nd_gpu_fp16, d311211_i322111_ir4_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 3, 1, 1, 1, 2, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 3, 2, 1, 1, 1, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 1, 1, 1, 2, 1 } }); // data 
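// For the fp16 gather_nd cases, values go in as FLOAT16 and come back as raw half-precision
// bits; a minimal sketch of the round trip performed by DoTest, assuming mem_lock<uint16_t>
// as the read view (the element type is not spelled out in this hunk):
//     set_values(input0, { FLOAT16(1), FLOAT16(2) });                      // host -> device
//     cldnn::mem_lock<uint16_t> out_ptr(output, get_test_stream());        // map output for reading
//     EXPECT_EQ(expected, float16_to_float32(out_ptr[0]));                 // convert half bits to float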
+ auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 3, 2, 1, 1, 1, 2 } }); // indices // expected output dim: {3,2,2,1,1,2,1,1} set_values(input0, { @@ -582,12 +566,12 @@ TEST(gather_nd_gpu_fp16, d311211_i322111_ir4_batch0) { } TEST(gather_nd_gpu_fp16, d3332_i3223_ir4_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 4; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 3, 2 } }); // indices set_values(input0, { FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6), @@ -629,12 +613,12 @@ TEST(gather_nd_gpu_fp16, d3332_i3223_ir4_batch0) { } TEST(gather_nd_gpu_fp16, d3323_i322_ir3_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 3; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 3, 2 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // indices set_values(input0, { FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6), @@ -676,12 +660,12 @@ TEST(gather_nd_gpu_fp16, d3323_i322_ir3_batch0) { } TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 2; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices set_values(input0, { FLOAT16(1), FLOAT16(2), @@ -701,12 +685,12 @@ TEST(gather_nd_gpu_fp16, d22_i21_ir2_batch0) { } TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int indices_rank = 2; const int batch_dims = 0; - auto input0 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices + auto input0 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // data + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices set_values(input0, { FLOAT16(1), FLOAT16(2), @@ -727,4 +711,3 @@ TEST(gather_nd_gpu_fp16, d22_i32_ir2_batch0) { DoTest(engine,input0, input1, expected_results, indices_rank, batch_dims); } - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp index 136c1a3c107..ae1ca356924 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp +++ 
b/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp @@ -3,17 +3,11 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" -#include "test_utils/uniform_quantized_real_distribution.hpp" +#include +#include #include @@ -21,14 +15,14 @@ using namespace cldnn; using namespace ::tests; TEST(gemm_gpu, basic_bfyx_t1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 4 } }); std::vector input_data = { - 1.f, -2.f, 3.f, - -4.f, 5.f, 6.f, - 1.f, 2.f, 3.f, + 1.f, -2.f, 3.f, + -4.f, 5.f, 6.f, + 1.f, 2.f, 3.f, 3.f, 2.f, -1.f, }; @@ -48,10 +42,10 @@ TEST(gemm_gpu, basic_bfyx_t1) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32, true, false) @@ -63,7 +57,7 @@ TEST(gemm_gpu, basic_bfyx_t1) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)3); for (uint32_t i = 0; i < out_data.size(); ++i) { @@ -71,13 +65,13 @@ TEST(gemm_gpu, basic_bfyx_t1) { } } TEST(gemm_gpu, basic_bfyx_t2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); std::vector input_data = { - 1.f, -2.f, 3.f, -4.f, - 5.f, 6.f, 1.f, 2.f, + 1.f, -2.f, 3.f, -4.f, + 5.f, 6.f, 1.f, 2.f, 3.f, 3.f, 2.f, -1.f, }; @@ -94,10 +88,10 @@ TEST(gemm_gpu, basic_bfyx_t2) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32, false, true) @@ -109,7 +103,7 @@ TEST(gemm_gpu, basic_bfyx_t2) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)3); for (uint32_t i = 0; i < out_data.size(); ++i) { @@ -118,14 +112,14 @@ TEST(gemm_gpu, basic_bfyx_t2) { } TEST(gemm_gpu, basic_bfyx_t1t2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 4 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 4, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ 
data_types::f32, format::bfyx,{ 2, 1, 3, 4 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 4, 1 } }); std::vector input_data = { - 1.f, -2.f, 3.f, - -4.f, 5.f, 6.f, - 1.f, 2.f, 3.f, + 1.f, -2.f, 3.f, + -4.f, 5.f, 6.f, + 1.f, 2.f, 3.f, 3.f, 2.f, -1.f, 1.f, -2.f, 3.f, @@ -150,10 +144,10 @@ TEST(gemm_gpu, basic_bfyx_t1t2) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32, true, true) @@ -165,7 +159,7 @@ TEST(gemm_gpu, basic_bfyx_t1t2) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)6); for (uint32_t i = 0; i < out_data.size(); ++i) { @@ -174,19 +168,19 @@ TEST(gemm_gpu, basic_bfyx_t1t2) { } TEST(gemm_gpu, basic_input3) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); float alpha = 2.f; float beta = 10.f; - std::vector input_data = { + std::vector input_data = { 1.0f, 2.0f, 3.0f, 1.0f, 0.0f, 1.0f }; - std::vector input_data2 = { + std::vector input_data2 = { 3.0f, 3.0f, 1.0f, 2.0f, 1.0f, 2.0f, @@ -208,13 +202,13 @@ TEST(gemm_gpu, basic_input3) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( - input_layout("input3", input3.get_layout()) + input_layout("input3", input3->get_layout()) ); topology.add( gemm("output", { "input", "input2", "input3" }, data_types::f32, false, false, alpha, beta) @@ -227,20 +221,20 @@ TEST(gemm_gpu, basic_input3) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)4); - for (uint32_t i = 0; i < out_data.size(); ++i) { + for (uint32_t i = 0; i < out_data.size(); ++i) { EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]); } } TEST(gemm_gpu, basic_input3_t1t2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); float alpha = 2.f; float 
beta = 3.f; @@ -251,7 +245,7 @@ TEST(gemm_gpu, basic_input3_t1t2) { }; std::vector input_data2 = { - 3.0f, 3.0f, 1.0f, + 3.0f, 3.0f, 1.0f, 2.0f, 1.0f, 2.0f, }; @@ -275,13 +269,13 @@ TEST(gemm_gpu, basic_input3_t1t2) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( - input_layout("input3", input3.get_layout()) + input_layout("input3", input3->get_layout()) ); topology.add( gemm("output", { "input", "input2", "input3" }, data_types::f32, true, true, alpha, beta) @@ -294,7 +288,7 @@ TEST(gemm_gpu, basic_input3_t1t2) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)8); @@ -303,10 +297,10 @@ TEST(gemm_gpu, basic_input3_t1t2) { } } TEST(gemm_gpu, basic_input3_1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); float alpha = 2.f; float beta = 3.f; @@ -343,13 +337,13 @@ TEST(gemm_gpu, basic_input3_1) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( - input_layout("input3", input3.get_layout()) + input_layout("input3", input3->get_layout()) ); topology.add( gemm("output", { "input", "input2", "input3" }, data_types::f32, false, false, alpha, beta) @@ -363,7 +357,7 @@ TEST(gemm_gpu, basic_input3_1) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)8); @@ -373,10 +367,10 @@ TEST(gemm_gpu, basic_input3_1) { } TEST(gemm_gpu, basic_input3_t2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 4 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 2 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); float alpha = 2.f; float beta = 3.f; @@ -412,13 +406,13 @@ TEST(gemm_gpu, basic_input3_t2) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( - 
input_layout("input3", input3.get_layout()) + input_layout("input3", input3->get_layout()) ); topology.add( gemm("output", { "input", "input2", "input3" }, data_types::f32, false, true, alpha, beta) @@ -431,7 +425,7 @@ TEST(gemm_gpu, basic_input3_t2) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)8); @@ -441,10 +435,10 @@ TEST(gemm_gpu, basic_input3_t2) { } TEST(gemm_gpu, basic_input3_t1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 3 } }); + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 4 } }); float alpha = 2.f; float beta = 3.f; @@ -480,13 +474,13 @@ TEST(gemm_gpu, basic_input3_t1) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( - input_layout("input3", input3.get_layout()) + input_layout("input3", input3->get_layout()) ); topology.add( gemm("output", { "input", "input2", "input3" }, data_types::f32, true, false, alpha, beta) @@ -499,7 +493,7 @@ TEST(gemm_gpu, basic_input3_t1) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)8); @@ -509,13 +503,13 @@ TEST(gemm_gpu, basic_input3_t1) { } TEST(gemm_gpu, basic_bfyx) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 4, 3 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 1, 4 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 4, 3 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 1, 4 } }); std::vector input_data = { - 1.f, -2.f, 3.f, -4.f, - 5.f, 6.f, 7.f, 8.f, + 1.f, -2.f, 3.f, -4.f, + 5.f, 6.f, 7.f, 8.f, -10.f, 12.f, 13.f, -13.f, 1.f, -2.f, 3.f, -4.f, @@ -524,9 +518,9 @@ TEST(gemm_gpu, basic_bfyx) { }; std::vector input_data2 = { - 2.f, - 5.f, - -4.f, + 2.f, + 5.f, + -4.f, -7.f, 2.f, 5.f, @@ -542,10 +536,10 @@ TEST(gemm_gpu, basic_bfyx) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32) @@ -557,9 +551,9 @@ TEST(gemm_gpu, basic_bfyx) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + - EXPECT_EQ(output_ptr.size(), (uint32_t)6); for (uint32_t i = 0; i < out_data.size(); ++i) { EXPECT_FLOAT_EQ(output_ptr[i], 
out_data[i]); @@ -567,9 +561,9 @@ TEST(gemm_gpu, basic_bfyx) { } TEST(gemm_gpu, basic3_bfyx) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 1, 500, 9 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 1, 1, 500 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 1, 500, 9 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 1, 1, 500 } }); std::vector input_data = { -0.000449777f, -0.000137405f, -0.0762075f, 0.000949166f, 0.0346124f, -0.0111424f, 0.0108357f, 0.0121679f, 0.0242749f, 0.052692f, -0.0017713f, 0.0053728f, 0.0128862f, -0.0162366f, 0.0125041f, -0.00602398f, 0.0107778f, -0.00323086f, -0.00914208f, -0.013884f, 0.00755173f, -0.0175622f, 0.00473339f, -0.015003f, -0.0238219f, 0.004502f, 0.00187154f, 0.0041163f, -9.36184e-05f, 0.00873372f, 0.0121869f, -0.020973f, -0.006006f, -0.0038208f, 0.00210471f, 0.00255549f, -0.0251856f, -0.0626372f, -0.0059258f, -0.0058662f, -0.0946306f, 0.00197436f, 0.00105865f, -0.0033595f, 0.0158977f, -0.0036025f, -0.00568902f, -0.0202577f, -0.000251319f, -0.0117895f, -0.0144239f, -0.0144024f, -0.0150431f, -0.0354826f, -0.0135123f, -0.000422157f, 0.0286438f, -0.000884989f, -0.00675718f, 0.013241f, -0.0118388f, 0.0321394f, -0.000803071f, 0.11408f, -0.00806301f, -0.00831608f, 0.0165189f, 0.016094f, -0.000449332f, -0.00695901f, 0.0437514f, -0.00172117f, 0.00180391f, -0.000859933f, -0.0144826f, 0.0262613f, -0.00194352f, -1.98829e-05f, -0.00902827f, -0.00400867f, -0.00600827f, 0.0120846f, -0.0162493f, 0.0418596f, 0.00131911f, -0.00631566f, 0.00270484f, -0.0950513f, 0.00726431f, -0.0169798f, -0.000554365f, -0.00256903f, -0.00885843f, 0.0104025f, 0.00590779f, -0.00175832f, 0.0168603f, 0.00964353f, -0.0180614f, 0.0213157f, 0.0209548f, -0.0231143f, -0.00121617f, -0.0129815f, -0.0199287f, 0.00863336f, -0.00464991f, 0.0162288f, -0.340115f, -0.011018f, -0.0593997f, 0.00644821f, 0.0416332f, 0.0394596f, 0.0172296f, 0.00494231f, 0.0143805f, -0.00819845f, 0.00196982f, 0.00393258f, 0.0246168f, -0.0235927f, 0.0131416f, -0.0190432f, -0.0237865f, -0.0155627f, 0.0265165f, 0.0162884f, 0.00321098f, 0.0136674f, -0.000966112f, -0.0100813f, -0.00604589f, 0.00889466f, 0.0113945f, 0.0264707f, 0.00371883f, -0.00843358f, 0.0145675f, 0.0048638f, 0.00110399f, -0.00130233f, 0.00740726f, -0.00393368f, -0.0242178f, 0.00341681f, 0.00115369f, -0.00297881f, -0.0844071f, 0.0537151f, -0.00209399f, 0.0310295f, 0.0383914f, 0.00456459f, 0.0188114f, -0.0177144f, 0.0133258f, 0.0584683f, -0.00640495f, 0.0175946f, 0.0186782f, 0.00213311f, 0.00393403f, 0.00382759f, 0.00267507f, 0.00493673f, -0.00856695f, -0.00627955f, -0.0103436f, -0.000671664f, -0.110419f, 0.0307264f, 0.0042176f, 0.0031638f, 0.0154172f, 0.00265482f, 0.0410853f, 0.00833895f, -0.0183989f, -0.000717906f, -0.0090387f, -0.00404523f, -0.00976238f, -0.0137555f, 0.000157289f, -0.00341186f, -0.0214878f, 0.0142639f, 0.00624623f, 0.000537292f, -0.0520912f, -0.0432221f, -0.00330415f, 0.0263942f, -0.00150974f, 0.00172088f, -0.0815726f, -0.0201155f, -0.00986346f, 0.0121252f, 0.00198959f, -0.0349936f, -0.00608366f, -0.00399543f, 0.0192487f, -0.0123156f, 0.0072797f, 0.000507143f, 0.0334805f, 0.000609379f, 0.00961966f, -0.00697663f, 0.00201967f, -0.0207349f, -0.0103385f, -0.00343849f, -0.00330492f, 0.035106f, -0.00456996f, 0.00197528f, 0.016148f, 0.0142903f, 0.0616483f, 0.0093118f, -0.0596028f, 0.00945764f, -0.00659242f, 0.118389f, -0.00259384f, -0.00285344f, 
0.00567036f, 0.0195813f, -0.00461807f, -0.0608699f, 0.00380259f, 0.00143385f, -0.00466997f, 0.0194046f, -0.0198423f, -0.00334569f, -0.014399f, 0.0130021f, -0.0141619f, -0.00859914f, 0.00997122f, -0.0198446f, -0.0094162f, -0.0116609f, -0.0111888f, -0.00903524f, 0.00937981f, 0.01772f, -0.00236374f, -0.00870162f, 0.000141193f, -0.0343695f, -0.00997931f, 0.0073531f, -0.100394f, -0.00367661f, -0.00124499f, 0.00318026f, 0.0554203f, -0.00342582f, -0.0104147f, -0.0577869f, -0.0126485f, -0.0332496f, 0.0346141f, 0.0307962f, -0.0174745f, -0.0387339f, 0.0167707f, -0.0363424f, 0.0154902f, -0.0118644f, -4.63543e-06f, -0.0683506f, -0.0344076f, -0.00104884f, -0.00883997f, -0.00305185f, -0.0150299f, -0.0186403f, 0.0110238f, 0.00779224f, -0.0102231f, 0.0087488f, -0.0138988f, -0.0229105f, -0.0244903f, -0.0202919f, 0.00135903f, -0.00574432f, 0.00254918f, 0.0340209f, -0.046428f, -0.00670622f, 0.000925543f, -0.0249251f, -0.00275456f, 0.0199177f, 0.000210993f, 0.027762f, -0.0228046f, 0.0484813f, 0.00538959f, 0.0136714f, -0.00690097f, -0.0448533f, -0.00815204f, 0.00734891f, 0.0173959f, -0.0379109f, 0.0594617f, -0.00722084f, 0.0415935f, 0.014792f, -0.0170252f, -0.0139396f, 0.00146415f, 0.00117702f, 0.0685559f, 0.00727832f, -0.107566f, -0.0112505f, 0.033853f, 0.0046957f, -0.0242369f, 0.0148181f, -0.0723487f, -0.00961667f, 0.0304085f, -0.00520772f, -0.0316467f, 0.0327801f, -0.00755137f, 0.0166041f, -0.0557288f, -0.0227759f, -0.00314548f, 0.0152585f, 0.020071f, -0.0377076f, 0.00687613f, -0.0273935f, -0.00647955f, 0.0105047f, -0.0137238f, 0.023264f, -0.0455722f, -0.00221414f, -0.0258535f, -0.0236395f, 0.0593407f, 0.00448763f, 0.0150777f, 0.00437925f, 0.0295782f, -0.0344752f, 0.00365267f, 0.140464f, -0.0479012f, 0.025726f, 0.119063f, 0.000301925f, -0.00810565f, -0.354073f, -0.0723185f, -0.0046123f, 0.033882f, -0.044552f, -0.0138361f, 0.00384129f, 0.0139111f, -0.01667f, -0.0821503f, 0.0029974f, -0.0306725f, 0.0160366f, 0.0334754f, 0.0192693f, -0.00616713f, -0.00232275f, 0.0107987f, 0.00437057f, 0.0017298f, 0.0196916f, -0.0417255f, -0.00911193f, 0.00876709f, -0.00172422f, -0.00105248f, -0.0191631f, -0.00387423f, -0.0102766f, -0.025317f, -0.0416204f, -0.0319611f, -0.00359193f, 0.00424064f, -0.00575092f, -0.0282402f, 0.0745899f, -0.0126492f, -0.0162564f, -0.261967f, -0.705265f, -0.0403731f, -0.00209634f, -0.694297f, 0.00956909f, 0.0158826f, 0.0130207f, 0.003825f, -0.000300812f, -0.0121346f, 0.00642053f, -0.012902f, 0.0309272f, 0.0609192f, -0.00654145f, -0.0937578f, -0.00432024f, -0.00767539f, 0.0461248f, 0.00701077f, -0.0174477f, 0.00563833f, -0.0107107f, -0.0255275f, 0.00892488f, -0.00166062f, 0.039829f, -0.00150394f, 0.00742194f, -0.00885529f, -0.0103532f, 0.0777858f, 0.0885367f, -0.00425715f, 0.0423651f, -0.0446651f, -0.635069f, -0.00919329f, -0.00356176f, 0.00988705f, 0.0116529f, -0.0401253f, 0.00260105f, 0.00573955f, -0.0667439f, 0.101175f, 0.0765288f, -0.0120077f, 0.00322599f, -0.0192768f, 0.0382749f, -0.222119f, -0.0452036f, 0.0424303f, 0.0890699f, 0.0117557f, 0.0315167f, 0.0284256f, 0.00541845f, -0.250147f, 0.00420668f, -0.0189724f, -0.00416381f, -0.00162803f, -0.0108763f, -0.00970892f, 0.0134476f, -0.0254931f, 0.0307225f, 0.00128596f, 0.0171106f, 0.00467854f, -0.0124376f, 0.0183396f, 0.0021754f, 0.00170886f, -0.0226898f, 0.0250111f, -0.0533301f, -0.0163268f, 0.00618995f, 0.0416378f, 0.0475397f, 0.0105684f, -0.00440933f, 0.0496722f, -0.0215733f, -0.0256361f, -0.0285091f, -0.0276881f, -0.00102202f, -0.0720219f, -0.0296656f, @@ -3145,10 +3139,10 @@ TEST(gemm_gpu, basic3_bfyx) { topology topology; topology.add( - 
input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32) @@ -3160,7 +3154,7 @@ TEST(gemm_gpu, basic3_bfyx) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)45); for (uint32_t i = 0; i < out_data.size(); ++i) { @@ -3170,9 +3164,9 @@ TEST(gemm_gpu, basic3_bfyx) { } TEST(gemm_gpu, basic_smarcink2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 3 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 3, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 2, 3 } }); std::vector input_data = { 1.0f, 2.0f, 3.0f, @@ -3204,10 +3198,10 @@ TEST(gemm_gpu, basic_smarcink2) { topology topology; topology.add( - input_layout("input", input.get_layout()) + input_layout("input", input->get_layout()) ); topology.add( - input_layout("input2", input2.get_layout()) + input_layout("input2", input2->get_layout()) ); topology.add( gemm("output", { "input", "input2" }, data_types::f32) @@ -3219,7 +3213,7 @@ TEST(gemm_gpu, basic_smarcink2) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (uint32_t)8); for (uint32_t i = 0; i < out_data.size(); ++i) { @@ -3414,7 +3408,7 @@ public: } void execute(gemm_params& p) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto y0_size = p.m_size; auto y0_pitch = p.k_size; @@ -3461,19 +3455,19 @@ public: auto input0_size = tensor((int)p.b0_num, (int)p.f0_num, (int)x0_size, (int)y0_size); VVVVF input0_data = generate_random_4d(p.b0_num, p.f0_num, x0_size, y0_size, p.range0[0], p.range0[1], p.range0[2]); auto input0_data_bfyx = flatten_4d(format::bfyx, input0_data); - auto input0_mem = memory::allocate(engine, { p.allocate0_type, format::bfyx, input0_size }); + auto input0_mem = engine.allocate_memory({ p.allocate0_type, format::bfyx, input0_size }); set_values(input0_mem, input0_data_bfyx); auto input1_size = tensor((int)p.b1_num, (int)p.f1_num, (int)x1_size, (int)y1_size); VVVVF input1_data = generate_random_4d(p.b1_num, p.f1_num, x1_size, y1_size, p.range1[0], p.range1[1], p.range1[2]); auto input1_data_bfyx = flatten_4d(format::bfyx, input1_data); - auto input1_mem = memory::allocate(engine, { p.allocate1_type, format::bfyx, input1_size }); + auto input1_mem = engine.allocate_memory({ p.allocate1_type, format::bfyx, input1_size }); set_values(input1_mem, input1_data_bfyx); auto input2_size = tensor((int)p.b2_num, (int)p.f2_num, (int)x2_size, (int)y2_size); VVVVF input2_data = generate_random_4d(p.b2_num, p.f2_num, x2_size, y2_size, p.range2[0], p.range2[1], p.range2[2]); auto input2_data_bfyx = flatten_4d(format::bfyx, input2_data); - auto input2_mem = memory::allocate(engine, { p.allocate2_type, format::bfyx, input2_size }); + auto input2_mem = engine.allocate_memory({ p.allocate2_type, format::bfyx, input2_size }); set_values(input2_mem, 
input2_data_bfyx); std::vector out_data(p.b_out_num * p.f_out_num * p.m_size * p.n_size); @@ -3505,9 +3499,9 @@ public: } topology topology; - topology.add(input_layout("input0", input0_mem.get_layout())); - topology.add(input_layout("input1", input1_mem.get_layout())); - topology.add(input_layout("input2", input2_mem.get_layout())); + topology.add(input_layout("input0", input0_mem->get_layout())); + topology.add(input_layout("input1", input1_mem->get_layout())); + topology.add(input_layout("input2", input2_mem->get_layout())); topology.add(gemm("gemm_bfyx", { "input0", "input1", "input2" }, p.output_type, p.transpose_input0, p.transpose_input1, p.alpha, p.beta)); topology.add(reorder("reorder_bfyx", "gemm_bfyx", format::bfyx, data_types::f32)); @@ -3521,7 +3515,7 @@ public: network.set_input_data("input2", input2_mem); auto outputs = network.execute(); auto output = outputs.at("reorder_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); EXPECT_EQ(output_ptr.size(), (size_t)(p.b_out_num * p.f_out_num * p.m_size * p.n_size)); if (sizeof(input0_type) == 1) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/loop_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/loop_gpu_test.cpp index 9a61d29f8e2..37c0a030949 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/loop_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/loop_gpu_test.cpp @@ -3,17 +3,18 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/eltwise.hpp" -#include -#include + #include "test_utils/test_utils.h" -#include -#include -#include -#include + +#include +#include +#include +#include +#include "cldnn/primitives/eltwise.hpp" +#include +#include +#include +#include #include #include @@ -26,13 +27,13 @@ using namespace testing; TEST(loop_gpu, basic_no_concat) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); - auto operand_mem = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); - auto trip_count_mem = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); - auto initial_condition_mem = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); - auto num_iteration_mem = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); + auto operand_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); + auto trip_count_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); + auto initial_condition_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); + auto num_iteration_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); std::vector input_data{ 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f, @@ -65,9 +66,9 @@ TEST(loop_gpu, basic_no_concat) }; topology topology( - input_layout("input", input_mem.get_layout()), - input_layout("trip_count", trip_count_mem.get_layout()), - input_layout("initial_condition", initial_condition_mem.get_layout()), + input_layout("input", input_mem->get_layout()), + input_layout("trip_count", trip_count_mem->get_layout()), + 
input_layout("initial_condition", initial_condition_mem->get_layout()), mutable_data("num_iteration", num_iteration_mem), loop("loop", {"input"}, body, "trip_count", "initial_condition", "num_iteration", @@ -82,33 +83,33 @@ TEST(loop_gpu, basic_no_concat) auto outputs = network.execute(); EXPECT_EQ(outputs.size(), 1); auto output = outputs.begin()->second.get_memory(); - auto output_layout = output.get_layout(); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.size.batch[0], 1); EXPECT_EQ(output_layout.size.feature[0], 1); EXPECT_EQ(output_layout.size.spatial[0], 4); EXPECT_EQ(output_layout.size.spatial[1], 5); - auto ptr = num_iteration_mem.pointer(); + mem_lock ptr{num_iteration_mem, get_test_stream()}; EXPECT_EQ(ptr[0], trip_count); // value check - auto output_ptr = output.pointer(); + mem_lock output_ptr{output, get_test_stream()}; EXPECT_EQ(output_ptr.size(), input_data.size()); - for (size_t i=0, iend = input_data.size(); i input_data{ 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f, @@ -128,7 +129,7 @@ TEST(loop_gpu, basic_concat) set_values(initial_condition_mem, {initial_condition}); topology body( - input_layout("input", operand_mem.get_layout()), + input_layout("input", operand_mem->get_layout()), data("eltwise_operand", operand_mem), eltwise("eltwise", "input", "eltwise_operand", eltwise_mode::sum) ); @@ -139,9 +140,9 @@ TEST(loop_gpu, basic_concat) std::vector back_edges {}; topology topology( - input_layout("input", input_mem.get_layout()), - input_layout("trip_count", trip_count_mem.get_layout()), - input_layout("initial_condition", initial_condition_mem.get_layout()), + input_layout("input", input_mem->get_layout()), + input_layout("trip_count", trip_count_mem->get_layout()), + input_layout("initial_condition", initial_condition_mem->get_layout()), mutable_data("num_iteration", num_iteration_mem), loop("loop", {"input"}, body, "trip_count", "initial_condition", "num_iteration", @@ -156,19 +157,19 @@ TEST(loop_gpu, basic_concat) auto outputs = network.execute(); EXPECT_EQ(outputs.size(), 1); auto output = outputs.begin()->second.get_memory(); - auto output_layout = output.get_layout(); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.size.batch[0], 1); EXPECT_EQ(output_layout.size.feature[0], 1); EXPECT_EQ(output_layout.size.spatial[0], 4); EXPECT_EQ(output_layout.size.spatial[1], 5); - auto ptr = num_iteration_mem.pointer(); + mem_lock ptr{num_iteration_mem, get_test_stream()}; const int32_t actual_iterations = ptr[0]; EXPECT_EQ(actual_iterations, trip_count); // value check - auto output_ptr = output.pointer(); + mem_lock output_ptr{output, get_test_stream()}; for (size_t i=0, iend = input_data.size(); iget_layout()), data("inner_eltwise_operand", inner_operand_mem), eltwise("inner_eltwise", "inner_input", "inner_eltwise_operand", eltwise_mode::sum) ); @@ -232,9 +233,9 @@ TEST(loop_gpu, basic_concat_nested) // set outer loop body ///////////////////////////////// topology outer_loop_body( - input_layout("inner_input", input_mem.get_layout()), - input_layout("trip_count", inner_trip_count_mem.get_layout()), - input_layout("initial_condition", inner_initial_condition_mem.get_layout()), + input_layout("inner_input", input_mem->get_layout()), + input_layout("trip_count", inner_trip_count_mem->get_layout()), + input_layout("initial_condition", inner_initial_condition_mem->get_layout()), mutable_data("inner_num_iteration", inner_num_iteration_mem), loop("inner_loop", {"inner_input", "trip_count", "initial_condition"}, 
inner_loop_body, "trip_count", "initial_condition", "inner_num_iteration", @@ -254,12 +255,12 @@ TEST(loop_gpu, basic_concat_nested) // set main topology ///////////////////////////////// topology main_topology( - input_layout("input", input_mem.get_layout()), - input_layout("trip_count", trip_count_mem.get_layout()), - input_layout("initial_condition", initial_condition_mem.get_layout()), + input_layout("input", input_mem->get_layout()), + input_layout("trip_count", trip_count_mem->get_layout()), + input_layout("initial_condition", initial_condition_mem->get_layout()), mutable_data("num_iteration", num_iteration_mem), - input_layout("inner_trip_count", inner_trip_count_mem.get_layout()), - input_layout("inner_initial_condition", inner_initial_condition_mem.get_layout()), + input_layout("inner_trip_count", inner_trip_count_mem->get_layout()), + input_layout("inner_initial_condition", inner_initial_condition_mem->get_layout()), loop("loop", {"input", "inner_trip_count", "inner_initial_condition"}, outer_loop_body, "trip_count", "initial_condition", "num_iteration", outer_input_primitive_maps, outer_output_primitive_maps, outer_back_edges, outer_trip_count) @@ -278,7 +279,7 @@ TEST(loop_gpu, basic_concat_nested) auto outputs = network.execute(); EXPECT_EQ(outputs.size(), 1); auto output = outputs.begin()->second.get_memory(); - auto output_layout = output.get_layout(); + auto output_layout = output->get_layout(); ///////////////////////////////// // calculate expected output @@ -303,16 +304,16 @@ TEST(loop_gpu, basic_concat_nested) EXPECT_EQ(output_layout.size.spatial[1], 5); // check trip count = actual iteration - auto inner_num_iteration_ptr = inner_num_iteration_mem.pointer(); + mem_lock inner_num_iteration_ptr{inner_num_iteration_mem, get_test_stream()}; int64_t inner_actual_iterations = inner_num_iteration_ptr[0]; EXPECT_EQ(inner_actual_iterations, inner_trip_count); - auto num_iteration_ptr = num_iteration_mem.pointer(); + mem_lock num_iteration_ptr{num_iteration_mem, get_test_stream()}; int64_t actual_iterations = num_iteration_ptr[0]; EXPECT_EQ(actual_iterations, outer_trip_count); // check output values EXPECT_EQ(output_layout.count(), expected.size()); - auto output_ptr = output.pointer(); + mem_lock output_ptr{output, get_test_stream()}; for (size_t i=0 ;i -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include +#include +#include +#include using namespace cldnn; using namespace ::tests; @@ -20,14 +16,14 @@ using namespace ::tests; TEST(lrn_fp32_gpu, basic) { // input : 1x16x1x1 // Output : 1x16x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t b = 1; const size_t f = 16; const size_t y = 1; const size_t x = 1; - auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { static float n = 0; @@ -37,7 +33,7 @@ TEST(lrn_fp32_gpu, basic) { set_values(input, inputVals); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); uint32_t size = 2; float k = 0.5f; float alpha = 9.9e-05f; @@ -51,7 +47,7 @@ TEST(lrn_fp32_gpu, basic) { auto outputs = network.execute(); auto output = outputs.at("lrn").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, 
get_test_stream()); std::vector expected_results = { 0.f, 1.99901f, 3.99486f, 5.98519f, @@ -69,14 +65,14 @@ TEST(lrn_fp32_gpu, basic) { TEST(lrn_fp32_gpu, basic2) { // input : 1x16x1x1 // Output : 1x16x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t b = 1; const size_t f = 16; const size_t y = 1; const size_t x = 1; - auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { static float n = 0; @@ -86,7 +82,7 @@ TEST(lrn_fp32_gpu, basic2) { set_values(input, inputVals); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); uint32_t size = 5; float k = 0.5f; float alpha = 9.9e-05f; @@ -100,7 +96,7 @@ TEST(lrn_fp32_gpu, basic2) { auto outputs = network.execute(); auto output = outputs.at("lrn").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.99889f, 3.99525f, 5.98696f, @@ -118,14 +114,14 @@ TEST(lrn_fp32_gpu, basic2) { TEST(lrn_fp16_gpu, basic1) { // input : 1x16x1x1 // Output : 1x16x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t b = 1; const size_t f = 16; const size_t y = 1; const size_t x = 1; - auto input = memory::allocate(engine, { data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } }); + auto input = engine.allocate_memory({ data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } }); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { static float n = 0; @@ -135,7 +131,7 @@ TEST(lrn_fp16_gpu, basic1) { set_values(input, inputVals); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); uint32_t size = 5; float k = 0.5f; float alpha = 9.9e-05f; @@ -149,7 +145,7 @@ TEST(lrn_fp16_gpu, basic1) { auto outputs = network.execute(); auto output = outputs.at("lrn").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.99889f, 3.99525f, 5.98696f, @@ -167,14 +163,14 @@ TEST(lrn_fp16_gpu, basic1) { TEST(lrn_fp32_gpu, basic3) { // input : 2x16x4x4 // Output : 2x16x4x4 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const size_t b = 2; const size_t f = 16; const size_t y = 4; const size_t x = 4; - auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } }); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { static float n = 0; @@ -184,7 +180,7 @@ TEST(lrn_fp32_gpu, basic3) { set_values(input, inputVals); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); uint32_t size = 5; float k = 1.f; float alpha = 9.89999971e-05f; @@ -198,7 +194,7 @@ TEST(lrn_fp32_gpu, basic3) { auto outputs = network.execute(); auto output = outputs.at("lrn").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 
0.999792f, 1.99911f, 2.99755f, 3.99466f, 4.99f, 5.98313f, 6.97361f, 7.96102f, 8.94493f, 9.92493f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_dynamic_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_dynamic_gpu_test.cpp index 04100ead3c7..c8a037bfbf1 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_dynamic_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_dynamic_gpu_test.cpp @@ -1,34 +1,30 @@ -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include -#include "api/memory.hpp" -#include "api/mutable_data.hpp" -#include "api/input_layout.hpp" -#include "api/lstm.hpp" -#include "api/lstm_dynamic.hpp" -#include "api/reorder.hpp" -#include "api_extension/lstm_dynamic_input.hpp" -#include "api_extension/lstm_dynamic_timeloop.hpp" -#include "api/topology.hpp" -#include "api/tensor.hpp" -#include "api/network.hpp" -#include "api/engine.hpp" -#include "test_utils/test_utils.h" -#include "api/data.hpp" -#include "instrumentation.h" -#include +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include + #include #include #include +#ifndef __clang__ #pragma warning( disable : 4503 ) +#endif #define MEASURE_PERF false #define MEASURE_LOOP 50 using namespace cldnn; -using namespace tests; +using namespace ::tests; namespace { float sigmoid(float x) { @@ -217,29 +213,28 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test VF ref_weights_vec = flatten_4d(cldnn::format::bfyx, ref_weights); VF ref_bias_vec = flatten_4d(cldnn::format::bfyx, ref_bias); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); VF ref_dynamic_length; for (auto& v : dynamic_lengths) ref_dynamic_length.push_back((T)v); constexpr auto dt = std::is_same::value ? 
data_types::f32 : data_types::f16; - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, direction } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, direction } }); set_values(input_mem, ref_input_vec); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); set_values(weights_mem, ref_weights_vec); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, batch_size, 1 } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, batch_size, 1 } }); set_values(dynamic_length_mem, ref_dynamic_length); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); set_values(bias_mem, ref_bias_vec); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); std::string bias_id = ""; - if (has_bias) - { + if (has_bias) { bias_id = "bias"; topology.add(data(bias_id, bias_mem)); } @@ -277,8 +272,9 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test auto outputs = network.execute(); auto out = outputs.at("dynamic_lstm_input"); - auto out_tensor = out.get_memory().get_layout().size; - auto out_ptr = out.get_memory().pointer(); + auto out_tensor = out.get_memory()->get_layout().size; + cldnn::mem_lock out_ptr(out.get_memory(), get_test_stream()); + auto output_ref = dynamic_lstm::lstm_dynamic_input_ref(ref_input, ref_weights, ref_bias, dynamic_lengths, max_sequence_len, has_bias, direction); @@ -331,31 +327,31 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test VF ref_hidden_vec = flatten_4d(cldnn::format::bfyx, ref_hidden); VF ref_cell_vec = flatten_4d(cldnn::format::bfyx, ref_cell); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); constexpr auto dt = std::is_same::value ? 
data_types::f32 : data_types::f16; VF ref_dynamic_length; for (auto& v : dynamic_lengths) ref_dynamic_length.push_back((T)v); - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, direction } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, direction } }); set_values(input_mem, ref_input_vec); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); set_values(weights_mem, ref_weights_vec); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); set_values(recurrent_mem, ref_recurrent_vec); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, batch_size, 1 } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, batch_size, 1 } }); set_values(dynamic_length_mem, ref_dynamic_length); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); set_values(bias_mem, ref_bias_vec); - memory initial_hidden_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + auto initial_hidden_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); set_values(initial_hidden_mem, ref_hidden_vec); - memory initial_cell_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + auto initial_cell_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); set_values(initial_cell_mem, ref_cell_vec); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); topology.add(data("recurrent", recurrent_mem)); @@ -381,7 +377,9 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test } std::string last_hidden_state = ""; - memory last_hidden_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + auto last_hidden_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + last_hidden_mem->fill(get_test_stream()); + get_test_stream().finish(); if (has_last_hidden_state) { last_hidden_state = "last_hidden_state"; @@ -389,7 +387,9 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test } std::string last_cell_state = ""; - memory last_cell_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + auto last_cell_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, 1, hidden_size, direction } }); + last_cell_mem->fill(get_test_stream()); + get_test_stream().finish(); if (has_last_cell_state) { last_cell_state = "last_cell_state"; @@ -436,10 +436,11 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test clip_threshold, input_forget); auto real_outs = network.execute(); auto out = 
real_outs.at("dynamic_lstm"); - auto out_tensor = out.get_memory().get_layout().size; - auto out_ptr = out.get_memory().pointer(); - auto last_hidden_ptr = last_hidden_mem.pointer(); - auto last_cell_ptr = last_cell_mem.pointer(); + auto out_tensor = out.get_memory()->get_layout().size; + + cldnn::mem_lock out_ptr(out.get_memory(), get_test_stream()); + cldnn::mem_lock last_hidden_ptr(last_hidden_mem, get_test_stream()); + cldnn::mem_lock last_cell_ptr(last_cell_mem, get_test_stream()); size_t i = 0, i_lh = 0, i_lc = 0; for (auto b = 0; b < out_tensor.batch[0]; b++) { @@ -474,8 +475,8 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test //check optional last hidden state output if(has_last_hidden_state && len == dynamic_lengths[b] - 1) { - auto ratio = (float)ref_output_hidden[b][len][dir][x] / (float)last_hidden_ptr[i_lh++]; - EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f)) + auto ratio = (float)ref_output_hidden[b][len][dir][x] / (float)last_hidden_ptr[i_lh++]; + EXPECT_TRUE(std::abs(1.0f - ratio) < 0.01f) << "check has_last_hidden_state with ratio: " << ratio << ", " << "b:" << b << ", " << "len:" << len << ", " @@ -499,7 +500,7 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test if(has_last_cell_state && len == dynamic_lengths[b] - 1) { auto ratio = (float)ref_output_cell[b][len][dir][x] / (float)last_cell_ptr[i_lc++]; - EXPECT_TRUE(std::abs((1.0f - ratio) < 0.01f)) + EXPECT_TRUE(std::abs(1.0f - ratio) < 0.01f) << "check has_last_cell_state with ratio: " << ratio << ", " << "b:" << b << ", " << "len:" << len << ", " @@ -869,17 +870,17 @@ TEST(lstm_dynamic_negative, wrong_weights_size) { auto batch_size = 1, max_sequence_len = 10, input_size = 16, hidden_size = 32, direction = 1; auto wrong_value = 50; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::data_types dt = cldnn::data_types::f32; - memory input_mem = memory::allocate(engine, { dt, format::bfyx, { batch_size, max_sequence_len, input_size, 1 } }); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, wrong_value } }); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, batch_size, 1 } }); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx, { batch_size, max_sequence_len, input_size, 1 } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, wrong_value } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, batch_size, 1 } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); topology.add(data("recurrent", recurrent_mem)); topology.add(lstm_dynamic("dynamic_lstm", @@ -894,17 +895,17 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_0) { auto batch_size = 1, max_sequence_len = 10, input_size = 16, hidden_size = 32, 
direction = 1; auto wrong_value = 50; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::data_types dt = cldnn::data_types::f32; - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, wrong_value, 4 * hidden_size } }); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, batch_size, 1 } }); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, wrong_value, 4 * hidden_size } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, batch_size, 1 } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); topology.add(data("recurrent", recurrent_mem)); topology.add(lstm_dynamic("dynamic_lstm", @@ -919,17 +920,17 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_1) { auto batch_size = 1, max_sequence_len = 10, input_size = 16, hidden_size = 32, direction = 1; auto wrong_value = 50; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::data_types dt = cldnn::data_types::f32; - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, wrong_value, 4 * hidden_size } }); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, batch_size, 1 } }); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, wrong_value, 4 * hidden_size } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, batch_size, 1 } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); topology.add(data("recurrent", recurrent_mem)); topology.add(lstm_dynamic("dynamic_lstm", @@ -944,17 +945,17 @@ 
TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) { auto batch_size = 1, max_sequence_len = 10, input_size = 16, hidden_size = 32, direction = 1; auto wrong_value = 50; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::data_types dt = cldnn::data_types::f32; - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, wrong_value, 1 } }); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, wrong_value, 1 } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", weights_mem)); topology.add(data("recurrent", recurrent_mem)); topology.add(lstm_dynamic("dynamic_lstm", @@ -969,17 +970,17 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { auto batch_size = 50, max_sequence_len = 10, input_size = 16, hidden_size = 32, direction = 1; auto wrong_value = 2; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::data_types dt = cldnn::data_types::f32; - memory input_mem = memory::allocate(engine, { dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); - memory weights_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); - memory recurrent_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); - memory dynamic_length_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, wrong_value, 1 } }); - memory bias_mem = memory::allocate(engine, { dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); + auto input_mem = engine.allocate_memory({ dt, format::bfyx,{ batch_size, max_sequence_len, input_size, 1 } }); + auto weights_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + auto recurrent_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); + auto dynamic_length_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, wrong_value, 1 } }); + auto bias_mem = engine.allocate_memory({ dt, format::bfyx,{ 1, 1, 4 * hidden_size, 1 } }); topology topology; - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("dyn_len", dynamic_length_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); + topology.add(input_layout("dyn_len", dynamic_length_mem->get_layout())); topology.add(data("weights", 
weights_mem)); topology.add(data("recurrent", recurrent_mem)); topology.add(lstm_dynamic("dynamic_lstm", @@ -989,5 +990,3 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { "recurrent")); ASSERT_ANY_THROW(network network(engine, topology)); } - - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp index 866343d9297..d9b968d205f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp @@ -3,22 +3,16 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/lstm.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include "instrumentation.h" -#include + +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include #include #include @@ -28,7 +22,7 @@ #endif using namespace cldnn; -using namespace tests; +using namespace ::tests; #define FERROR 1E-4 @@ -219,10 +213,10 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, VVVVF ref_output = lstm_gemm_reference(ref_input, ref_weights, ref_recurrent, ref_bias, ref_hidden, 0, hasBias, hasHidden); constexpr auto dt = std::is_same::value ? data_types::f32 : data_types::f16; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - // If the input is of fp16 type then, the memory will be allocated as such - if (!engine.get_info().supports_fp16) + // If the input is of fp16 type then, the memory::ptr will be allocated as such + if (!engine.get_device_info().supports_fp16) { if (dt == data_types::f16) { @@ -230,11 +224,11 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, } } - memory input = memory::allocate(engine, { dt, format::bfyx, { batch_size, sequence_len, input_size, 1 } }); - memory weights = memory::allocate(engine, { dt, format::bfyx, { 1, direction, input_size, 4 * hidden_size } }); - memory recurrent = memory::allocate(engine, { dt, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } }); - memory biases = memory::allocate(engine, { dt, format::bfyx, { 1, 1, 4 * hidden_size, direction } }); - memory hidden = memory::allocate(engine, { dt, format::bfyx, { batch_size, direction, hidden_size, 1 } }); + memory::ptr input = engine.allocate_memory({ dt, format::bfyx, { batch_size, sequence_len, input_size, 1 } }); + memory::ptr weights = engine.allocate_memory({ dt, format::bfyx, { 1, direction, input_size, 4 * hidden_size } }); + memory::ptr recurrent = engine.allocate_memory({ dt, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } }); + memory::ptr biases = engine.allocate_memory({ dt, format::bfyx, { 1, 1, 4 * hidden_size, direction } }); + memory::ptr hidden = engine.allocate_memory({ dt, format::bfyx, { batch_size, direction, hidden_size, 1 } }); set_values(input, ref_input_vec); set_values(weights, ref_weights_vec); @@ -243,14 +237,14 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, set_values(hidden, ref_hidden_vec); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights", weights)); topology.add(data("recurrent", recurrent)); if (hasBias) { 
topology.add(data("biases", biases)); } if (hasHidden) { - topology.add(input_layout("hidden", hidden.get_layout())); + topology.add(input_layout("hidden", hidden->get_layout())); } topology.add(lstm_gemm("lstm_gemm", "input", "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : "")); @@ -265,7 +259,7 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, EXPECT_EQ(outputs.size(), size_t(1)); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int i = 0; for (int b = 0; b < batch_size; ++b) { for (int x = 0; x < 4 * hidden_size; ++x) @@ -295,10 +289,10 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_ // to error from 1E-4 to 1E-2 constexpr float ferror = std::is_same::value ? (float)1E-4 : (float)1E-2; constexpr auto dt = std::is_same::value ? data_types::f32 : data_types::f16; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - // If the input is of fp16 type then, the memory will be allocated as such - if (!engine.get_info().supports_fp16) + // If the input is of fp16 type then, the memory::ptr will be allocated as such + if (!engine.get_device_info().supports_fp16) { if (dt == data_types::f16) { @@ -306,15 +300,15 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_ } } - memory tempGEMM = memory::allocate(engine, { dt, format::bfyx,{ batch_size, direction, 4 * hidden_size, 1 } }); - memory cell = memory::allocate(engine, { dt, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); + memory::ptr tempGEMM = engine.allocate_memory({ dt, format::bfyx,{ batch_size, direction, 4 * hidden_size, 1 } }); + memory::ptr cell = engine.allocate_memory({ dt, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); set_values(tempGEMM, ref_tempGEMM_vec); set_values(cell, ref_cell_vec); topology topology; - topology.add(input_layout("tempGEMM", tempGEMM.get_layout())); + topology.add(input_layout("tempGEMM", tempGEMM->get_layout())); if (hasCell) { - topology.add(input_layout("cell", cell.get_layout())); + topology.add(input_layout("cell", cell->get_layout())); } topology.add(lstm_elt("lstm_elt", "tempGEMM", hasCell ? 
"cell" : "", clip_threshold, input_forget)); @@ -328,7 +322,7 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_ EXPECT_EQ(outputs.size(), size_t(1)); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int b = 0; b < batch_size; ++b) { for (int j = 0; j < 2; ++j) { for (int x = 0; x < hidden_size; ++x) @@ -349,11 +343,11 @@ std::string get_string_id(size_t i) { // --------------- Manually constructed LSTM ---------------------------------------- // This function manually generates an lstm node sequence by conbining lstm_gemm and lstm_elt nodes // it requires that the output of the lstm_elt node is croped to obtain the corresponding hidden and cell outputs -void generate_lstm_topology(topology& t, memory& input, memory& hidden, memory& cell, - memory& weights, memory& recurrent, memory& biases, int sequence_len, +void generate_lstm_topology(topology& t, memory::ptr input, memory::ptr hidden, memory::ptr cell, + memory::ptr weights, memory::ptr recurrent, memory::ptr biases, int sequence_len, bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true) { - auto hidden_size = hidden.get_layout().size; - t.add(input_layout("input", input.get_layout())); + auto hidden_size = hidden->get_layout().size; + t.add(input_layout("input", input->get_layout())); std::vector> input_ids_offsets; std::vector output_ids_offsets; for (int i = 0; i < sequence_len; ++i) @@ -372,12 +366,12 @@ void generate_lstm_topology(topology& t, memory& input, memory& hidden, memory& } if (hasInitialHidden) { - t.add(input_layout("hidden", hidden.get_layout())); + t.add(input_layout("hidden", hidden->get_layout())); hiddenStr = "hidden"; } if (hasInitialCell) { - t.add(input_layout("cell", cell.get_layout())); + t.add(input_layout("cell", cell->get_layout())); cellStr = "cell"; } for (int i = 0; i < sequence_len; ++i) { @@ -423,13 +417,13 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz lstm_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output, last_hidden, last_cell, hasBias, hasInitialHidden, hasInitialCell); - const auto& engine = get_test_engine(); - memory input = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ batch_size, sequence_len, input_size, 1 } }); - memory weights = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); - memory recurrent = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); - memory biases = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); - memory hidden = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); - memory cell = memory::allocate(engine, { type_to_data_type::value, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); + auto& engine = get_test_engine(); + memory::ptr input = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ batch_size, sequence_len, input_size, 1 } }); + memory::ptr weights = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, direction, input_size, 4 * hidden_size } }); + memory::ptr recurrent = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, direction, hidden_size, 4 * hidden_size } }); + memory::ptr biases = 
engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ 1, 1, 4 * hidden_size, direction } }); + memory::ptr hidden = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); + memory::ptr cell = engine.allocate_memory({ type_to_data_type::value, format::bfyx,{ batch_size, direction, hidden_size, 1 } }); set_values(input, ref_input_vec); set_values(weights, ref_weights_vec); set_values(recurrent, ref_recurrent_vec); @@ -448,11 +442,11 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); - size_t output_size = outputs.begin()->second.get_memory().size() / sizeof(T); + size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T); ASSERT_EQ(output_size, size_t(hidden_size * sequence_len * batch_size * direction)); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int i = 0; for (int b = 0; b < batch_size; ++b) { for (int s = 0; s < sequence_len; ++s) { @@ -526,10 +520,10 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc // to error from 1E-4 to 1E-2 constexpr float ferror = std::is_same::value ? (float)1E-4 : (float)1E-2; constexpr auto dt = std::is_same::value ? data_types::f32 : data_types::f16; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - // If the input is of fp16 type then, the memory will be allocated as such - if (!engine.get_info().supports_fp16) + // If the input is of fp16 type then, the memory::ptr will be allocated as such + if (!engine.get_device_info().supports_fp16) { if (dt == data_types::f16) { @@ -537,29 +531,29 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc } } - memory input = memory::allocate(engine, { dt, format::bfyx, {batch_size, sequence_len, input_size, 1} }); + memory::ptr input = engine.allocate_memory({ dt, format::bfyx, {batch_size, sequence_len, input_size, 1} }); set_values(input, ref_input_vec); - std::vector weights; - std::vector recurrent; - std::vector biases; - std::vector hidden; - std::vector cell; + std::vector weights; + std::vector recurrent; + std::vector biases; + std::vector hidden; + std::vector cell; for(int i = 0; i < layers; ++i) { - weights.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, direction, i==0 ? input_size : hidden_size, 4 * hidden_size } })); + weights.push_back(engine.allocate_memory({ dt, format::bfyx, { 1, direction, i==0 ? 
input_size : hidden_size, 4 * hidden_size } })); set_values(weights[i], ref_weights_vec[i]); - recurrent.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } })); + recurrent.push_back(engine.allocate_memory({ dt, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } })); set_values(recurrent[i], ref_recurrent_vec[i]); if (hasBias) { - biases.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, 1, 4 * hidden_size, direction } })); + biases.push_back(engine.allocate_memory({ dt, format::bfyx, { 1, 1, 4 * hidden_size, direction } })); set_values(biases[i], ref_bias_vec[i]); } if (hasInitialHidden) { - hidden.push_back(memory::allocate(engine, { dt, format::bfyx, { batch_size, 1, hidden_size, direction } })); + hidden.push_back(engine.allocate_memory({ dt, format::bfyx, { batch_size, 1, hidden_size, direction } })); set_values(hidden[i], ref_hidden_vec[i]); } if (hasInitialCell) { - cell.push_back(memory::allocate(engine, { dt, format::bfyx, { batch_size, 1, hidden_size, direction} })); + cell.push_back(engine.allocate_memory({ dt, format::bfyx, { batch_size, 1, hidden_size, direction} })); set_values(cell[i], ref_cell_vec[i]); } } @@ -569,7 +563,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc std::vector lstm_inputs; std::vector output_ids_offsets; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); for (int i = 0; i < sequence_len; ++i) { input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}}); lstm_inputs.push_back("inputSplit:"+get_string_id(i)); @@ -588,8 +582,8 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc topology.add(data(weights_id, weights[i])); topology.add(data(recurrent_id, recurrent[i])); if (hasBias) topology.add(data(biases_id, biases[i])); - if (hasInitialHidden) topology.add(input_layout(hidden_id, hidden[i].get_layout())); - if (hasInitialCell) topology.add(input_layout(cell_id, cell[i].get_layout())); + if (hasInitialHidden) topology.add(input_layout(hidden_id, hidden[i]->get_layout())); + if (hasInitialCell) topology.add(input_layout(cell_id, cell[i]->get_layout())); if (i == 0) { topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id, hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? 
cell_id : "", "", @@ -617,13 +611,13 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc auto outputs = network.execute(); { ASSERT_EQ(outputs.size(), size_t(1)); - size_t output_size = outputs.begin()->second.get_memory().size() / sizeof(T); + size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T); ASSERT_EQ(output_size, size_t(hidden_size * sequence_len * batch_size * direction)); auto output = outputs.begin()->second.get_memory(); // Get the output tensor - cldnn::layout output_layout = output.get_layout(); + cldnn::layout output_layout = output->get_layout(); cldnn::tensor output_tensor = output_layout.size; // Compare the output tensor configuration against the reference value @@ -633,7 +627,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc ASSERT_EQ(direction, output_tensor.spatial[1]); ASSERT_EQ(hidden_size, output_tensor.spatial[0]); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t i = 0; for (int32_t b = 0; b < batch_size; ++b) { for (int32_t s = 0; s < sequence_len; ++s) { @@ -683,14 +677,14 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir last_hidden, last_cell, true, true, true, (T)0, false, true); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - memory input = memory::allocate(engine, { type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); - memory weights = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); - memory recurrent = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); - memory biases = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); - memory hidden = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); - memory cell = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); + memory::ptr input = engine.allocate_memory({ type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); + memory::ptr weights = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); + memory::ptr recurrent = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); + memory::ptr biases = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); + memory::ptr hidden = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); + memory::ptr cell = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); set_values(input, ref_input_vec); set_values(weights, ref_weights_vec); @@ -709,7 +703,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir std::vector lstm_inputs; std::vector output_ids_offsets; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); for (int i = 0; i < sequence_len; ++i) { input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}}); @@ -719,8 +713,8 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir 
topology.add(data("weights", weights)); topology.add(data("recurrent", recurrent)); topology.add(data("biases", biases)); - topology.add(input_layout("hidden", hidden.get_layout())); - topology.add(input_layout("cell", cell.get_layout())); + topology.add(input_layout("hidden", hidden->get_layout())); + topology.add(input_layout("cell", cell->get_layout())); topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent", "biases", "hidden", "cell", "", 0, false, { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, @@ -752,11 +746,11 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir for (auto itr = outputs.begin(); itr != outputs.end(); itr++) { - auto output_tensor = itr->second.get_memory().get_layout().size; + auto output_tensor = itr->second.get_memory()->get_layout().size; primitive_id primitive_name = itr->first; - cldnn::memory output_memory = itr->second.get_memory(); - int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T)); + cldnn::memory::ptr output_memory = itr->second.get_memory(); + int32_t output_size = (int32_t)(itr->second.get_memory()->size() / sizeof(T)); cldnn::tensor ref_output_tensor; VVVVF ref_primitive_output; @@ -793,7 +787,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir ASSERT_EQ(ref_directions, output_tensor.spatial[1]); // directions should match ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]); // input size should match - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int32_t i = 0; for (int32_t b = 0; b < ref_batch_size; ++b) { @@ -846,14 +840,14 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { last_hidden, last_cell, true, true, true, (T)0, false, true); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - memory input = memory::allocate(engine, { type_to_data_type::value,format, {batch_size, sequence_len, input_size, 1} }); - memory weights = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); - memory recurrent = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); - memory biases = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); - memory hidden = memory::allocate(engine, { type_to_data_type::value, format, { batch_size, 1, hidden_size, directions } }); - memory cell = memory::allocate(engine, { type_to_data_type::value, format, { batch_size, 1, hidden_size, directions } }); + memory::ptr input = engine.allocate_memory({ type_to_data_type::value,format, {batch_size, sequence_len, input_size, 1} }); + memory::ptr weights = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); + memory::ptr recurrent = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); + memory::ptr biases = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); + memory::ptr hidden = engine.allocate_memory({ type_to_data_type::value, format, { batch_size, 1, hidden_size, directions } }); + memory::ptr cell = engine.allocate_memory({ type_to_data_type::value, format, { batch_size, 1, hidden_size, directions } }); set_values(input, ref_input_vec); set_values(weights, 
ref_weights_vec); @@ -872,7 +866,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { std::vector lstm_inputs; std::vector output_ids_offsets; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); for (int i = 0; i < sequence_len; ++i) { input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}}); @@ -882,8 +876,8 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { topology.add(data("weights", weights)); topology.add(data("recurrent", recurrent)); topology.add(data("biases", biases)); - topology.add(input_layout("hidden", hidden.get_layout())); - topology.add(input_layout("cell", cell.get_layout())); + topology.add(input_layout("hidden", hidden->get_layout())); + topology.add(input_layout("cell", cell->get_layout())); topology.add(lstm("lstm"+get_string_id(0), lstm_inputs, "weights", "recurrent", "biases", "hidden", "cell", "", 0, false, { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, @@ -918,11 +912,11 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { for (auto itr = outputs.begin(); itr != outputs.end(); itr++) { - auto output_tensor = itr->second.get_memory().get_layout().size; + auto output_tensor = itr->second.get_memory()->get_layout().size; primitive_id primitive_name = itr->first; - cldnn::memory output_memory = itr->second.get_memory(); - int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T)); + cldnn::memory::ptr output_memory = itr->second.get_memory(); + int32_t output_size = (int32_t)(itr->second.get_memory()->size() / sizeof(T)); cldnn::tensor ref_output_tensor; VVVVF ref_primitive_output; @@ -959,7 +953,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) { ASSERT_EQ(ref_directions, output_tensor.spatial[1]); // directions should match ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]); // input size should match - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int32_t i = 0; if (format == cldnn::format::bfyx) { @@ -1025,14 +1019,14 @@ void lstm_gpu_users_test() { VVVVF last_hidden(batch_size, VVVF(1, VVF(directions, VF(hidden_size)))); VVVVF last_cell(batch_size, VVVF(1, VVF(directions, VF(hidden_size)))); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - memory input = memory::allocate(engine, { type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); - memory weights = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); - memory recurrent = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); - memory biases = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); - memory hidden = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); - memory cell = memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); + memory::ptr input = engine.allocate_memory({ type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); + memory::ptr weights = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } }); + memory::ptr recurrent = 
engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } }); + memory::ptr biases = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } }); + memory::ptr hidden = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); + memory::ptr cell = engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, directions } }); set_values(input, ref_input_vec); set_values(weights, ref_weights_vec); @@ -1045,7 +1039,7 @@ void lstm_gpu_users_test() { std::vector> input_ids_offsets; std::vector lstm_inputs; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); for (int i = 0; i < sequence_len; ++i) { input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}}); @@ -1055,8 +1049,8 @@ void lstm_gpu_users_test() { topology.add(data("weights", weights)); topology.add(data("recurrent", recurrent)); topology.add(data("biases", biases)); - topology.add(input_layout("hidden", hidden.get_layout())); - topology.add(input_layout("cell", cell.get_layout())); + topology.add(input_layout("hidden", hidden->get_layout())); + topology.add(input_layout("cell", cell->get_layout())); topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent", "biases", "hidden", "cell", "", 0, false, { activation_func::logistic, activation_func::hyperbolic_tan, activation_func::hyperbolic_tan }, {}, @@ -1074,8 +1068,8 @@ void lstm_gpu_users_test() { // check if the number of returned primitives match the expected number of output primitives ASSERT_EQ(size_t(1), outputs.size()); - cldnn::memory output_memory = outputs.begin()->second.get_memory(); - auto output_ptr = output_memory.pointer(); + cldnn::memory::ptr output_memory = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); for (int32_t b = 0; b < batch_size; ++b) { for (int32_t s = 0; s < 1; ++s) { @@ -1148,31 +1142,31 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio clip_threshold, input_forget, false); } - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - memory input = memory::allocate(engine, { type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); + memory::ptr input = engine.allocate_memory({ type_to_data_type::value, format::bfyx, {batch_size, sequence_len, input_size, 1} }); set_values(input, ref_input_vec); - std::vector weights; - std::vector recurrent; - std::vector biases; - std::vector hidden; - std::vector cell; + std::vector weights; + std::vector recurrent; + std::vector biases; + std::vector hidden; + std::vector cell; for (int i = 0; i < layers; ++i) { - weights.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, direction, i == 0 ? input_size : hidden_size, 4 * hidden_size } })); + weights.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, direction, i == 0 ? 
input_size : hidden_size, 4 * hidden_size } })); set_values(weights[i], ref_weights_vec[i]); - recurrent.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } })); + recurrent.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } })); set_values(recurrent[i], ref_recurrent_vec[i]); if (has_bias) { - biases.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, direction } })); + biases.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, { 1, 1, 4 * hidden_size, direction } })); set_values(biases[i], ref_bias_vec[i]); } if (has_initial_hidden) { - hidden.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, direction } })); + hidden.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, direction } })); set_values(hidden[i], ref_hidden_vec[i]); } if (has_initial_cell) { - cell.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, direction} })); + cell.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, { batch_size, 1, hidden_size, direction} })); set_values(cell[i], ref_cell_vec[i]); } } @@ -1182,7 +1176,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio std::vector lstm_inputs; std::vector output_ids_offsets; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); cldnn::primitive_id prev_node_id; for (int i = 0; i < layers; ++i) { @@ -1198,8 +1192,8 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio topology.add(data(weights_id, weights[i])); topology.add(data(recurrent_id, recurrent[i])); if (has_bias) topology.add(data(biases_id, biases[i])); - if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[i].get_layout())); - if (has_initial_cell) topology.add(input_layout(cell_id, cell[i].get_layout())); + if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[i]->get_layout())); + if (has_initial_cell) topology.add(input_layout(cell_id, cell[i]->get_layout())); if (i == 0) { topology.add(lstm(lstm_id, { "input" }, weights_id, recurrent_id, has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? 
cell_id : "", "", @@ -1232,13 +1226,13 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio auto outputs = network.execute(); { ASSERT_EQ(outputs.size(), size_t(1)); - size_t output_size = outputs.begin()->second.get_memory().size() / sizeof(T); + size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T); ASSERT_EQ(output_size, size_t(hidden_size * sequence_len * batch_size * direction)); auto output = outputs.begin()->second.get_memory(); // Get the output tensor - cldnn::layout output_layout = output.get_layout(); + cldnn::layout output_layout = output->get_layout(); cldnn::tensor output_tensor = output_layout.size; // Compare the output tensor configuration against the reference value @@ -1248,7 +1242,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio ASSERT_EQ(direction, output_tensor.spatial[1]); ASSERT_EQ(hidden_size, output_tensor.spatial[0]); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t i = 0; for (int32_t b = 0; b < batch_size; ++b) { for (int32_t s = 0; s < sequence_len; ++s) { @@ -1388,49 +1382,49 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, } } - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor = { batch_size, sequence_len, input_size, 1 }; layout layout = { type_to_data_type::value, cldnn::format::bfyx, input_tensor }; - memory input = memory::allocate(engine, layout); + memory::ptr input = engine.allocate_memory(layout); set_values(input, ref_input_vec); // 2-dim vectors to support chain and layers - std::vector> weights; - std::vector> recurrent; - std::vector> biases; - std::vector> hidden; - std::vector> cell; + std::vector> weights; + std::vector> recurrent; + std::vector> biases; + std::vector> hidden; + std::vector> cell; for (size_t chain = 0; chain < chains; chain++) { - std::vector per_chain_weights; - std::vector per_chain_recurrent; - std::vector per_chain_biases; - std::vector per_chain_hidden; - std::vector per_chain_cell; + std::vector per_chain_weights; + std::vector per_chain_recurrent; + std::vector per_chain_biases; + std::vector per_chain_hidden; + std::vector per_chain_cell; for (size_t layer = 0; layer < layers; layer++) { - per_chain_weights.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, {1, directions, layer == 0 ? input_size : hidden_size, 4 * hidden_size} })); + per_chain_weights.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, {1, directions, layer == 0 ? 
input_size : hidden_size, 4 * hidden_size} })); set_values(per_chain_weights[layer], ref_weights_vec[chain][layer]); - per_chain_recurrent.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, {1, directions, hidden_size, 4 * hidden_size} })); + per_chain_recurrent.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, {1, directions, hidden_size, 4 * hidden_size} })); set_values(per_chain_recurrent[layer], ref_recurrent_vec[chain][layer]); if (has_bias) { - per_chain_biases.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, {1, 1, 4 * hidden_size, directions} })); + per_chain_biases.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, {1, 1, 4 * hidden_size, directions} })); set_values(per_chain_biases[layer], ref_bias_vec[chain][layer]); } if (has_initial_hidden) { - per_chain_hidden.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, {1, 1, hidden_size, directions} })); + per_chain_hidden.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, {1, 1, hidden_size, directions} })); set_values(per_chain_hidden[layer], ref_hidden_vec[chain][layer]); } if (has_initial_cell) { - per_chain_cell.push_back(memory::allocate(engine, { type_to_data_type::value, format::bfyx, {1, 1, hidden_size, directions} })); + per_chain_cell.push_back(engine.allocate_memory({ type_to_data_type::value, format::bfyx, {1, 1, hidden_size, directions} })); set_values(per_chain_cell[layer], ref_cell_vec[chain][layer]); } } @@ -1448,7 +1442,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, std::vector lstm_inputs; std::vector output_ids_offsets; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); for (int feature = 0; feature < sequence_len; feature++) { input_ids_offsets.push_back({ get_string_id(feature), {0, feature, 0, 0} }); @@ -1498,8 +1492,8 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, if (chain == 0 && layer == 0) { - if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[chain][layer].get_layout())); - if (has_initial_cell) topology.add(input_layout(cell_id, cell[chain][layer].get_layout())); + if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[chain][layer]->get_layout())); + if (has_initial_cell) topology.add(input_layout(cell_id, cell[chain][layer]->get_layout())); } // Get the initial hidden and initial cell for each layer for each chain link @@ -1577,7 +1571,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, auto outputs = network.execute(); for (auto itr = outputs.begin(); itr != outputs.end(); itr++) { - auto output_tensor = itr->second.get_memory().get_layout().size; + auto output_tensor = itr->second.get_memory()->get_layout().size; primitive_id primitive_name = itr->first; // Split the primitive id to get the chain id @@ -1589,8 +1583,8 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, size_t chain_id = stoi(chain_str); size_t layer_id = stoi(layer_str); - cldnn::memory output_memory = itr->second.get_memory(); - int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T)); + cldnn::memory::ptr output_memory = itr->second.get_memory(); + int32_t output_size = (int32_t)(itr->second.get_memory()->size() / sizeof(T)); cldnn::tensor ref_output_tensor; VVVVF ref_primitive_output; @@ -1628,7 +1622,7 @@ void lstm_gpu_chain_test(int batch_size, int 
input_size, int hidden_size, ASSERT_EQ(ref_directions, output_tensor.spatial[1]); // directions should match ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]); // input size should match - auto output_ptr = output_memory.pointer(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int32_t i = 0; for (int32_t b = 0; b < ref_batch_size; ++b) { @@ -2049,4 +2043,3 @@ TEST(lstm_gpu, generic_lstm_stacked_bi_f16) { // integration testing using multi-layer and chained LSTMs // LSTMs single input // optional activation list - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp index 1fbdba583d7..0cb3c2cb828 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp @@ -3,22 +3,18 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/max_unpooling.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include -#include -#include "test_utils/float16.h" + +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(max_unpooling_gpu, basic_in2x3x2x2) { // Input : 2x2x2x1 @@ -45,10 +41,10 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2) { // f1: b0: 0 0 0 b1: 0 0 0 // f1: b0: 0 8 16 b1: 12 0 17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 4.0f, 4.0f, @@ -65,7 +61,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("arg_max", arg_max)); topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); @@ -76,8 +72,8 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2) { auto outputs = network.execute(); auto output = outputs.at("max_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -127,10 +123,10 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) { // f1: b0: 0 0 0 b1: 0 0 0 // f1: b0: 0 8 16 b1: 12 0 17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 4.0f, 4.0f, @@ -147,7 +143,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) { }); topology 
topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("arg_max", arg_max)); topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0))); @@ -158,8 +154,8 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) { auto outputs = network.execute(); auto output = outputs.at("max_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -218,10 +214,10 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) { // f1: b0: 0 0 0 b1: 0 0 0 // f1: b0: 0 8 16 b1: 12 0 17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 4.0f, 4.0f, @@ -238,7 +234,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("arg_max", arg_max)); topology.add(max_unpooling("max_unpooling", "input", "arg_max", {2, 2, 3, 2})); @@ -249,8 +245,8 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) { auto outputs = network.execute(); auto output = outputs.at("max_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -299,10 +295,10 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) { // f1: b0: 0 0 0 b1: 0 0 0 // f1: b0: 0 8 16 b1: 12 0 17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 2, 2, 1 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 2, 1 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { FLOAT16(4.0f), FLOAT16(4.0f), @@ -319,7 +315,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("arg_max", arg_max)); topology.add(max_unpooling("max_unpooling", "input", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); @@ -330,8 +326,8 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) { auto outputs = network.execute(); auto output = outputs.at("max_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -379,10 
+375,10 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) { // f1: b0: 0 0 0 b1: 0 0 0 // f1: b0: 0 8 16 b1: 12 0 17 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -396,7 +392,7 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling_max_with_argmax", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); topology.add(max_unpooling("max_unpooling", "pooling_max_with_argmax", "arg_max", { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); @@ -408,9 +404,9 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) { auto outputs = network.execute(); auto output = outputs.at("max_unpooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 2); @@ -443,4 +439,4 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) { for (size_t i = 0; i < expected_argmax_vec.size(); ++i) { EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]); } -} \ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp index d303700be2f..861cd2672cd 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp @@ -4,24 +4,19 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; #if 0 TEST(memory_tests, DISABLED_execution_loop) @@ -31,12 +26,12 @@ TEST(memory_tests, DISABLED_execution_loop) memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), activation("out", "in", activation_func::linear) }; network net(eng, tpl); - + while (true) { net.set_input_data("in", in); @@ -51,7 +46,7 @@ TEST(memory_tests, DISABLED_network_creation_loop) memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1, 1, 1000, 1000 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), activation("out", "in", activation_func::linear) }; @@ -62,17 +57,19 @@ TEST(memory_tests, 
DISABLED_network_creation_loop) } #endif TEST(memory_pool, basic_non_padded_relu_pipe) { - // 5 relu's of size 1x4x1x1 - const cldnn::engine engine;// here we need new engine + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 4; auto x_size = 1; auto y_size = 1; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(activation("relu1", "relu", activation_func::relu)); topology.add(activation("relu2", "relu1", activation_func::relu)); @@ -85,27 +82,27 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + network network(*engine, topology, bo); network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 64); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 64); } TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { - // uncomment this line to disable memory pool - /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false }; - engine engine{ cfg };*/ - const cldnn::engine engine;// here we need new engine + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 4; auto x_size = 4; auto y_size = 4; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(activation("relu1", "relu", activation_func::relu)); topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 })); @@ -117,11 +114,11 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + network network(*engine, topology, bo); network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)896); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)896); } TEST(memory_pool, multi_outputs_network) { @@ -130,19 +127,16 @@ TEST(memory_pool, multi_outputs_network) { // -- relu2 -- relu3 -- relu5--relu6--relu7 // neither of relu5, relu6 nor relu7 can share 
resource with relu4. - // uncomment this line to disable memory pool - /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false }; - engine engine{ cfg };*/ - const cldnn::engine engine;// here we need new engine + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 4; auto x_size = 4; auto y_size = 4; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(activation("relu1", "relu", activation_func::relu)); topology.add(activation("relu2", "input", activation_func::relu)); @@ -155,11 +149,11 @@ TEST(memory_pool, multi_outputs_network) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + network network(*engine, topology, bo); network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1536); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)1536); } TEST(memory_pool, oooq) { @@ -168,17 +162,19 @@ TEST(memory_pool, oooq) { -- relu3 -- relu5 --------- neither of relu5, relu6 nor relu7 can share resource with relu4. */ - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 4; auto x_size = 4; auto y_size = 4; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu1", "input", activation_func::relu)); topology.add(activation("relu2", "input", activation_func::relu)); topology.add(activation("relu3", "input", activation_func::relu)); @@ -191,11 +187,11 @@ TEST(memory_pool, oooq) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); + network network(*engine, topology, bo); network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2560); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 2560); } TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { @@ -204,14 +200,16 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { -- relu3 -- relu5 --------- neither of relu5, relu6 nor relu7 can share resource with relu4. 
*/ - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 4; auto inp_x_size = 4; auto inp_y_size = 4; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); set_values(input, { 1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f, @@ -221,7 +219,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu1", "input", activation_func::relu)); topology.add(activation("relu2", "input", activation_func::sqrt)); topology.add(activation("relu3", "input", activation_func::square)); @@ -234,25 +232,25 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network_first(engine, topology, bo); + network network_first(*engine, topology, bo); network_first.set_input_data("input", input); auto outputs = network_first.execute(); auto output_memory_first = outputs.at("relu6").get_memory(); - auto output_layout_first = output_memory_first.get_layout(); - auto output_ptr_first = output_memory_first.pointer(); + auto output_layout_first = output_memory_first->get_layout(); + cldnn::mem_lock output_ptr_first(output_memory_first, get_test_stream()); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2560); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 2560); - network network_second(engine, topology, bo); + network network_second(*engine, topology, bo); network_second.set_input_data("input", input); auto outputs_second = network_second.execute(); auto output_memory_second = outputs_second.at("relu6").get_memory(); - auto output_layout_second = output_memory_second.get_layout(); - auto output_ptr_second = output_memory_second.pointer(); + auto output_layout_second = output_memory_second->get_layout(); + cldnn::mem_lock output_ptr_second(output_memory_second, get_test_stream()); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3328); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 3328); EXPECT_EQ(output_layout_first, output_layout_second); @@ -279,16 +277,17 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { } TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { - - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests 
+ // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_num = 1; auto feature_num = 3; auto inp_x_size = 4; auto inp_y_size = 4; - auto input= memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); - auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input= engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto weights = engine->allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); std::vector dummy_input_data_1 = { /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f, @@ -300,7 +299,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }), softmax("softmax", "conv")); @@ -308,31 +307,31 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network_first(engine, topology, bo); + network network_first(*engine, topology, bo); network_first.set_input_data("input", input); auto outputs = network_first.execute(); uint64_t cl_mem_result = 824; uint64_t usm_result = 1208; // USM has a higher peak, since transferring memory to device adds temporary memory bytes allocated. Old memory is deallocated quickly, but max peak is higher. - auto is_correct = engine.get_max_used_device_memory_size() == cl_mem_result - || engine.get_max_used_device_memory_size() == usm_result; + auto is_correct = engine->get_max_used_device_memory() == cl_mem_result + || engine->get_max_used_device_memory() == usm_result; EXPECT_TRUE(is_correct) << "Memory max peak is not correct"; auto output_memory_first = outputs.at("softmax").get_memory(); - auto output_layout_first = output_memory_first.get_layout(); - auto output_ptr_first = output_memory_first.pointer(); + auto output_layout_first = output_memory_first->get_layout(); + cldnn::mem_lock output_ptr_first(output_memory_first, get_test_stream()); - network network_second(engine, topology, bo); + network network_second(*engine, topology, bo); network_second.set_input_data("input", input); auto outputs_second = network_second.execute(); auto output_memory_second = outputs_second.at("softmax").get_memory(); - auto output_layout_second = output_memory_second.get_layout(); - auto output_ptr_second = output_memory_second.pointer(); + auto output_layout_second = output_memory_second->get_layout(); + cldnn::mem_lock output_ptr_second(output_memory_second, get_test_stream()); cl_mem_result = 1224; usm_result = 1992; // USM has a higher peak, since transferring memory to device adds temporary memory bytes allocated. Old memory is deallocated quickly, but max peak is higher.
- is_correct = engine.get_max_used_device_memory_size() == cl_mem_result - || engine.get_max_used_device_memory_size() == usm_result; + is_correct = engine->get_max_used_device_memory() == cl_mem_result + || engine->get_max_used_device_memory() == usm_result; EXPECT_TRUE(is_correct) << "Memory max peak is not correct"; EXPECT_EQ(output_layout_first, output_layout_second); @@ -359,9 +358,10 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { } TEST(memory_pool, shared_mem_pool_diff_batches) { - - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); auto batch_8 = 8; auto batch_1 = 1; auto feature_num = 3; @@ -371,9 +371,9 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { auto fmt = format::bfyx; layout lay_batch_1 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) }}; layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }}; - auto input_1 = memory::allocate(engine, lay_batch_1); - auto input_8 = memory::allocate(engine, lay_batch_8); - auto weights = memory::allocate(engine, { dt, fmt, { 1, 1, 3, 2 } }); + auto input_1 = engine->allocate_memory(lay_batch_1); + auto input_8 = engine->allocate_memory(lay_batch_8); + auto weights = engine->allocate_memory({ dt, fmt, { 1, 1, 3, 2 } }); std::vector dummy_input_data_1 = generate_random_1d(batch_1*feature_num*inp_x_size*inp_y_size, 0, 1); std::vector dummy_input_data_8 = generate_random_1d(batch_8*feature_num*inp_x_size*inp_y_size, 0, 1); @@ -383,7 +383,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f }); topology topo( - input_layout("input", input_8.get_layout()), + input_layout("input", input_8->get_layout()), data("weights", weights), convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }), softmax("softmax", "conv")); @@ -391,25 +391,27 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network_first(engine, topo, bo); + network network_first(*engine, topo, bo); network_first.set_input_data("input", input_8); auto outputs = network_first.execute(); - auto dev_info = engine.get_info(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); + auto dev_info = engine->get_device_info(); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928); - topo.change_input_layout("input", input_1.get_layout());//change input layout to batch=1 + topo.change_input_layout("input", input_1->get_layout());//change input layout to batch=1 - network network_second(engine, topo, bo); + network network_second(*engine, topo, bo); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928); } TEST(memory_pool, shared_dep_two_output) { + // We need a new engine here to get correct get_max_used_device_memory() result + // If we reuse common engine, then max memory 
value will be taken from some previously executed tests + // as it's tracked within engine instance + auto engine = create_test_engine(); - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; auto batch_1 = 1; auto feature_num = 1; auto inp_x_size = 4; @@ -417,7 +419,7 @@ TEST(memory_pool, shared_dep_two_output) { auto dt = data_types::f32; auto fmt = format::bfyx; layout lay_batch_1 = { dt, fmt,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } }; - auto input_1 = memory::allocate(engine, lay_batch_1); + auto input_1 = engine->allocate_memory(lay_batch_1); set_random_values(input_1); //build primitives @@ -445,21 +447,19 @@ TEST(memory_pool, shared_dep_two_output) { build_options bo; bo.set_option(build_option::optimize_data(true)); - network network(engine, topo, bo); + network network(*engine, topo, bo); auto outputs = network.execute(); - EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)256); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)256); } TEST(memory_pool, non_opt_intermidate_opt_after) { - - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; + auto& engine = get_test_engine(); auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 }); auto input_layout2 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 }); - auto input_memory1 = cldnn::memory::allocate(engine, input_layout1); - auto input_memory2 = cldnn::memory::allocate(engine, input_layout2); - auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto input_memory1 = engine.allocate_memory(input_layout1); + auto input_memory2 = engine.allocate_memory(input_layout2); + auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f }); @@ -496,20 +496,19 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { auto out1 = outputs.at("elt1"); auto out2 = outputs.at("elt2"); - auto out1_ptr = out1.get_memory().pointer(); - auto out2_ptr = out2.get_memory().pointer(); + cldnn::mem_lock out1_ptr(out1.get_memory(), get_test_stream()); + cldnn::mem_lock out2_ptr(out2.get_memory(), get_test_stream()); EXPECT_EQ(out1_ptr[0], 1.0f); EXPECT_EQ(out2_ptr[0], 2.0f); } TEST(memory_pool, add_mem_dep_test) { + auto& engine = get_test_engine(); - engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ }; - engine engine{ cfg }; auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 }); - auto input_memory1 = cldnn::memory::allocate(engine, input_layout1); - auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto input_memory1 = engine.allocate_memory(input_layout1); + auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); 
set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f, @@ -545,8 +544,8 @@ TEST(memory_pool, add_mem_dep_test) { auto out1 = outputs.at("out3"); auto out2 = outputs.at("out4"); - auto out1_ptr = out1.get_memory().pointer(); - auto out2_ptr = out2.get_memory().pointer(); + cldnn::mem_lock out1_ptr(out1.get_memory(), get_test_stream()); + cldnn::mem_lock out2_ptr(out2.get_memory(), get_test_stream()); EXPECT_EQ(out1_ptr[0], 1.0f); EXPECT_EQ(out1_ptr[1], 2.0f); EXPECT_EQ(out1_ptr[2], 3.0f); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp index 410fc6cad02..5632a19cfd4 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp @@ -4,26 +4,22 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include "api/mvn.hpp" -#include "api/reorder.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include "float16.h" #include "test_utils.h" +#include +#include +#include + +#include + using namespace cldnn; +using namespace ::tests; class mvn_gpu_test : public ::testing::TestWithParam {}; template -void mvn_compute_mean_across_channels(cldnn::memory& output, bool normalize_variance) { - auto output_size = output.get_layout().size; +void mvn_compute_mean_across_channels(cldnn::memory::ptr output, bool normalize_variance) { + auto output_size = output->get_layout().size; uint32_t batch_size = output_size.batch[0]; uint32_t feature_size = output_size.feature[0]; @@ -31,9 +27,9 @@ void mvn_compute_mean_across_channels(cldnn::memory& output, bool normalize_vari uint32_t y_size = output_size.spatial[1]; uint32_t x_size = output_size.spatial[0]; - auto buff = output.pointer(); + cldnn::mem_lock buff(output, get_test_stream()); - float err_margin = output.get_layout().data_type == data_types::f32 ? 1e-03F : 1e-02F; + float err_margin = output->get_layout().data_type == data_types::f32 ? 1e-03F : 1e-02F; for (uint32_t b = 0; b < batch_size; ++b) { float sum = 0.f; @@ -43,7 +39,7 @@ void mvn_compute_mean_across_channels(cldnn::memory& output, bool normalize_vari for (uint32_t y = 0; y < y_size; ++y) { for (uint32_t x = 0; x < x_size; ++x) { auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0)); - size_t data_index = output.get_layout().get_linear_offset(index_tensor); + size_t data_index = output->get_layout().get_linear_offset(index_tensor); float data = static_cast(buff[data_index]); sum += data; if (normalize_variance) @@ -65,8 +61,8 @@ void mvn_compute_mean_across_channels(cldnn::memory& output, bool normalize_vari } template -void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_variance) { - auto output_size = output.get_layout().size; +void mvn_compute_mean_within_channels(cldnn::memory::ptr output, bool normalize_variance) { + auto output_size = output->get_layout().size; uint32_t batch_size = output_size.batch[0]; uint32_t feature_size = output_size.feature[0]; @@ -74,9 +70,9 @@ void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_vari uint32_t y_size = output_size.spatial[1]; uint32_t x_size = output_size.spatial[0]; - auto buff = output.pointer(); + cldnn::mem_lock buff(output, get_test_stream()); - float err_margin = output.get_layout().data_type == data_types::f32 ? 
1e-03F : 1e-02F; + float err_margin = output->get_layout().data_type == data_types::f32 ? 1e-03F : 1e-02F; for (uint32_t b = 0; b < batch_size; ++b) { for (uint32_t f = 0; f < feature_size; ++f) { @@ -86,7 +82,7 @@ void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_vari for (uint32_t y = 0; y < y_size; ++y) { for (uint32_t x = 0; x < x_size; ++x) { auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0)); - size_t data_index = output.get_layout().get_linear_offset(index_tensor); + size_t data_index = output->get_layout().get_linear_offset(index_tensor); float data = static_cast(buff[data_index]); sum += data; if (normalize_variance) @@ -110,16 +106,16 @@ void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_vari TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) { // mvn across channels fp32 test with normalize_variance set to false using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, false, true)); network network(engine, topology); @@ -139,14 +135,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx) { using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, true, true)); network network(engine, topology); @@ -164,16 +160,16 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx) { TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_outside_sqrt_fp16) { // mvn across channels fp16 test with normalize_variance set to false using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, false, true)); network network(engine, topology); @@ -193,14 +189,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16) { using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", 
input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, true, true)); network network(engine, topology); @@ -218,16 +214,16 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16) { TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance) { // mvn across channels fp32 test with normalize_variance set to true using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, false, true)); network network(engine, topology); @@ -247,14 +243,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance) using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, true, true)); network network(engine, topology); @@ -274,14 +270,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, false, true)); network network(engine, topology); @@ -299,16 +295,16 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance_fp16) { // mvn across channels fp16 test with normalize_variance set to true using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, true, true)); network network(engine, topology); @@ -328,14 +324,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx) { using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, 
{data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, false, false)); network network(engine, topology); @@ -353,16 +349,16 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx) { TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt__bfyx) { // mvn within channels fp32 test with normalize_variance set to false using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, true, false)); network network(engine, topology); @@ -380,16 +376,16 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt__bfyx) { TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_fp16) { // mvn within channels fp16 test with normalize_variance set to false using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, false, false)); network network(engine, topology); @@ -409,14 +405,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_fp16) { using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", false, 1e-10f, true, false)); network network(engine, topology); @@ -434,16 +430,16 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_fp16) { TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance) { // mvn within channels fp32 test with normalize_variance set to true using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); 
topology.add(mvn("mvn", "input", true, 1e-10f, false, false)); network network(engine, topology); @@ -463,14 +459,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance) using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, true, false)); network network(engine, topology); @@ -490,14 +486,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance using namespace cldnn; using namespace tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, false, false)); network network(engine, topology); @@ -515,16 +511,16 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance_fp16) { // mvn within channels fp16 test with normalize_variance set to true using namespace cldnn; - using namespace tests; + using namespace ::tests; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); + auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mvn("mvn", "input", true, 1e-10f, true, false)); network network(engine, topology); @@ -551,16 +547,16 @@ struct mvn_basic_test_params { struct mvn_random_test : ::testing::TestWithParam { template - void fill_data(memory& mem, const tests::VVVVVF& data) { - auto size = mem.get_layout().size; - auto ptr = mem.pointer(); + void fill_data(memory::ptr mem, const tests::VVVVVF& data) { + auto size = mem->get_layout().size; + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < static_cast(size.batch[0]); ++bi) { for (size_t fi = 0; fi < static_cast(size.feature[0]); ++fi) { for (size_t zi = 0; zi < static_cast(size.spatial[2]); ++zi) { for (size_t yi = 0; yi < static_cast(size.spatial[1]); ++yi) { for (size_t xi = 0; xi < static_cast(size.spatial[0]); ++xi) { auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0)); - auto offset = mem.get_layout().get_linear_offset(tensor_addr); + auto offset = mem->get_layout().get_linear_offset(tensor_addr); ptr[offset] = data[bi][fi][xi][yi][zi]; } } @@ -570,8 +566,8 @@ struct mvn_random_test : ::testing::TestWithParam { } template - void fill_random_data(memory& mem, int min, int max, int k = 8) { - auto size = mem.get_layout().size; + void fill_random_data(memory::ptr mem, int min, int max, int k 
= 8) { + auto size = mem->get_layout().size; auto input_data = tests::generate_random_5d(size.batch[0], size.feature[0], size.spatial[0], @@ -583,14 +579,14 @@ struct mvn_random_test : ::testing::TestWithParam { fill_data(mem, input_data); } - void check_result(memory& output, bool across_channels, bool normalize_variance) { - if (output.get_layout().data_type == data_types::f32) { + void check_result(memory::ptr output, bool across_channels, bool normalize_variance) { + if (output->get_layout().data_type == data_types::f32) { if (across_channels) { mvn_compute_mean_across_channels(output, normalize_variance); } else { mvn_compute_mean_within_channels(output, normalize_variance); } - } else if (output.get_layout().data_type == data_types::f16) { + } else if (output->get_layout().data_type == data_types::f16) { if (across_channels) { mvn_compute_mean_across_channels(output, normalize_variance); } else { @@ -599,11 +595,11 @@ struct mvn_random_test : ::testing::TestWithParam { } } - void execute(const mvn_basic_test_params& params, const engine& eng) { + void execute(const mvn_basic_test_params& params, engine& eng) { auto& size = params.input_size; auto& output_pad = params.output_pad; - auto input = memory::allocate(eng, {params.input_type, params.input_format, size}); + auto input = eng.allocate_memory({params.input_type, params.input_format, size}); switch (params.input_type) { case data_types::f32: @@ -623,7 +619,7 @@ struct mvn_random_test : ::testing::TestWithParam { } topology topo; - topo.add(input_layout("input", input.get_layout())); + topo.add(input_layout("input", input->get_layout())); auto prim = mvn("mvn", "input", params.normalize_variance, 1e-10f, false, params.across_channels); prim.output_padding = output_pad; topo.add(prim); @@ -642,7 +638,7 @@ struct mvn_random_test : ::testing::TestWithParam { }; TEST_P(mvn_random_test, random) { - auto eng = tests::get_test_engine(); + auto& eng = tests::get_test_engine(); this->execute(GetParam(), eng); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/non_max_suppression_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/non_max_suppression_test.cpp index 554e6486086..7ca881a7b27 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/non_max_suppression_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/non_max_suppression_test.cpp @@ -4,17 +4,14 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include - #include "test_utils.h" -#include "api/topology.hpp" -#include "api/network.hpp" -#include "api/input_layout.hpp" -#include "api/non_max_suppression.hpp" -#include "api/data.hpp" +#include +#include +#include using namespace cldnn; +using namespace ::tests; template struct non_max_suppression_basic : public testing::Test { @@ -73,14 +70,14 @@ struct non_max_suppression_basic : public testing::Test { const layout boxes_layout = layout(type_to_data_type::value, format::bfyx, tensor(batch(batch_size), feature(boxes_num), spatial(1, 4))); const layout scores_layout = layout(type_to_data_type::value, format::bfyx, tensor(batch(batch_size), feature(classes_num), spatial(1, boxes_num))); - memory get_boxes_memory(engine& engine) { - auto mem = memory::allocate(engine, boxes_layout); + memory::ptr get_boxes_memory(engine& engine) { + auto mem = engine.allocate_memory(boxes_layout); tests::set_values(mem, boxes_data); return mem; } - memory get_scores_memory(engine& engine) { - auto mem = memory::allocate(engine, scores_layout); + 
memory::ptr get_scores_memory(engine& engine) { + auto mem = engine.allocate_memory(scores_layout); tests::set_values(mem, scores_data); return mem; } @@ -92,7 +89,7 @@ using nms_types = testing::Types; TYPED_TEST_CASE(non_max_suppression_basic, nms_types); TYPED_TEST(non_max_suppression_basic, basic) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); topology topo; topo.add(input_layout("boxes", this->boxes_layout)); @@ -122,7 +119,7 @@ TYPED_TEST(non_max_suppression_basic, basic) { }; auto out_mem = result.at("nms").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { @@ -131,9 +128,9 @@ TYPED_TEST(non_max_suppression_basic, basic) { } TYPED_TEST(non_max_suppression_basic, num_per_class) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); - auto num_per_class_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(num_per_class_mem, { 1.f }); topology topo; @@ -165,7 +162,7 @@ TYPED_TEST(non_max_suppression_basic, num_per_class) { }; auto out_mem = result.at("nms").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { @@ -174,11 +171,11 @@ TYPED_TEST(non_max_suppression_basic, num_per_class) { } TYPED_TEST(non_max_suppression_basic, iou_threshold) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); - auto num_per_class_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(num_per_class_mem, { 3.f }); - auto iou_threshold_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(iou_threshold_mem, { 0.4f }); topology topo; @@ -211,7 +208,7 @@ TYPED_TEST(non_max_suppression_basic, iou_threshold) { }; auto out_mem = result.at("nms").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { @@ -220,13 +217,13 @@ TYPED_TEST(non_max_suppression_basic, iou_threshold) { } TYPED_TEST(non_max_suppression_basic, score_threshold) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); - auto num_per_class_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(num_per_class_mem, { 3.f }); - auto iou_threshold_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(iou_threshold_mem, { 0.4f }); - auto score_threshold_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto score_threshold_mem = 
engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(score_threshold_mem, { 0.4f }); topology topo; @@ -260,7 +257,7 @@ TYPED_TEST(non_max_suppression_basic, score_threshold) { }; auto out_mem = result.at("nms").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { @@ -269,15 +266,15 @@ TYPED_TEST(non_max_suppression_basic, score_threshold) { } TYPED_TEST(non_max_suppression_basic, soft_nms_sigma) { - auto engine = tests::get_test_engine(); + auto& engine = tests::get_test_engine(); - auto num_per_class_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto num_per_class_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(num_per_class_mem, { 3.f }); - auto iou_threshold_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto iou_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(iou_threshold_mem, { 0.4f }); - auto score_threshold_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto score_threshold_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(score_threshold_mem, { 0.4f }); - auto soft_nms_sigma_mem = memory::allocate(engine, layout(data_types::f32, format::bfyx, tensor(batch(1)))); + auto soft_nms_sigma_mem = engine.allocate_memory(layout(data_types::f32, format::bfyx, tensor(batch(1)))); tests::set_values(soft_nms_sigma_mem, { 0.5f }); topology topo; @@ -312,7 +309,7 @@ TYPED_TEST(non_max_suppression_basic, soft_nms_sigma) { }; auto out_mem = result.at("nms").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/normalizel2_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/normalizel2_gpu_test.cpp index 00a400cce57..a96185c7d8f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/normalizel2_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/normalizel2_gpu_test.cpp @@ -3,15 +3,12 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include +#include "test_utils.h" + +#include +#include +#include #include #include @@ -23,15 +20,15 @@ TEST(normalizel2_f32_gpu, basic) { // Input : 1x2x3x3 // Output : 1x2x3x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const unsigned b = 1; const unsigned f = 2; const unsigned y = 3; const unsigned x = 3; - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {b, f, y, x}}); - auto weights = memory::allocate(engine, {data_types::f32, format::bfyx, {1, f, 1, 1}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {b, f, y, x}}); + auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}}); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -47,7 +44,7 @@ TEST(normalizel2_f32_gpu, basic) { set_values(weights, weightVals); 
topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(data("Input1", weights)); topology.add(normalize("normalizel2", "Input0", "Input1", false)); @@ -58,7 +55,7 @@ TEST(normalizel2_f32_gpu, basic) { auto outputs = network.execute(); auto output = outputs.at("normalizel2").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = {0.f, 0.0995037f, @@ -88,15 +85,15 @@ TEST(normalizel2_f32_gpu, basic2) { // Input : 1x2x3x3 // Output : 1x2x3x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const unsigned b = 1; const unsigned f = 2; const unsigned y = 3; const unsigned x = 3; - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {b, f, y, x}}); - auto weights = memory::allocate(engine, {data_types::f32, format::bfyx, {1, f, 1, 1}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {b, f, y, x}}); + auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}}); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -112,7 +109,7 @@ TEST(normalizel2_f32_gpu, basic2) { set_values(weights, weightVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(data("Input1", weights)); topology.add(normalize("normalizel2", "Input0", "Input1", true)); @@ -123,7 +120,7 @@ TEST(normalizel2_f32_gpu, basic2) { auto outputs = network.execute(); auto output = outputs.at("normalizel2").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = {0.f, 0.0236691f, @@ -153,15 +150,15 @@ TEST(normalizel2_int8_gpu, basic) { // Input : 1x2x3x3 // Output : 1x2x3x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const unsigned b = 1; const unsigned f = 2; const unsigned y = 3; const unsigned x = 3; - auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {b, f, y, x}}); - auto weights = memory::allocate(engine, {data_types::f32, format::bfyx, {1, f, 1, 1}}); + auto input = engine.allocate_memory({data_types::i8, format::bfyx, {b, f, y, x}}); + auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}}); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -177,7 +174,7 @@ TEST(normalizel2_int8_gpu, basic) { set_values(weights, weightVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(data("Input1", weights)); topology.add(normalize("normalizel2", "Input0", "Input1", false)); @@ -188,7 +185,7 @@ TEST(normalizel2_int8_gpu, basic) { auto outputs = network.execute(); auto output = outputs.at("normalizel2").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = {0.f, 0.0995037f, @@ -218,15 +215,15 @@ TEST(normalizel2_int8_gpu, basic2) { // Input : 1x2x3x3 // Output : 1x2x3x3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const unsigned b = 1; const unsigned f = 2; const unsigned y = 3; const unsigned x = 3; - auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {b, f, y, x}}); - auto 
weights = memory::allocate(engine, {data_types::f32, format::bfyx, {1, f, 1, 1}}); + auto input = engine.allocate_memory({data_types::i8, format::bfyx, {b, f, y, x}}); + auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}}); std::vector inputVals(b * f * y * x); std::generate(inputVals.begin(), inputVals.end(), []() { @@ -242,7 +239,7 @@ TEST(normalizel2_int8_gpu, basic2) { set_values(weights, weightVals); topology topology; - topology.add(input_layout("Input0", input.get_layout())); + topology.add(input_layout("Input0", input->get_layout())); topology.add(data("Input1", weights)); topology.add(normalize("normalizel2", "Input0", "Input1", true)); @@ -253,7 +250,7 @@ TEST(normalizel2_int8_gpu, basic2) { auto outputs = network.execute(); auto output = outputs.at("normalizel2").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = {0.f, 0.0236691f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp index cb510e4a685..08bb8690be0 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp @@ -3,17 +3,11 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" -#include "test_utils/uniform_quantized_real_distribution.hpp" +#include +#include #include @@ -83,13 +77,13 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp VVVVF input_rnd = generate_random_4d(input_b, input_f, input_y, input_x, min_random, max_random); VF input_rnd_vec = flatten_4d(test_input_fmt, input_rnd); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(input_b, input_f, input_x, input_y); - auto input = memory::allocate(engine, { type_to_data_type::value, test_input_fmt, input_tensor }); + auto input = engine.allocate_memory({ type_to_data_type::value, test_input_fmt, input_tensor }); set_values(input, input_rnd_vec); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output", "input", shape, one_hot_axis)); network network(engine, topology); @@ -99,8 +93,8 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); VVVVF output_cpu = one_hot_cpu(input_rnd, one_hot_axis, one_hot_limit, input_padding_y, input_padding_x, output_padding_y, output_padding_x); EXPECT_EQ(output_layout.format.value, test_input_fmt.value); @@ -166,11 +160,11 @@ TEST(one_hot_gpu_i32, bfzyx_ax4) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - 
topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -182,8 +176,8 @@ TEST(one_hot_gpu_i32, bfzyx_ax4) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -225,11 +219,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax4) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -241,8 +235,8 @@ TEST(one_hot_gpu_i64, bfzyx_ax4) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -284,11 +278,11 @@ TEST(one_hot_gpu_i32_to_f32, bfyx_ax4) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, data_types::f32, one_hot_axis)); set_values(input, input_rnd_vec); @@ -300,8 +294,8 @@ TEST(one_hot_gpu_i32_to_f32, bfyx_ax4) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -338,11 +332,11 @@ TEST(one_hot_gpu_i64_to_f32, bfyx_ax4) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, data_types::f32, one_hot_axis)); set_values(input, input_rnd_vec); @@ -354,8 +348,8 @@ 
TEST(one_hot_gpu_i64_to_f32, bfyx_ax4) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -389,11 +383,11 @@ TEST(one_hot_gpu_i32, bfzyx_ax0) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -405,8 +399,8 @@ TEST(one_hot_gpu_i32, bfzyx_ax0) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -444,11 +438,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax0) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -460,8 +454,8 @@ TEST(one_hot_gpu_i64, bfzyx_ax0) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -499,11 +493,11 @@ TEST(one_hot_gpu_i32, bfzyx_ax1) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -515,8 +509,8 @@ TEST(one_hot_gpu_i32, bfzyx_ax1) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock 
output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -554,11 +548,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax1) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -570,8 +564,8 @@ TEST(one_hot_gpu_i64, bfzyx_ax1) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -609,11 +603,11 @@ TEST(one_hot_gpu_i32, bfzyx_ax2) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -625,8 +619,8 @@ TEST(one_hot_gpu_i32, bfzyx_ax2) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -664,11 +658,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax2) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -680,8 +674,8 @@ TEST(one_hot_gpu_i64, bfzyx_ax2) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -719,11 +713,11 @@ TEST(one_hot_gpu_i32, bfzyx_ax3) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); 
tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -735,8 +729,8 @@ TEST(one_hot_gpu_i32, bfzyx_ax3) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -774,11 +768,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax3) { VF input_rnd_vec = {0, 1}; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor input_tensor(in_b, in_f, in_x, in_y); - auto input = memory::allocate(engine, { data_types::i64, format::bfyx, input_tensor }); + auto input = engine.allocate_memory({ data_types::i64, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output","input", shape, one_hot_axis)); set_values(input, input_rnd_vec); @@ -790,8 +784,8 @@ TEST(one_hot_gpu_i64, bfzyx_ax3) { EXPECT_EQ(outputs.begin()->first, "output"); auto output_memory = outputs.at("output").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); tensor output_tensor = output_layout.get_buffer_size(); int z_size = output_tensor.spatial[2]; @@ -818,11 +812,11 @@ TEST(one_hot_gpu_i64, bfzyx_ax3) { } TEST(one_hot_error, basic_error_wrong_axis) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx,{ 1, 1, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output", "input", tensor(1, 1, 1, 50), 5)); std::string msg_to_find = "Incorrect parameters configuration: one_hot_axis should be less or equal to 4."; @@ -830,11 +824,11 @@ TEST(one_hot_error, basic_error_wrong_axis) { } TEST(one_hot_error, basic_error_bad_shape) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::i32, format::bfyx,{ 1, 1, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(one_hot("output", "input", tensor(1, 5, 1, 50), 2)); std::string msg_to_find = "Incorrect parameters configuration: shape does not fit input size."; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp index d22a5bf8077..7fb709d9845 100644 --- 
a/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp @@ -3,31 +3,28 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/permute.hpp" -#include "api/reorder.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include -#include + +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include + #include -#include #include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace testing; TEST(permute_gpu_f32, output_ordering_test) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector> input_tensors = { @@ -60,9 +57,9 @@ TEST(permute_gpu_f32, output_ordering_test) for (auto const& perm : permutations) { - auto input = memory::allocate(engine, { data_types::f32, fr, tensor(inp_t) }); + auto input = engine.allocate_memory({ data_types::f32, fr, tensor(inp_t) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", perm)); network network(engine, topology); @@ -72,7 +69,7 @@ TEST(permute_gpu_f32, output_ordering_test) auto output_mem = output.get_memory(); EXPECT_EQ(outputs.size(), size_t(1)); auto ref_tensor = get_permutation(inp_t, perm); - auto out_tensor = output_mem.get_layout().size; + auto out_tensor = output_mem->get_layout().size; EXPECT_EQ(out_tensor.batch[0], ref_tensor[0]); EXPECT_EQ(out_tensor.feature[0], ref_tensor[1]); EXPECT_EQ(out_tensor.spatial[0], ref_tensor[2]); @@ -95,9 +92,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3) // // Output = input - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); std::vector values = { @@ -117,7 +114,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3) set_values(input, values); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 1, 2, 3 })); network network(engine, topology); @@ -129,7 +126,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3) auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 24; i++) { EXPECT_FLOAT_EQ(values[i], output_ptr[i]); @@ -157,9 +154,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2) // f1: b0: -15 -15 b1: -15 -15 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); set_values(input, { 1.0f, 2.0f, -15.f, @@ -176,7 +173,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 1, 3, 2 })); network network(engine, topology); @@ -206,7 +203,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2) -15.0f, -15.0f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for 
(int i = 0; i < 24; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -216,14 +213,14 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2) TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 100, 64, 1 } }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 100, 64, 1 } }); tests::set_random_values(input_mem); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), permute("permute", "input", { 1, 0, 2, 3 })); network network(engine, topology); @@ -235,8 +232,8 @@ TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3) auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); - auto input_ptr = input_mem.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock input_ptr(input_mem, get_test_stream()); for (int i = 0; i < 6400; i++) { EXPECT_FLOAT_EQ(input_ptr[i], output_ptr[i]); @@ -265,9 +262,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding) // f1: b0: -15 -15 b1: -15 -15 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); set_values(input, { 1.0f, 2.0f, -15.f, @@ -284,8 +281,8 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding) }); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), permute("permute", "reorder", { 0, 1, 3, 2 })); network network(engine, topology); @@ -315,7 +312,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding) -15.0f, -15.0f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 24; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -329,9 +326,9 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature) // Permute order : { 1, 0, 2, 3 } // Output : yxfb:2x8x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 8, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 8, 2, 1, 1 } }); set_values(input, { //b0 - b7 for f=0 @@ -342,7 +339,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 1, 0, 2, 3 })); network network(engine, topology); @@ -353,7 +350,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 2); EXPECT_EQ(out_tensor.feature[0], 8); EXPECT_EQ(out_tensor.spatial[0], 1); @@ -370,7 +367,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature) 5.2f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; 
i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -384,9 +381,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature) // Permute order : { 1, 0, 2, 3 } // Output : yxfb:2x8x1x1 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 8, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 8, 1, 1 } }); set_values(input, { //f0 - f7 for b=0 @@ -397,7 +394,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 1, 0, 2, 3 })); network network(engine, topology); @@ -408,7 +405,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 8); EXPECT_EQ(out_tensor.feature[0], 2); EXPECT_EQ(out_tensor.spatial[0], 1); @@ -425,7 +422,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature) 5.2f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -436,9 +433,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature) template void permute_test_with_reorder() { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); set_values(input, { 1.0f, 2.0f, -15.f, @@ -455,7 +452,7 @@ void permute_test_with_reorder() }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", { DType, format::bfyx,{ 2, 2, 3, 2 } }), permute("permute", "reorder", { 0, 1, 3, 2 }), reorder("reorder_out", "permute", { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } })); @@ -487,7 +484,7 @@ void permute_test_with_reorder() -15.0f, -15.0f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 24; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -512,8 +509,8 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) // Permute1 order : {0, 3, 1, 2} // Permute2 order : {0, 2, 3, 1} - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 8, 1, 16}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 8, 1, 16}}); std::vector values = { 0.0f, 1.0f, 2.0f, 3.0f, @@ -553,7 +550,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) set_values(input, values); // unfused topology topology_unfused( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder1", "input", format::b_fs_yx_fsv4, data_types::f32), permute("permute", "reorder1", { 0, 3, 1, 2}), reorder("reorder2", "permute", format::bfyx, data_types::f32), @@ -568,7 +565,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) // fused network topology topology_fused( - input_layout("input", input.get_layout()), + input_layout("input", 
input->get_layout()), reorder("reorder1", "input", format::b_fs_yx_fsv4, data_types::f32), permute("permute", "reorder1", { 0, 3, 1, 2}), reorder("reorder2", "permute", format::bfyx, data_types::f32), // to be fused to previous permute @@ -582,11 +579,11 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) auto outputs_fused = fused.execute(); auto outputs_unfused = unfused.execute(); auto output_fused = outputs_fused.begin()->second.get_memory(); - auto output_fused_ptr = output_fused.pointer(); + cldnn::mem_lock output_fused_ptr(output_fused, get_test_stream()); auto output_unfused = outputs_unfused.begin()->second.get_memory(); - auto output_unfused_ptr = output_unfused.pointer(); - EXPECT_EQ(output_fused.get_layout().format, cldnn::format::bfyx); - EXPECT_EQ(output_unfused.get_layout().format, cldnn::format::bfyx); + cldnn::mem_lock output_unfused_ptr(output_unfused, get_test_stream()); + EXPECT_EQ(output_fused->get_layout().format, cldnn::format::bfyx); + EXPECT_EQ(output_unfused->get_layout().format, cldnn::format::bfyx); EXPECT_EQ(fused.get_executed_primitives().size(), 4); EXPECT_EQ(unfused.get_executed_primitives().size(), 5); @@ -599,13 +596,13 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1) TEST(fc_permute_crop_gpu, basic_permute_yxfb) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 512 } }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 5, 1, 512 } }); //Topolgy creates permute which "repalces" the batch with the feature. topology topology( - input_layout("input", input_mem.get_layout()), // yxfb {1, 5, 1, 512 }} + input_layout("input", input_mem->get_layout()), // yxfb {1, 5, 1, 512 }} permute("permute", "input", { 1, 0, 2, 3 }) // yxfb {5, 1, 1, 512} --- without permute fix yxfb {1, 5, 512, 1} ); @@ -617,25 +614,25 @@ TEST(fc_permute_crop_gpu, basic_permute_yxfb) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 5); EXPECT_EQ(out_tensor.feature[0], 1); EXPECT_EQ(out_tensor.spatial[0], 1); EXPECT_EQ(out_tensor.spatial[1], 512); - EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb); + EXPECT_EQ(output->get_layout().format, cldnn::format::yxfb); } TEST(fc_permute_crop_gpu, basic_0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 5, 11264, 1, 1 } }); - auto weights_mem = memory::allocate(engine, { data_types::f32, format::yxio,{ 512, 11264, 1, 1 } }); - auto bias_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 512, 1 } }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::yxfb,{ 5, 11264, 1, 1 } }); + auto weights_mem = engine.allocate_memory({ data_types::f32, format::yxio,{ 512, 11264, 1, 1 } }); + auto bias_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 512, 1 } }); topology topology( - input_layout("input", input_mem.get_layout()), // bfyx {5, 11264, 1, 1}} + input_layout("input", input_mem->get_layout()), // bfyx {5, 11264, 1, 1}} data("weights", weights_mem), data("bias", bias_mem), fully_connected("fully_connected", "input", "weights", "bias"), // yxfb {5, 512, 1, 1} @@ -652,25 +649,25 @@ TEST(fc_permute_crop_gpu, basic_0) 
EXPECT_EQ(outputs.begin()->first, "crop"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 1); EXPECT_EQ(out_tensor.feature[0], 1); EXPECT_EQ(out_tensor.spatial[0], 1); EXPECT_EQ(out_tensor.spatial[1], 512); - EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb); + EXPECT_EQ(output->get_layout().format, cldnn::format::yxfb); } TEST(fc_permute_gpu, basic_permute_bfyx) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 5, 1, 256 } }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 5, 1, 256 } }); tests::set_random_values(input_mem); //Topolgy creates permute which "repalces" the batch with the feature. topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), permute("permute", "input", { 1, 0, 2, 3 }) ); @@ -682,15 +679,15 @@ TEST(fc_permute_gpu, basic_permute_bfyx) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 5); EXPECT_EQ(out_tensor.feature[0], 1); EXPECT_EQ(out_tensor.spatial[0], 1); EXPECT_EQ(out_tensor.spatial[1], 256); - EXPECT_EQ(output.get_layout().format, cldnn::format::bfyx); + EXPECT_EQ(output->get_layout().format, cldnn::format::bfyx); - auto input_ptr = input_mem.pointer(); - auto output_ptr = output.pointer(); + cldnn::mem_lock input_ptr(input_mem, get_test_stream()); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 5 * 256; i++) EXPECT_NEAR(input_ptr[i], output_ptr[i], 1e-3f); @@ -698,7 +695,7 @@ TEST(fc_permute_gpu, basic_permute_bfyx) TEST(permute_gpu_f32, permute_bfwzyx) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int b = 1; const int f = 2; const int x = 3; @@ -708,8 +705,8 @@ TEST(permute_gpu_f32, permute_bfwzyx) std::vector permute_order = { 1, 0, 5, 4, 3, 2 }; auto input_size = cldnn::tensor(batch(b), feature(f), spatial(x, y, z, w)); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfwzyx, input_size }); - auto input_data = generate_random_1d(input_mem.get_layout().count(), -1, 1); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfwzyx, input_size }); + auto input_data = generate_random_1d(input_mem->get_layout().count(), -1, 1); set_values(input_mem, input_data); @@ -726,11 +723,11 @@ TEST(permute_gpu_f32, permute_bfwzyx) auto in_index = cldnn::tensor(batch(bi), feature(fi), spatial(xi, yi, zi, wi)); auto out_index = cldnn::tensor(batch(fi), feature(bi), spatial(wi, zi, yi, xi)); expected_output[expected_layout.get_linear_offset(out_index)] = - input_data[input_mem.get_layout().get_linear_offset(in_index)]; + input_data[input_mem->get_layout().get_linear_offset(in_index)]; } topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), permute("permute", "input", permute_order) ); @@ -742,16 +739,16 @@ TEST(permute_gpu_f32, permute_bfwzyx) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 2); EXPECT_EQ(out_tensor.feature[0], 1); 
EXPECT_EQ(out_tensor.spatial[0], 6); EXPECT_EQ(out_tensor.spatial[1], 5); EXPECT_EQ(out_tensor.spatial[2], 4); EXPECT_EQ(out_tensor.spatial[3], 3); - EXPECT_EQ(output.get_layout().format, cldnn::format::bfwzyx); + EXPECT_EQ(output->get_layout().format, cldnn::format::bfwzyx); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < output_ptr.size(); ++i) { @@ -787,7 +784,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape) // 0 2 0 2 0 2 0 2 // 1 3 1 3 1 3 1 3 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int b = 1; const int f = 4; const int x = 2; @@ -800,7 +797,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape) std::vector permute_order = { 0, 1, 5, 4, 2, 3 }; auto input_size = cldnn::tensor(batch(b), feature(f), spatial(x, y)); - auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx, input_size }); + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); std::vector input_data = { 0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 1.f, 1.f, @@ -818,7 +815,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape) set_values(input_mem, input_data); topology topology( - input_layout("input", input_mem.get_layout()), + input_layout("input", input_mem->get_layout()), reorder("input_6d", "input", { data_types::f32, format::bfwzyx, cldnn::tensor(batch(b), feature(f), spatial(x, y)) }), reshape("reshape_4_to_6", "input_6d", cldnn::tensor(batch(b), feature(f_reshape), spatial(x, y, z_reshape, w_reshape))), permute("permute", "reshape_4_to_6", permute_order), @@ -835,7 +832,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape) auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < output_ptr.size(); ++i) { @@ -847,9 +844,9 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1) // Input : bfzyx:2x2x2x2x3 // Permute order : { 0,2,3,4,1 } - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 3, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 3, 2, 2 } }); set_values(input, { 1.0f, 2.0f, -15.f, //B0, F0, // z0 y0 x-3 @@ -874,7 +871,7 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 2, 3, 4, 1 })); network network(engine, topology); @@ -885,14 +882,14 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1) EXPECT_EQ(outputs.begin()->first, "permute"); auto output = outputs.begin()->second.get_memory(); - auto out_tensor = output.get_layout().size; + auto out_tensor = output->get_layout().size; EXPECT_EQ(out_tensor.batch[0], 2); EXPECT_EQ(out_tensor.feature[0], 3); EXPECT_EQ(out_tensor.spatial[0], 2); EXPECT_EQ(out_tensor.spatial[1], 2); EXPECT_EQ(out_tensor.spatial[2], 2); - EXPECT_EQ(output.get_layout().format, cldnn::format::bfzyx); + EXPECT_EQ(output->get_layout().format, cldnn::format::bfzyx); float answers[48] = { 1.0f, 3.0f, 2.0f, 4.0f, @@ -909,7 +906,7 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1) -15.0f, -15.0f, -15.0f, -15.0f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 48; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -929,9 +926,9 @@ 
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) { constexpr size_t array_size = 256; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 8, 8, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 8, 8, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -941,7 +938,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 3, 1, 2 })); network network(engine, topology); @@ -972,7 +969,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) { 142.f, 158.f, 174.f, 190.f, 206.f, 222.f, 238.f, 254.f, 143.f, 159.f, 175.f, 191.f, 207.f, 223.f, 239.f, 255.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -985,9 +982,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) { constexpr size_t array_size = 160; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 5, 8, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 5, 8, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -997,7 +994,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 3, 1, 2 })); network network(engine, topology); @@ -1022,7 +1019,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) { 156.f, 93.f, 109.f, 125.f, 141.f, 157.f, 94.f, 110.f, 126.f, 142.f, 158.f, 95.f, 111.f, 127.f, 143.f, 159.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1035,9 +1032,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) { constexpr size_t array_size = 160; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 8, 5, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 8, 5, 2 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, @@ -1053,7 +1050,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) { }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 3, 1, 2 })); network network(engine, topology); @@ -1078,7 +1075,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) { 88.f, 98.f, 108.f, 118.f, 128.f, 138.f, 148.f, 158.f, 89.f, 99.f, 109.f, 119.f, 129.f, 139.f, 149.f, 159.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1091,9 +1088,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) { constexpr size_t array_size = 100; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = 
memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 5, 5, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 5, 5, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1103,7 +1100,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 3, 1, 2 })); network network(engine, topology); @@ -1128,7 +1125,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) { 58.f, 68.f, 78.f, 88.f, 98.f, 59.f, 69.f, 79.f, 89.f, 99.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1141,9 +1138,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) { constexpr size_t array_size = 512; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 8, 8, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 8, 8, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1153,7 +1150,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 4, 1, 2, 3 })); network network(engine, topology); @@ -1190,7 +1187,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) { 287.f, 319.f, 351.f, 383.f, 415.f, 447.f, 479.f, 511.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1203,9 +1200,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) { constexpr size_t array_size = 320; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 5, 8, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 5, 8, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1215,7 +1212,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 4, 1, 2, 3 })); network network(engine, topology); @@ -1244,7 +1241,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) { 254.f, 286.f, 318.f, 191.f, 223.f, 255.f, 287.f, 319.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1257,9 +1254,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) { constexpr size_t array_size = 320; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 8, 5, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 8, 5, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1269,7 +1266,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) { 
set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 4, 1, 2, 3 })); network network(engine, topology); @@ -1298,7 +1295,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) { 179.f, 199.f, 219.f, 239.f, 259.f, 279.f, 299.f, 319.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1311,9 +1308,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) { constexpr size_t array_size = 200; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 5, 5, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 5, 5, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1323,7 +1320,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 4, 1, 2, 3 })); network network(engine, topology); @@ -1352,7 +1349,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) { 119.f, 139.f, 159.f, 179.f, 199.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1365,9 +1362,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) { constexpr size_t array_size = 1024; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx,{ 2, 8, 8, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx,{ 2, 8, 8, 2, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1377,7 +1374,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 5, 1, 2, 3, 4 })); network network(engine, topology); @@ -1424,7 +1421,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) { 572.f, 636.f, 700.f, 764.f, 828.f, 892.f, 956.f, 1020.f, 573.f, 637.f, 701.f, 765.f, 829.f, 893.f, 957.f, 1021.f, 574.f, 638.f, 702.f, 766.f, 830.f, 894.f, 958.f, 1022.f, 575.f, 639.f, 703.f, 767.f, 831.f, 895.f, 959.f, 1023.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1437,9 +1434,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) { constexpr size_t array_size = 640; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx,{ 2, 5, 8, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx,{ 2, 5, 8, 2, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1449,7 +1446,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", 
input->get_layout()), permute("permute", "input", { 0, 5, 1, 2, 3, 4 })); network network(engine, topology); @@ -1484,7 +1481,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) { 569.f, 633.f, 378.f, 442.f, 506.f, 570.f, 634.f, 379.f, 443.f, 507.f, 571.f, 635.f, 380.f, 444.f, 508.f, 572.f, 636.f, 381.f, 445.f, 509.f, 573.f, 637.f, 382.f, 446.f, 510.f, 574.f, 638.f, 383.f, 447.f, 511.f, 575.f, 639.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1497,9 +1494,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) { constexpr size_t array_size = 640; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx,{ 2, 8, 5, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx,{ 2, 8, 5, 2, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1509,7 +1506,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 5, 1, 2, 3, 4 })); network network(engine, topology); @@ -1544,7 +1541,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) { 356.f, 396.f, 436.f, 476.f, 516.f, 556.f, 596.f, 636.f, 357.f, 397.f, 437.f, 477.f, 517.f, 557.f, 597.f, 637.f, 358.f, 398.f, 438.f, 478.f, 518.f, 558.f, 598.f, 638.f, 359.f, 399.f, 439.f, 479.f, 519.f, 559.f, 599.f, 639.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1557,9 +1554,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_5_4_1_2_3) { constexpr size_t array_size = 400; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx,{ 2, 5, 5, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx,{ 2, 5, 5, 2, 2, 2 } }); std::vector input_data; input_data.reserve(array_size); @@ -1569,7 +1566,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_5_4_1_2_3) { set_values(input, input_data); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), permute("permute", "input", { 0, 5, 1, 2, 3, 4 })); network network(engine, topology); @@ -1604,7 +1601,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_5_4_1_2_3) { 236.f, 276.f, 316.f, 356.f, 396.f, 237.f, 277.f, 317.f, 357.f, 397.f, 238.f, 278.f, 318.f, 358.f, 398.f, 239.f, 279.f, 319.f, 359.f, 399.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < array_size; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1618,7 +1615,7 @@ struct TiledPermuteParam { class TiledPermuteTest : public ::testing::TestWithParam { public: - const cldnn::engine engine; + cldnn::engine& engine = get_test_engine(); TiledPermuteTest(): engine(get_test_engine()) { } template @@ -1627,7 +1624,7 @@ public: } template - void set_random_values(const cldnn::memory& mem) const { + void set_random_values(const cldnn::memory::ptr mem) const { tests::set_random_values(mem); } @@ -1647,11 +1644,11 @@ void 
TiledPermuteTest::compare_value(FLOAT16 a, FLOAT16 b) const { } template<> -void TiledPermuteTest::set_random_values(const cldnn::memory& mem) const { +void TiledPermuteTest::set_random_values(const cldnn::memory::ptr mem) const { // tests::set_random_values() is not supported std::mt19937 gen; static std::uniform_int_distribution uid(std::numeric_limits::min(), std::numeric_limits::max()); - auto ptr = mem.pointer(); + cldnn::mem_lock ptr(mem, get_test_stream()); for (auto it = ptr.begin(); it != ptr.end(); ++it) { *it = static_cast(uid(gen)); } @@ -1680,11 +1677,11 @@ void TiledPermuteTest::run_test(const std::vector& si order.push_back(i); } - auto input = memory::allocate(engine, {Data_Type, format, tensor}); + auto input = engine.allocate_memory({Data_Type, format, tensor}); set_random_values(input); topology topology_ref = topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", {Data_Type, format_fsv, tensor}), permute("output", "reorder", order ) ); @@ -1698,7 +1695,7 @@ void TiledPermuteTest::run_test(const std::vector& si network_ref.set_input_data("input", input); auto outputs_ref = network_ref.execute(); auto output_ref = outputs_ref.begin()->second.get_memory(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); // run with permute_tile_8x8_4x4_fsv16 cldnn::build_options options_tile; @@ -1709,10 +1706,10 @@ void TiledPermuteTest::run_test(const std::vector& si network_tile.set_input_data("input", input); auto outputs_tile = network_tile.execute(); auto output_tile = outputs_tile.begin()->second.get_memory(); - auto output_tile_ptr = output_tile.pointer(); + cldnn::mem_lock output_tile_ptr(output_tile, get_test_stream()); // compare results - const size_t output_size= output_ref.get_layout().get_linear_size(); + const size_t output_size= output_ref->get_layout().get_linear_size(); for (size_t i = 0; i < output_size; i++) { compare_value(output_ref_ptr[i], output_tile_ptr[i]); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp index a1dc98c8b03..009490ac681 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp @@ -3,21 +3,17 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/pooling.hpp" -#include "api/mutable_data.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include "api/reorder.hpp" -#include -#include "test_utils/float16.h" + +#include "test_utils.h" + +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; namespace cldnn { template <> @@ -223,12 +219,12 @@ TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) { // Expected output: // [ 8.0, 0.0, 0.0, 4,0, 0,5, -0.5, -0.5, -0.5 ] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 8, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 8, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); 
topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,3,3 }, { 1,1,1,1 })); network network(engine, topology); set_values(input_prim, { 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f, @@ -248,7 +244,7 @@ TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(4.0f, output_ptr[3]); } @@ -268,12 +264,12 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) { // Expected output: // [ 2.0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,3,3 }, { 1,1,1,1 })); network network(engine, topology); @@ -286,7 +282,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.0f, output_ptr[0]); } @@ -306,12 +302,12 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) { // Expected output: // [ 2.0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max)); network network(engine, topology); @@ -324,7 +320,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.0f, output_ptr[0]); } @@ -336,12 +332,12 @@ TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { // Global pooling: true // Padding: none - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max)); network network(engine, topology); @@ -365,7 +361,7 @@ TEST(pooling_forward_gpu, basic_max_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); std::vector answers = { 8, 44, 8, 81, 64, 8, 12, 66, 14, 8, 99, 64, 8, 11, 18, 21 }; @@ -382,12 +378,12 @@ TEST(pooling_forward_gpu, 
basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { // Global pooling: true // Padding: none - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::i8, format::b_fs_yx_fsv16, { 1, 16, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average)); network network(engine, topology); @@ -411,7 +407,7 @@ TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); std::vector answers = { 29, 199, 241, 63, 85, 85, 213, 64, 85, 85, 21, 64, 142, 227, 8, 65, @@ -428,7 +424,7 @@ TEST(pooling_forward_gpu, basic_avg_b_fs_yx_fsv16_i8_global_i3x3x1x1_nopad) { TEST(pooling_forward_gpu, basic_max_pooling_int8) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); layout in_layout = { type_to_data_type::value,format::byxf,{ 1,1,3,3 } }; layout out_layout = { type_to_data_type::value,format::byxf,{ 1,1,1,1 } }; layout byte_layout = { type_to_data_type::value, format::bfyx,{ 1,1,3,3 } }; @@ -436,7 +432,7 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) { std::list final_results = { 10.0f }; // Allocate memory for input image. - auto input_memory = memory::allocate(engine, in_layout); + auto input_memory = engine.allocate_memory(in_layout); set_values(input_memory, input_f); // Create input_layout description @@ -464,7 +460,7 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) { auto outputs = network.execute(); auto interm = outputs.at("reorder2").get_memory(); - auto interm_ptr = interm.pointer(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); unsigned int cntr = 0; for (const auto& exp : final_results) { @@ -474,7 +470,7 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) { TEST(pooling_forward_gpu, basic_avg_pooling_int8) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); layout in_layout = { type_to_data_type::value,format::byxf,{ 1,1,3,3 } }; layout out_layout = { type_to_data_type::value,format::byxf,{ 1,1,1,1 } }; layout byte_layout = { type_to_data_type::value, format::bfyx,{ 1,1,3,3 } }; @@ -488,7 +484,7 @@ TEST(pooling_forward_gpu, basic_avg_pooling_int8) { } final_result /= input_f.size(); // Allocate memory for input image. 
- auto input_memory = memory::allocate(engine, in_layout); + auto input_memory = engine.allocate_memory(in_layout); set_values(input_memory, input_f); // Create input_layout description @@ -516,7 +512,7 @@ TEST(pooling_forward_gpu, basic_avg_pooling_int8) { auto outputs = network.execute(); auto interm = outputs.at("reorder2").get_memory(); - auto interm_ptr = interm.pointer(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); EXPECT_EQ(final_result, interm_ptr[0]); } @@ -537,12 +533,12 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { // [ 2.0, 1.5] // [ 2.0, 1.5] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,1,1 })); network network(engine, topology); @@ -555,7 +551,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.0f, output_ptr[0]); EXPECT_EQ(1.5f, output_ptr[1]); @@ -581,12 +577,12 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) { // [ 2.0, 0.5] // [ 0.5, 0.5] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 })); network network(engine, topology); @@ -599,7 +595,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(2.0f, output_ptr[0]); EXPECT_EQ(0.5f, output_ptr[1]); @@ -635,12 +631,12 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) { // [ 0.5, 1.0] [ 1.0, 0.5] // [-0.5, 1.5] [ 1.0, 0.0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,1,1 })); network network(engine, topology); @@ -653,7 +649,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(1.0f, output_ptr[0]); EXPECT_EQ(0.0f, output_ptr[2]); 
EXPECT_EQ(0.5f, output_ptr[4]); EXPECT_EQ(1.5f, output_ptr[6]); @@ -685,12 +681,12 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) // [ 1.5, -0.5] // [ -1, 0.5] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0, 0, -1,-1 })); network network(engine, topology); @@ -703,7 +699,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ( 1.5f, output_ptr[0]); EXPECT_EQ(-0.5f, output_ptr[1]); EXPECT_EQ(-1.0f, output_ptr[2]); @@ -730,12 +726,12 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) // [ 1.5, -0.5] // [ 1, -0.5] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 })); network network(engine, topology); @@ -753,13 +749,13 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 4); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 4); - auto output_ptr = output_prim.pointer(); - EXPECT_EQ(1.5f, get_value(output_ptr, 0)); - EXPECT_EQ(-0.5f, get_value(output_ptr, 1)); - EXPECT_EQ(1.0f, get_value(output_ptr, 2)); - EXPECT_EQ(-0.5f, get_value(output_ptr, 3)); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + EXPECT_EQ(1.5f, output_ptr[0]); + EXPECT_EQ(-0.5f, output_ptr[1]); + EXPECT_EQ(1.0f, output_ptr[2]); + EXPECT_EQ(-0.5f, output_ptr[3]); } TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { @@ -779,12 +775,12 @@ TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { // [ 1.0, 0.625] // [ 1.625, 0.875] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average,{ 1,1,2,2 },{ 1,1,1,1 })); network network(engine, topology); @@ -797,7 +793,7 @@ TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = 
output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(1.0f, output_ptr[0]); EXPECT_EQ(0.625f, output_ptr[1]); @@ -824,12 +820,12 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) // [ 0.375, -0.125] // [ -0.25, 0.125] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 })); network network(engine, topology); @@ -842,7 +838,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(0.375f, output_ptr[0]); EXPECT_EQ(-0.125f, output_ptr[1]); EXPECT_EQ(-0.25f, output_ptr[2]); @@ -869,12 +865,12 @@ TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad) // [ 0.177777, -0.133333] // [ 0.333333, 0.55] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,3,3 }, { 1,1,3,3 }, { 0,0,-1,-1 })); network network(engine, topology); @@ -890,7 +886,7 @@ TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_NEAR(output_ptr[0], 0.177777f, 1e-05F); EXPECT_NEAR(output_ptr[1], -0.133333f, 1e-05F); @@ -917,12 +913,12 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) // [ 0.375, 0.5] // [ -0.125, -1.125] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 })); network network(engine, topology); @@ -934,9 +930,9 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 4); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 4); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); EXPECT_EQ(0.375f, output_ptr[0]); EXPECT_EQ(0.5f, output_ptr[1]); 
EXPECT_EQ(-0.125f, output_ptr[2]); @@ -966,7 +962,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out // [0, 0, 0, 0, 0, 0] // [0, 0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -974,10 +970,10 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 2, 2 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,2,2 }, 0 })); network network(engine, topology); @@ -998,7 +994,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1027,7 +1023,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out // [0, 1, -0.5, 0, 0] // [0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -1035,10 +1031,10 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 3, 3 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 })); network network(engine, topology); @@ -1063,10 +1059,10 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 4); - EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 16); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 4); + EXPECT_EQ((int)output_prim->get_layout().get_buffer_size().count(), 16); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1097,7 +1093,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp // [0, 0, 0, 0, 0, 0] // [0, 0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , 
format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -1105,11 +1101,11 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 2, 2 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); - topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding(padding{ {0,0,1,2}, 0 }))); + topology.add(input_layout("input_prim", input_prim->get_layout())); + topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 }))); topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,2,2 }, 0 })); network network(engine, topology); @@ -1130,7 +1126,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1160,7 +1156,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp // [0, 1, -0.5, 0] // [0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -1168,11 +1164,11 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 3, 3 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); - topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); + topology.add(input_layout("input_prim", input_prim->get_layout())); + topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 })); network network(engine, topology); @@ -1197,10 +1193,10 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 4); - EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 16); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 4); + EXPECT_EQ((int)output_prim->get_layout().get_buffer_size().count(), 16); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1231,7 +1227,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou // [0, 0, 0, 0, 
0, 0] // [0, 0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -1239,11 +1235,11 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 4, 4 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); - topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(input_layout("input_prim", input_prim->get_layout())); + topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,0,0 }, padding{ { 0,0,2,2 }, 0 })); network network(engine, topology); @@ -1268,7 +1264,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1299,7 +1295,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou // [0, 12, 14, 16, 0] // [0, 0, 0, 0, 0] - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); std::vector formats_to_test = { format::yxfb , format::bfyx }; for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) @@ -1307,11 +1303,11 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou std::cout << "Testing format: " << format::order(*it) << std::endl; tensor input_tensor( 1, 1, 5, 5 ); - auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32, *it, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); - topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); + topology.add(input_layout("input_prim", input_prim->get_layout())); + topology.add(reorder("reorder", "input_prim", input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 })); network network(engine, topology); @@ -1339,10 +1335,10 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou EXPECT_EQ(outputs.begin()->first, "pool_prim"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 9); - EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 25); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 9); + EXPECT_EQ((int)output_prim->get_layout().get_buffer_size().count(), 25); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); 
++i) { EXPECT_EQ(expected[i], output_ptr[i]); } @@ -1368,10 +1364,10 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) { // f0: b0: 4 4 b1: 15 13 // f1: b0: 10 11 b1: 21 23 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -1385,7 +1381,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); @@ -1396,9 +1392,9 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) { auto outputs = network.execute(); auto output = outputs.at("pooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 1); @@ -1445,10 +1441,10 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) { // f0: b0: 4 4 b1: 15 13 // f1: b0: 10 11 b1: 21 23 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 3, 2, 1 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 3, 2, 1 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 2, 1, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -1462,7 +1458,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2, 1 }, { 1, 1, 1, 1, 1 })); @@ -1473,9 +1469,9 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2x1_max_with_argmax) { auto outputs = network.execute(); auto output = outputs.at("pooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfzyx); EXPECT_EQ(output_layout.size.spatial[2], 1); @@ -1524,10 +1520,10 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) { // f0: b0: 4 4 b1: 15 13 // f1: b0: 10 11 b1: 21 23 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, 
format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -1541,8 +1537,8 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); + topology.add(input_layout("input", input->get_layout())); + topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 })); @@ -1553,9 +1549,9 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) { auto outputs = network.execute(); auto output = outputs.at("pooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 1); @@ -1603,10 +1599,10 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) { // f0: b0: 4 4 b1: 15 13 // f1: b0: 10 11 b1: 21 23 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -1620,8 +1616,8 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); + topology.add(input_layout("input", input->get_layout())); + topology.add(reorder("reorder", "input", input->get_layout().with_padding(padding{ { 0, 0, 2, 2 }, 0 }))); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0))); @@ -1632,9 +1628,9 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) { auto outputs = network.execute(); auto output = outputs.at("pooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 1); @@ -1692,10 +1688,10 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) { // f0: b0: 4 4 b1: 15 13 // f1: b0: 10 11 b1: 21 23 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = 
memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); - auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }); + auto arg_max = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 1 } }); set_values(input, { 1.0f, 2.0f, -10.f, @@ -1709,7 +1705,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(mutable_data("arg_max", arg_max)); topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 2, 2, 2, 1 })); @@ -1720,9 +1716,9 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) { auto outputs = network.execute(); auto output = outputs.at("pooling").get_memory(); - auto output_ptr = output.pointer(); - auto output_layout = output.get_layout(); - auto argmax_ptr = arg_max.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_layout = output->get_layout(); + cldnn::mem_lock argmax_ptr(arg_max, get_test_stream()); EXPECT_EQ(output_layout.format, format::bfyx); EXPECT_EQ(output_layout.size.spatial[1], 1); @@ -1755,38 +1751,36 @@ static void generic_average_wo_padding_test(format fmt, tensor output, tensor in { constexpr auto dt = std::is_same::value ? data_types::f32 : data_types::f16; - engine eng; + auto& engine = get_test_engine(); - if (!eng.get_info().supports_fp16) - { - if(dt == data_types::f16) - { + if (!engine.get_device_info().supports_fp16) { + if (dt == data_types::f16) { return; } } - auto input_mem = memory::allocate(eng, layout{ dt, fmt, input }); + auto input_mem = engine.allocate_memory(layout{ dt, fmt, input }); set_values(input_mem, std::vector(input.count(), DataType(1))); std::vector expected_output(output.count(), DataType(1)); topology tpl; - tpl.add(input_layout("in", input_mem.get_layout())); + tpl.add(input_layout("in", input_mem->get_layout())); auto pool_in = "in"; if (offset != tensor()) { - tpl.add(reorder("reorder", "in", input_mem.get_layout().with_padding((padding) offset.negate().sizes()))); + tpl.add(reorder("reorder", "in", input_mem->get_layout().with_padding((padding) offset.negate().sizes()))); pool_in = "reorder"; } tpl.add(pooling("pool", pool_in, pooling_mode::average_no_padding, window, stride, offset)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in", input_mem); auto output_mem = net.execute().at("pool").get_memory(); - ASSERT_EQ(output_mem.count(), expected_output.size()); - EXPECT_EQ(output_mem.get_layout().size, output); - auto out_ptr = output_mem.pointer(); + ASSERT_EQ(output_mem->count(), expected_output.size()); + EXPECT_EQ(output_mem->get_layout().size, output); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); for (size_t i = 0; i < expected_output.size(); ++i) EXPECT_FLOAT_EQ(out_ptr[i], expected_output[i]); @@ -1959,16 +1953,13 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) std::vector vGoldOutput; std::vector vTestOutput; - engine engine; + auto& engine = get_test_engine(); // "Golden" Pooling { // Mem initialization // This is user data, no kernels here - auto input = memory::allocate(engine, - { data_types::i8, - format::bfyx, - { in_B, in_F, in_X, in_Y } }); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx, { in_B, in_F, 
in_X, in_Y } }); set_values(input, std::move(DataGold)); auto pool = pooling("pool_GOLD", @@ -1978,7 +1969,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) { 1, 1, S_X, S_Y }); // stride // Create a topology with a simple Convolution layer - topology topology(input_layout("input", input.get_layout()), + topology topology(input_layout("input", input->get_layout()), pool); // Network processing @@ -1989,7 +1980,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) auto searchC = outputs.find("pool_GOLD"); ASSERT_FALSE(searchC == outputs.end()); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); vGoldOutput.reserve(output_ptr.size()); for (size_t i = 0; i < output_ptr.size(); i++) vGoldOutput.push_back(output_ptr[i]); @@ -2003,15 +1994,12 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) // Mem initialization // This is user data, no kernels here - auto input = memory::allocate(engine, - { data_types::i8, - format::bfyx, - { in_B, in_F, in_X, in_Y } }); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx, { in_B, in_F, in_X, in_Y } }); set_values(input, std::move(Data)); // Add input to topology topology.add( - input_layout("input", input.get_layout())); + input_layout("input", input->get_layout())); // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format topology.add(reorder("reorder_Swizzelled", @@ -2041,7 +2029,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) auto searchC = outputs.find("reorder_UnSwizzelled"); ASSERT_FALSE(searchC == outputs.end()); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); vTestOutput.reserve(output_ptr.size()); for (size_t i = 0; i < output_ptr.size(); i++) vTestOutput.push_back(output_ptr[i]); @@ -2057,8 +2045,8 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_output) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; @@ -2080,10 +2068,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_ou // [ 1.0, 0.625] // [ 1.625, 0.875] - auto input_prim = memory::allocate(engine, { data_types::f16, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input", input_prim.get_layout())); + topology.add(input_layout("input", input_prim->get_layout())); topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, { 1, 1, 3, 3 }))); topology.add(pooling("avg_pooling", "reorder_input", pooling_mode::average, { 1,1,2,2 }, { 1,1,1,1 })); topology.add(reorder("reorder_after_pooling", "avg_pooling", layout(data_types::f16, format::bfyx, { 1,1,2,2 }))); @@ -2098,7 +2086,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_ou auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_EQ(1.0f, float(output_ptr[0])); EXPECT_EQ(0.625f, float(output_ptr[1])); @@ -2109,8 +2097,8 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_ou TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl; return; @@ -2132,10 +2120,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) // [ 1.0, 0 ] // [ 1.5, 3.5] - auto input_prim = memory::allocate(engine, { data_types::f16, format::yxfb, { 1, 1, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::yxfb, { 1, 1, 3, 3 } }); topology topology; - topology.add(input_layout("input", input_prim.get_layout())); + topology.add(input_layout("input", input_prim->get_layout())); topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, { 1, 1, 3, 3 }))); topology.add(pooling("avg_pooling", "reorder_input", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 })); topology.add(reorder("reorder_after_pooling", "avg_pooling", layout(data_types::f16, format::bfyx, { 1,1,3,3 }))); @@ -2149,7 +2137,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) EXPECT_EQ(outputs.begin()->first, "reorder_after_pooling"); auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); EXPECT_EQ(1.0f, float(output_ptr[0])); EXPECT_EQ(0.f, float(output_ptr[1])); @@ -2161,8 +2149,8 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; @@ -2198,10 +2186,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride) const int out_x = 2; const int out_y = 2; - auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, { batch_count, features_count, 3, 3 } }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, { batch_count, features_count, 3, 3 } }); topology topology; - topology.add(input_layout("input", input_prim.get_layout())); + topology.add(input_layout("input", input_prim->get_layout())); topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, { batch_count, features_count, 3, 3 }))); topology.add(pooling("avg_pooling", "reorder_input", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 })); topology.add(reorder("reorder_after_pooling", "avg_pooling", layout(data_types::f16, format::bfyx, { batch_count, features_count, out_y, out_x }))); @@ -2219,7 +2207,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride) auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); ASSERT_EQ((int)output_ptr.size(), batch_count * features_count*out_x*out_y); @@ -2240,8 +2228,8 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride) } TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x2_outpad) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; @@ -2269,10 +2257,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x // [0, 0, 0, 0, 0] tensor input_tensor(1, 1, 3, 3); - auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor))); topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 })); topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ {0,0,1,1},0 }))); @@ -2299,10 +2287,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x EXPECT_EQ(outputs.begin()->first, "reorder_pooling"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 4); - EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 16); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 4); + EXPECT_EQ((int)output_prim->get_layout().get_buffer_size().count(), 16); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], float(output_ptr[i])); @@ -2311,8 +2299,8 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x } TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x2_outpad_2x1_inpad) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; @@ -2342,10 +2330,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x // [ 0, 0, 0, 0, 0] tensor input_tensor(1, 1, 5, 5); - auto input_prim = memory::allocate(engine, { data_types::f16, format::bfyx, input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f16, format::bfyx, input_tensor }); topology topology; - topology.add(input_layout("input_prim", input_prim.get_layout())); + topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(reorder("reorder_input", "input_prim", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ { 0,0,2,1 } , 0 }))); topology.add(pooling("pool_prim", "reorder_input", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 })); topology.add(reorder("reorder_pooling", "pool_prim", layout(data_types::f16, format::bfyx, input_tensor, padding{{0,0,1,1},0}))); @@ -2375,10 +2363,10 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x EXPECT_EQ(outputs.begin()->first, "reorder_pooling"); auto output_prim = outputs.begin()->second.get_memory(); - EXPECT_EQ((int)output_prim.get_layout().size.count(), 9); - EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 25); + EXPECT_EQ((int)output_prim->get_layout().size.count(), 9); + EXPECT_EQ((int)output_prim->get_layout().get_buffer_size().count(), 25); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (size_t i = 0; i < expected.size(); ++i) { EXPECT_EQ(expected[i], float(output_ptr[i])); @@ -2387,8 +2375,8 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3x2_outpad_2x3_inpad) { - const auto& engine = get_test_engine(); - bool f16_supported = !!engine.get_info().supports_fp16; + auto& engine = get_test_engine(); + bool f16_supported = !!engine.get_device_info().supports_fp16; if (!f16_supported) { std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." 
<< std::endl; return; @@ -2414,7 +2402,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 input_data[i] = FLOAT16((float)i/float(input_data.size())); } - auto input_prim = memory::allocate(engine, { data_types::f16,format::bfyx,input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f16,format::bfyx,input_tensor }); set_values(input_prim, input_data); std::vector golden_results; @@ -2422,15 +2410,15 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 { //GOLDEN TOPOLOGY topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 }))); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 }))); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, { 1,1,pool_size,pool_size }, { 1,1,stride_size,stride_size }, { 0,0,0,0 }, padding{ { 0,0,x_out_pad,y_out_pad },0 })); network golden_network(engine, golden_topology); golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { golden_results.push_back(float(output_ptr[i])); @@ -2439,7 +2427,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 { //FSV32 TOPOLOGY topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); golden_topology.add(reorder("reorder_input", "input", layout(data_types::f16, format::fs_b_yx_fsv32, input_tensor, padding{ {0,0,x_in_pad, y_in_pad}, 0 }))); golden_topology.add(pooling("fsv32_pooling", "reorder_input", pooling_mode::average, { 1,1,pool_size,pool_size }, { 1,1,stride_size,stride_size }, { 0,0,0,0 }, padding{ { 0,0,x_out_pad,y_out_pad },0 })); golden_topology.add(reorder("reorder_pooling", "fsv32_pooling", layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 }))); @@ -2448,7 +2436,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3 fsv32_network.set_input_data("input", input_prim); auto outputs = fsv32_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { fsv32_results.push_back(float(output_ptr[i])); @@ -2493,7 +2481,7 @@ public: virtual void run_expect(const VVVVVF& expected) { - auto eng = get_test_engine(); + auto& eng = get_test_engine(); auto topo = build_topology(eng); auto opts = build_options( build_option::optimize_data(true) @@ -2504,7 +2492,7 @@ public: auto input_lay = layout(input_type(), input_format(), input_size); - auto input_mem = memory::allocate(eng, input_lay); + auto input_mem = eng.allocate_memory(input_lay); std::vector input_flat(input_lay.get_linear_size(), static_cast(0)); for (size_t bi = 0; bi < batch_num(); ++bi) for (size_t fi = 0; fi < input_features(); ++fi) @@ -2520,8 +2508,8 @@ public: net.set_input_data("input", input_mem); auto result = 
net.execute(); auto out_mem = result.at(output_id()).get_memory(); - auto out_lay = out_mem.get_layout(); - auto out_ptr = out_mem.cldnn::memory::template pointer(); + auto out_lay = out_mem->get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); std::string kernel; for (auto i : net.get_primitives_info()) { @@ -2779,12 +2767,12 @@ public: using parent = pooling_random_test_base; using output_t = typename parent::output_t; - topology build_topology(const engine& eng) override { + topology build_topology(engine& eng) override { topology topo = parent::build_topology(eng); auto scale_lay = layout(this->output_type(), format::bfyx, tensor(batch(1), feature(this->input_features()), spatial(1, 1, 1, 1))); - auto scale_mem = memory::allocate(eng, scale_lay); - auto shift_mem = memory::allocate(eng, scale_lay); + auto scale_mem = eng.allocate_memory(scale_lay); + auto shift_mem = eng.allocate_memory(scale_lay); set_values(scale_mem, _scale); set_values(shift_mem, _shift); @@ -2864,7 +2852,7 @@ INSTANTIATE_TEST_CASE_P( TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 16; @@ -2880,7 +2868,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, {data_types::f32, format::bfyx, input_tensor}); + auto input_prim = engine.allocate_memory({data_types::f32, format::bfyx, input_tensor}); set_values(input_prim, input_data); std::vector golden_results; @@ -2889,8 +2877,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, {1, 1, stride_size, stride_size}, {0, 0, -x_in_pad, -y_in_pad})); @@ -2898,7 +2886,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { golden_results.push_back(float(output_ptr[i])); @@ -2908,7 +2896,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) { // bfzyx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add(pooling("bsv16_fsv16_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, @@ -2922,9 +2910,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = 
outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -2948,7 +2936,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride) TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x1_inpad) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 16; @@ -2964,7 +2952,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, {data_types::f32, format::bfyx, input_tensor}); + auto input_prim = engine.allocate_memory({data_types::f32, format::bfyx, input_tensor}); set_values(input_prim, input_data); std::vector golden_results; @@ -2973,8 +2961,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add( pooling("golden_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, {1, 1, stride_size, stride_size}, {0, 0, -x_in_pad, -y_in_pad})); @@ -2983,7 +2971,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { golden_results.push_back(float(output_ptr[i])); } @@ -2992,7 +2980,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add( @@ -3006,9 +2994,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3031,7 +3019,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) { - 
const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 16; @@ -3047,7 +3035,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, {data_types::f32, format::bfyx, input_tensor}); + auto input_prim = engine.allocate_memory({data_types::f32, format::bfyx, input_tensor}); set_values(input_prim, input_data); std::vector golden_results; @@ -3056,8 +3044,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, {1, 1, pool_size, pool_size}, {1, 1, stride_size, stride_size}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3065,7 +3053,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3076,7 +3064,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add(pooling("bsv16_fsv16_pooling", "reorder_input", pooling_mode::average, {1, 1, pool_size, pool_size}, @@ -3090,9 +3078,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3115,7 +3103,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride) TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 16; @@ -3132,7 +3120,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, {data_types::f32, format::bfyx, input_tensor}); + auto input_prim = engine.allocate_memory({data_types::f32, format::bfyx, input_tensor}); set_values(input_prim, input_data); std::vector golden_results; 
@@ -3141,8 +3129,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::average, {1, 1, pool_size, pool_size}, {1, 1, stride_size_x, stride_size_y}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3150,7 +3138,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3161,7 +3149,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add(pooling("bsv16_fsv16_pooling", "reorder_input", pooling_mode::average, {1, 1, pool_size, pool_size}, {1, 1, stride_size_x, stride_size_y}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3173,9 +3161,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3198,7 +3186,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride) TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 16; @@ -3215,7 +3203,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,input_tensor }); set_values(input_prim, input_data); std::vector golden_results; @@ -3224,8 +3212,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); 
golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, {1, 1, stride_size_x, stride_size_y}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3233,7 +3221,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3244,7 +3232,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add( @@ -3258,9 +3246,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3283,7 +3271,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride) TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 32; const int batches = 32; @@ -3300,7 +3288,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) auto input_data = generate_random_1d(batches * features * x_input * y_input, -10, 10); - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,input_tensor }); set_values(input_prim, input_data); std::vector golden_results; @@ -3309,8 +3297,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, {1, 1, stride_size_x, stride_size_y}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3318,7 +3306,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3329,7 +3317,7 @@ TEST(pooling_forward_gpu, 
bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add( @@ -3343,9 +3331,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3368,7 +3356,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride) TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int features = 16; const int batches = 32; @@ -3389,7 +3377,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) input_data[i] = static_cast(i); } - auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,input_tensor }); + auto input_prim = engine.allocate_memory({ data_types::f32,format::bfyx,input_tensor }); set_values(input_prim, input_data); std::vector golden_results; @@ -3398,8 +3386,8 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) { // golden topology topology golden_topology; - golden_topology.add(input_layout("input", input_prim.get_layout())); - golden_topology.add(reorder("reorder_input", "input", input_prim.get_layout())); + golden_topology.add(input_layout("input", input_prim->get_layout())); + golden_topology.add(reorder("reorder_input", "input", input_prim->get_layout())); golden_topology.add(pooling("golden_pooling", "reorder_input", pooling_mode::max, {1, 1, pool_size, pool_size}, {1, 1, stride_size_x, stride_size_y}, {0, 0, -x_in_pad, -y_in_pad})); @@ -3407,7 +3395,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) golden_network.set_input_data("input", input_prim); auto outputs = golden_network.execute(); - auto output_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock output_ptr(outputs.begin()->second.get_memory(), get_test_stream()); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3418,7 +3406,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) { // bs_fs_yx_bsv16_fsv16 topology topology tested_topology; - tested_topology.add(input_layout("input", input_prim.get_layout())); + tested_topology.add(input_layout("input", input_prim->get_layout())); tested_topology.add(reorder("reorder_input", "input", layout(data_types::f32, format::bs_fs_yx_bsv16_fsv16, input_tensor))); tested_topology.add( @@ -3432,9 +3420,9 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride) bsv16_fsv16_network.set_input_data("input", input_prim); auto outputs = bsv16_fsv16_network.execute(); - auto output_ptr = outputs.at("reorder_pooling").get_memory().pointer(); + cldnn::mem_lock 
output_ptr(outputs.at("reorder_pooling").get_memory(), get_test_stream()); - ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory().get_layout().format, format::bs_fs_yx_bsv16_fsv16); + ASSERT_EQ(outputs.at("bsv16_fsv16_pooling").get_memory()->get_layout().format, format::bs_fs_yx_bsv16_fsv16); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -3521,7 +3509,7 @@ public: return false; } - virtual void prepare_input_for_test(std::vector& inputs) + virtual void prepare_input_for_test(std::vector& inputs) { if (generic_params->data_type == data_types::f32) { @@ -3534,13 +3522,13 @@ public: } template - void prepare_input_for_test_typed(std::vector& inputs) + void prepare_input_for_test_typed(std::vector& inputs) { int k = (generic_params->data_type == data_types::f32) ? 8 : 4; auto input = inputs[0]; - auto input_size = inputs[0].get_layout().size; + auto input_size = inputs[0]->get_layout().size; VVVVF input_rnd = generate_random_4d(input_size.batch[0], input_size.feature[0], input_size.spatial[1], input_size.spatial[0], -10, 10, k); - VF input_rnd_vec = flatten_4d(input.get_layout().format, input_rnd); + VF input_rnd_vec = flatten_4d(input->get_layout().format, input_rnd); set_values(input, input_rnd_vec); } @@ -3579,14 +3567,13 @@ public: } template - memory generate_reference_typed(const std::vector& inputs) - { + memory::ptr generate_reference_typed(const std::vector& inputs) { auto pooling = std::static_pointer_cast(layer_params); - int batch = inputs[0].get_layout().size.batch[0]; - int feature = inputs[0].get_layout().size.feature[0]; - int height = inputs[0].get_layout().size.spatial[1]; - int width = inputs[0].get_layout().size.spatial[0]; + int batch = inputs[0]->get_layout().size.batch[0]; + int feature = inputs[0]->get_layout().size.feature[0]; + int height = inputs[0]->get_layout().size.spatial[1]; + int width = inputs[0]->get_layout().size.spatial[0]; cldnn::pooling_mode pooling_mode = pooling->mode; @@ -3605,22 +3592,22 @@ public: int pooled_height = output_tensor.spatial[1]; //Output is bfyx - auto output = memory::allocate(engine, cldnn::layout(inputs[0].get_layout().data_type, cldnn::format::bfyx, output_tensor, pooling->output_padding)); + auto output = engine.allocate_memory(cldnn::layout(inputs[0]->get_layout().data_type, cldnn::format::bfyx, output_tensor, pooling->output_padding)); - auto input_mem = inputs[0].pointer(); - auto output_mem = output.pointer(); + cldnn::mem_lock input_mem(inputs[0], get_test_stream()); + cldnn::mem_lock output_mem(output, get_test_stream()); - int output_width = output.get_layout().get_buffer_size().spatial[0]; - int output_height = output.get_layout().get_buffer_size().spatial[1]; + int output_width = output->get_layout().get_buffer_size().spatial[0]; + int output_height = output->get_layout().get_buffer_size().spatial[1]; - const auto input_desc = get_linear_memory_desc(inputs[0].get_layout()); - const auto output_desc = get_linear_memory_desc(output.get_layout()); + const auto input_desc = get_linear_memory_desc(inputs[0]->get_layout()); + const auto output_desc = get_linear_memory_desc(output->get_layout()); switch (pooling_mode) { case cldnn::pooling_mode::max: { - for (int i = 0; i < (int)output.get_layout().get_buffer_size().count(); i++) + for (int i = 0; i < (int)output->get_layout().get_buffer_size().count(); i++) { output_mem[i] = (generic_params->data_type == data_types::f32) ? 
-FLT_MAX : -65504; } @@ -3640,13 +3627,13 @@ public: int input_offset_y_end = std::min(input_offset_y_start + kernel_height, height); input_offset_y_start = std::max(input_offset_y_start, 0); - const size_t output_index = get_linear_index(output.get_layout(), b, f, h, w, output_desc); + const size_t output_index = get_linear_index(output->get_layout(), b, f, h, w, output_desc); for (int y = input_offset_y_start; y < input_offset_y_end; y++) { for (int x = input_offset_x_start; x < input_offset_x_end; x++) { - const size_t input_index = get_linear_index(inputs[0].get_layout(), b, f, y, x, input_desc); + const size_t input_index = get_linear_index(inputs[0]->get_layout(), b, f, y, x, input_desc); if (input_mem[input_index] > output_mem[output_index]) { @@ -3683,7 +3670,7 @@ public: return y*x; }; - for (int i = 0; i < (int)output.get_layout().get_buffer_size().count(); i++) + for (int i = 0; i < (int)output->get_layout().get_buffer_size().count(); i++) { output_mem[i] = 0; } @@ -3712,7 +3699,7 @@ public: { for (int x = input_offset_x_start; x < input_offset_x_end; x++) { - const size_t input_index = get_linear_index(inputs[0].get_layout(), b, f, y, x, input_desc); + const size_t input_index = get_linear_index(inputs[0]->get_layout(), b, f, y, x, input_desc); output_mem[output_index] += input_mem[input_index]; if (!dynamic_mode || pooling_mode == cldnn::pooling_mode::average_no_padding) { @@ -3746,7 +3733,7 @@ public: return output; } - virtual memory generate_reference(const std::vector& inputs) + virtual memory::ptr generate_reference(const std::vector& inputs) { if (generic_params->data_type == data_types::f32) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp index 264eb954d39..3425bfd427c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp @@ -2,40 +2,34 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// +#include "test_utils.h" -#include -#include "api/memory.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include -#include -#include +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; //We expect additional reorder to be added in between "weights1" and "reshape1". 
//This situation should be handled properly by propagate constants optimization phase TEST(propagate_constants, copy_dependecies_from_nodes) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 1, 2 } }); set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) }); set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) }); set_values(weights2, { 1.1f, 0.1f }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); topology.add(data("weights2", weights2)); topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2)))); @@ -49,9 +43,8 @@ TEST(propagate_constants, copy_dependecies_from_nodes) { auto outputs = network.execute(); float epsilon = 1e-2f; - for (auto& it : outputs) - { - auto output = it.second.get_memory().pointer(); + for (auto& it : outputs) { + cldnn::mem_lock output(it.second.get_memory(), get_test_stream()); EXPECT_NEAR(7.8f, output[0], epsilon); } -} \ No newline at end of file +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp index 7cbec32a861..a7ea1040828 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp @@ -2,18 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include "api/memory.hpp" -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include "test_utils/float16.h" +#include +#include + +#include namespace cldnn { @@ -21,7 +15,7 @@ template<> struct type_to_data_type { static const data_types value = d } using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace std; extern float cls_scores_data[]; @@ -65,11 +59,10 @@ class TestRunnerProposal public: explicit TestRunnerProposal(cldnn::tensor image_info_size); - memory Run(std::vector& data, - std::vector& rois); + memory::ptr Run(std::vector& data, + std::vector& rois); private: - engine _engine; layout _cls_scores_layout; layout _bbox_pred_layout; layout _image_info_layout; @@ -103,20 +96,21 @@ TestRunnerProposal::TestRunnerProposal(cldnn::tensor image_in _topology.add(_test_layer); - _network.reset(new network(_engine, _topology)); + _network.reset(new network(get_test_engine(), _topology)); } template -memory TestRunnerProposal::Run(std::vector& cls_scores_vals, - std::vector& bbox_pred_vals) +memory::ptr TestRunnerProposal::Run(std::vector& cls_scores_vals, + std::vector& bbox_pred_vals) { - memory cls_scores = memory::attach(_cls_scores_layout, cls_scores_vals.data(), cls_scores_vals.size()); - 
memory bbox_pred = memory::attach(_bbox_pred_layout, bbox_pred_vals.data(), bbox_pred_vals.size()); + auto& engine = get_test_engine(); + memory::ptr cls_scores = engine.attach_memory(_cls_scores_layout, cls_scores_vals.data()); + memory::ptr bbox_pred = engine.attach_memory(_bbox_pred_layout, bbox_pred_vals.data()); std::vector image_info_vals = { (ImInfoType)((float)image_h - 0.0000001f), // check fp robustness of the layer (ImInfoType)((float)image_w + 0.0000001f), // check fp robustness of the layer (ImInfoType)((float)image_z) }; - memory image_info = memory::allocate(_engine, _image_info_layout); + memory::ptr image_info = engine.allocate_memory(_image_info_layout); tests::set_values(image_info, image_info_vals); _network->set_input_data(cls_scores_name, cls_scores); @@ -134,10 +128,10 @@ TEST(proposal, basic) { TestRunnerProposal t({ 1, 3, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto f = output.pointer(); + cldnn::mem_lock f(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { EXPECT_NEAR(f[i], proposal_ref[i], epsilon); @@ -150,10 +144,10 @@ TEST(proposal, fp16) { TestRunnerProposal t({ 1, 3, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto d = output.pointer(); + cldnn::mem_lock d(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { FLOAT16 ref(proposal_ref[i]); @@ -167,10 +161,10 @@ TEST(proposal, scores_fp16_im_info_fp32) { TestRunnerProposal t({ 1, 3, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto d = output.pointer(); + cldnn::mem_lock d(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { FLOAT16 ref(proposal_ref[i]); @@ -184,10 +178,10 @@ TEST(proposal, scores_fp32_im_info_fp16) { TestRunnerProposal t({ 1, 3, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto d = output.pointer(); + cldnn::mem_lock d(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { float ref(proposal_ref[i]); @@ -201,10 +195,10 @@ TEST(proposal, img_info_batched) { TestRunnerProposal t({ 2, 3, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto f = output.pointer(); + cldnn::mem_lock f(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { EXPECT_NEAR(f[i], proposal_ref[i], epsilon); @@ -217,10 +211,10 @@ TEST(proposal, img_info_batch_only) { TestRunnerProposal t({ 3, 1, 1, 1 }); - const memory& output = t.Run(cls_scores, bbox_pred); - ASSERT_EQ(output.get_layout().count(), proposal_ref_size); + memory::ptr output = t.Run(cls_scores, bbox_pred); + ASSERT_EQ(output->get_layout().count(), proposal_ref_size); - auto f = 
output.pointer(); + cldnn::mem_lock f(output, get_test_stream()); for (size_t i = 0; i < proposal_ref_size; i++) { EXPECT_NEAR(f[i], proposal_ref[i], epsilon); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp index 58afd308928..a4e168ee9a0 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp @@ -3,21 +3,15 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; template struct pyramid_roi_align_typed_test : testing::Test { @@ -31,7 +25,7 @@ TYPED_TEST_CASE(pyramid_roi_align_typed_test, pyramid_roi_align_types); TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) { using Type = typename pyramid_roi_align_typed_test::Type; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int rois_num = 3; const int output_size = 2; @@ -85,11 +79,11 @@ TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) { auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size)); auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size)); - auto rois_mem = memory::allocate(engine, rois_lay); - auto P2_mem = memory::allocate(engine, P2_lay); - auto P3_mem = memory::allocate(engine, P3_lay); - auto P4_mem = memory::allocate(engine, P4_lay); - auto P5_mem = memory::allocate(engine, P5_lay); + auto rois_mem = engine.allocate_memory(rois_lay); + auto P2_mem = engine.allocate_memory(P2_lay); + auto P3_mem = engine.allocate_memory(P3_lay); + auto P4_mem = engine.allocate_memory(P4_lay); + auto P5_mem = engine.allocate_memory(P5_lay); tests::set_values(rois_mem, rois_data); tests::set_values(P2_mem, P2_data); @@ -129,7 +123,7 @@ TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) { auto result = net.execute(); auto out_mem = result.at("pyramid").get_memory(); - auto out_ptr = out_mem.pointer(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); ASSERT_EQ(expected_out.size(), out_ptr.size()); for (size_t i = 0; i < expected_out.size(); ++i) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/quantize_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/quantize_gpu_test.cpp index 1d0dfaa4beb..f5090ab08ff 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/quantize_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/quantize_gpu_test.cpp @@ -3,31 +3,25 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" +#include +#include +#include #include -#include -#include using namespace cldnn; using namespace ::tests; TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}}); - auto input_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto input_high = memory::allocate(engine, { 
data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 16, 2, 2}}); + auto input_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto input_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -83,7 +77,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_low), data("input_high", input_high), data("output_low", output_low), @@ -96,13 +90,13 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { auto outputs = network.execute(); auto output = outputs.at("quantize").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), (size_t)64); - ASSERT_EQ(output.get_layout().count(), (size_t)64); + ASSERT_EQ(output->count(), (size_t)64); + ASSERT_EQ(output->get_layout().count(), (size_t)64); - ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint32_t)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint32_t)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i; @@ -110,11 +104,11 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { } TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 8, 2, 2}}); - auto input_thresh = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 8, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 8, 2, 2}}); + auto input_thresh = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 8, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -147,7 +141,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_thresh), data("input_high", input_thresh), data("output_low", output_low), @@ -160,13 +154,13 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { auto outputs = network.execute(); auto output = outputs.at("quantize").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size 
of tensor - ASSERT_EQ(output.count(), (size_t)32); - ASSERT_EQ(output.get_layout().count(), (size_t)32); + ASSERT_EQ(output->count(), (size_t)32); + ASSERT_EQ(output->get_layout().count(), (size_t)32); - ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint32_t)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint32_t)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i; @@ -174,11 +168,11 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { } TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 8, 2, 2}}); - auto input_thresh = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 8, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 8, 2, 2}}); + auto input_thresh = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 8, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -210,7 +204,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_thresh), data("input_high", input_thresh), data("output_low", output_low), @@ -226,13 +220,13 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) auto outputs = network.execute(); auto output = outputs.at("reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), (size_t)32); - ASSERT_EQ(output.get_layout().count(), (size_t)32); + ASSERT_EQ(output->count(), (size_t)32); + ASSERT_EQ(output->get_layout().count(), (size_t)32); - ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint32_t)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint32_t)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i; @@ -240,12 +234,12 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) } TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { - const cldnn::engine& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}}); - auto input_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto input_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + cldnn::engine& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 16, 2, 2}}); + auto input_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto input_high = 
engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -291,7 +285,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_low), data("input_high", input_high), data("output_low", output_low), @@ -304,13 +298,13 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { auto outputs = network.execute(); auto output = outputs.at("quantize").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), (size_t)64); - ASSERT_EQ(output.get_layout().count(), (size_t)64); + ASSERT_EQ(output->count(), (size_t)64); + ASSERT_EQ(output->get_layout().count(), (size_t)64); - ASSERT_EQ(output.size(), ref_data.size() * sizeof(float)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(float)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " index = " << i; @@ -318,12 +312,12 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { } TEST(quantize_gpu, quantize_levels_3) { - const cldnn::engine& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}}); - auto input_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto input_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + cldnn::engine& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 16, 2, 2}}); + auto input_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto input_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -380,7 +374,7 @@ TEST(quantize_gpu, quantize_levels_3) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_low), data("input_high", input_high), data("output_low", output_low), @@ -393,14 +387,14 @@ TEST(quantize_gpu, quantize_levels_3) { auto outputs = network.execute(); auto output = outputs.at("quantize").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), ref_data.size()); - ASSERT_EQ(output.get_layout().count(), ref_data.size()); + ASSERT_EQ(output->count(), ref_data.size()); + ASSERT_EQ(output->get_layout().count(), ref_data.size()); // Check that memory physical size consider binary pack - ASSERT_EQ(output.size(), ref_data.size() * sizeof(float)); + ASSERT_EQ(output->size(), 
ref_data.size() * sizeof(float)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " i=" << i; @@ -408,12 +402,12 @@ TEST(quantize_gpu, quantize_levels_3) { } TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { - const cldnn::engine& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 16, 2, 2}}); - auto input_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto input_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + cldnn::engine& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 16, 2, 2}}); + auto input_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto input_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.1f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -471,7 +465,7 @@ TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_low), data("input_high", input_high), data("output_low", output_low), @@ -484,14 +478,14 @@ TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { auto outputs = network.execute(); auto output = outputs.at("quantize").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), ref_data.size()); - ASSERT_EQ(output.get_layout().count(), ref_data.size()); + ASSERT_EQ(output->count(), ref_data.size()); + ASSERT_EQ(output->get_layout().count(), ref_data.size()); // Check that memory physical size consider binary pack - ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint8_t)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint8_t)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " i=" << i; @@ -499,12 +493,12 @@ TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { } TEST(quantize_gpu, quantize_levels_256_3d_unsigned) { - const cldnn::engine& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {1, 16, 2, 1, 2}}); - auto input_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto input_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); - auto output_low = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); - auto output_high = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + cldnn::engine& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfzyx, {1, 16, 2, 1, 2}}); + auto input_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto input_high = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 16, 1, 1 } }); + auto output_low = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); + auto output_high = 
engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { -1.0f, 2.1f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, @@ -562,7 +556,7 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) { topology topology; topology.add( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("input_low", input_low), data("input_high", input_high), data("output_low", output_low), @@ -576,14 +570,14 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) { auto outputs = network.execute(); auto output = outputs.at("out").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), ref_data.size()); - ASSERT_EQ(output.get_layout().count(), ref_data.size()); + ASSERT_EQ(output->count(), ref_data.size()); + ASSERT_EQ(output->get_layout().count(), ref_data.size()); // Check that memory physical size consider binary pack - ASSERT_EQ(output.size(), ref_data.size() * sizeof(uint8_t)); + ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint8_t)); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_EQ(output_ptr[i], ref_data[i]) << " i=" << i; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reduce_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reduce_gpu_test.cpp index f1b906058ab..77a3e06d40c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/reduce_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reduce_gpu_test.cpp @@ -2,20 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "api/reduce.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include "test_utils/float16.h" +#include "test_utils.h" + +#include +#include +#include + #include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; template struct accumulator_type { @@ -432,7 +429,7 @@ struct output_data_type { template class ReduceTestBase : public ::testing::TestWithParam { protected: - cldnn::engine engine = get_test_engine(); + cldnn::engine& engine = get_test_engine(); int batch_num, input_f, input_w, input_z, input_y, input_x; cldnn::format input_format = format::any; cldnn::reduce_mode reduce_mode; @@ -480,10 +477,10 @@ public: auto input_size = tensor(batch(batch_num), feature(input_f), spatial(input_x, input_y, input_z, input_w)); auto input_data = generate_random_6d(batch_num, input_f, input_x, input_y, input_z, input_w, 1, 10); auto input_lay = layout(input_dt, layout_format, input_size); - auto input_mem = memory::allocate(engine, input_lay); + auto input_mem = engine.allocate_memory(input_lay); { - auto input_ptr = input_mem.pointer(); + cldnn::mem_lock input_ptr(input_mem, get_test_stream()); for (int fi = 0; fi < input_f; fi++) for (int wi = 0; wi < input_w; wi++) for (int zi = 0; zi < input_z; zi++) @@ -505,7 +502,7 @@ public: if (force_output_dt) { red.output_data_type = output_dt; } - topology.add(input_layout("input", input_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); topology.add(red); build_options options; options.set_option(build_option::optimize_data(true)); @@ -517,8 +514,8 @@ public: network.execute(); auto out_mem = network.get_output("reduce").get_memory(); - auto out_ptr = out_mem.pointer(); - auto out_lay = out_mem.get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); + auto out_lay = out_mem->get_layout(); 
ASSERT_EQ(out_lay.size.sizes()[0], reference_result.size()); // b ASSERT_EQ(out_lay.size.sizes()[1], reference_result[0].size()); // f @@ -543,11 +540,11 @@ public: std::cout << "Reference value at batch: " << bi << " output_f: " << fi << " y: " << yi << " x: " << xi << " = " << val_ref << " Val = " << val << std::endl; - + EXPECT_TRUE(equal); if (!equal) - break; + break; } } } @@ -752,13 +749,13 @@ INSTANTIATE_TEST_CASE_P(DISABLED_reduce_gpu_ref_f32_f32, general_reduce_gpu::PrintToStringParamName); TEST(reduce_gpu, common_bfyx) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}}); set_values(input, {1.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 0)); network network(engine, topology); @@ -774,7 +771,7 @@ TEST(reduce_gpu, common_bfyx) { std::vector ref_data = {1.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -782,13 +779,13 @@ TEST(reduce_gpu, common_bfyx) { } TEST(reduce_gpu, common_bfyx_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 4, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 4, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_x, cldnn::reduce::along_y}, 1)); network network(engine, topology); @@ -804,7 +801,7 @@ TEST(reduce_gpu, common_bfyx_keepdims) { std::vector ref_data = {6.0f, 22.0f, 38.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -812,13 +809,13 @@ TEST(reduce_gpu, common_bfyx_keepdims) { } TEST(reduce_gpu, regr_bfyx_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1, 3, 2, 2} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 3, 2, 2} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, { cldnn::reduce::along_b, cldnn::reduce::along_x }, 1)); network network(engine, topology); @@ -834,7 +831,7 @@ TEST(reduce_gpu, regr_bfyx_keepdims) { std::vector ref_data = { 1.0f, 5.0f, 9.0f, 13.0f, 17.0f, 21.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -842,13 +839,13 @@ TEST(reduce_gpu, regr_bfyx_keepdims) { } TEST(reduce_gpu, common_bfzyx) { - const 
auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}}); set_values(input, {1.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 0)); network network(engine, topology); @@ -864,7 +861,7 @@ TEST(reduce_gpu, common_bfzyx) { std::vector ref_data = {1.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -872,13 +869,13 @@ TEST(reduce_gpu, common_bfzyx) { } TEST(reduce_gpu, common_bfzyx_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfzyx, {1, 1, 1, 1, 1}}); set_values(input, {1.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b}, 1)); network network(engine, topology); @@ -894,7 +891,7 @@ TEST(reduce_gpu, common_bfzyx_keepdims) { std::vector ref_data = {1.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -902,13 +899,13 @@ TEST(reduce_gpu, common_bfzyx_keepdims) { } TEST(reduce_gpu, common_bfwzyx) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_w, cldnn::reduce::along_z, cldnn::reduce::along_y, cldnn::reduce::along_x}, 0)); network network(engine, topology); @@ -924,7 +921,7 @@ TEST(reduce_gpu, common_bfwzyx) { std::vector ref_data = {6.0f, 22.0f, 38.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -932,13 +929,13 @@ TEST(reduce_gpu, common_bfwzyx) { } TEST(reduce_gpu, common_bfwzyx_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, {1, 3, 4, 1, 1, 1})}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); 
topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_f, cldnn::reduce::along_w, cldnn::reduce::along_z}, 1)); network network(engine, topology); @@ -954,7 +951,7 @@ TEST(reduce_gpu, common_bfwzyx_keepdims) { std::vector ref_data = {66.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -962,14 +959,14 @@ TEST(reduce_gpu, common_bfwzyx_keepdims) { } TEST(reduce_gpu, common_bfwzyx_max_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::max, {cldnn::reduce::along_b, cldnn::reduce::along_f}, 1)); network network(engine, topology); @@ -985,7 +982,7 @@ TEST(reduce_gpu, common_bfwzyx_max_keepdims) { std::vector ref_data = {20.0f, 21.0f, 22.0f, 23.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -993,13 +990,13 @@ TEST(reduce_gpu, common_bfwzyx_max_keepdims) { } TEST(reduce_gpu, common_bfwzyx_min) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::min, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1015,7 +1012,7 @@ TEST(reduce_gpu, common_bfwzyx_min) { std::vector ref_data = {0.0f, 3.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1023,13 +1020,13 @@ TEST(reduce_gpu, common_bfwzyx_min) { } TEST(reduce_gpu, common_bfwzyx_min_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::min, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1045,7 +1042,7 @@ TEST(reduce_gpu, common_bfwzyx_min_keepdims) { std::vector ref_data = {0.0f, 3.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); 
for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1053,13 +1050,13 @@ TEST(reduce_gpu, common_bfwzyx_min_keepdims) { } TEST(reduce_gpu, common_bfwzyx_mean) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::mean, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1075,7 +1072,7 @@ TEST(reduce_gpu, common_bfwzyx_mean) { std::vector ref_data = {1.0f, 4.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1083,13 +1080,13 @@ TEST(reduce_gpu, common_bfwzyx_mean) { } TEST(reduce_gpu, common_bfwzyx_mean_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::mean, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1105,7 +1102,7 @@ TEST(reduce_gpu, common_bfwzyx_mean_keepdims) { std::vector ref_data = {1.0f, 4.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1113,13 +1110,13 @@ TEST(reduce_gpu, common_bfwzyx_mean_keepdims) { } TEST(reduce_gpu, common_bfwzyx_prod) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::prod, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1135,7 +1132,7 @@ TEST(reduce_gpu, common_bfwzyx_prod) { std::vector ref_data = {0.0f, 60.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1143,13 +1140,13 @@ TEST(reduce_gpu, common_bfwzyx_prod) { } TEST(reduce_gpu, common_bfwzyx_prod_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, 
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::prod, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1165,7 +1162,7 @@ TEST(reduce_gpu, common_bfwzyx_prod_keepdims) { std::vector ref_data = {0.0f, 60.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1173,14 +1170,14 @@ TEST(reduce_gpu, common_bfwzyx_prod_keepdims) { } TEST(reduce_gpu, common_bfwzyx_sum_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 4, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum, {cldnn::reduce::along_b, cldnn::reduce::along_f}, 1)); network network(engine, topology); @@ -1196,7 +1193,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_keepdims) { std::vector ref_data = {60.0f, 66.0f, 72.0f, 78.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1204,13 +1201,13 @@ TEST(reduce_gpu, common_bfwzyx_sum_keepdims) { } TEST(reduce_gpu, common_bfwzyx_logical_and) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::logical_and, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1226,7 +1223,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and) { std::vector ref_data = {0, 1}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1234,13 +1231,13 @@ TEST(reduce_gpu, common_bfwzyx_logical_and) { } TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::logical_and, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network 
network(engine, topology); @@ -1256,7 +1253,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) { std::vector ref_data = {0, 1}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1264,13 +1261,13 @@ TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) { } TEST(reduce_gpu, common_bfwzyx_logical_or) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::logical_or, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1286,7 +1283,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or) { std::vector ref_data = {1, 1}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1294,13 +1291,13 @@ TEST(reduce_gpu, common_bfwzyx_logical_or) { } TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::logical_or, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1316,7 +1313,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) { std::vector ref_data = {1, 1}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1324,13 +1321,13 @@ TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) { } TEST(reduce_gpu, common_bfwzyx_sum_square) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum_square, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1346,7 +1343,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square) { std::vector ref_data = {5.0f, 50.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1354,13 +1351,13 @@ TEST(reduce_gpu, common_bfwzyx_sum_square) { } TEST(reduce_gpu, 
common_bfwzyx_sum_square_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::sum_square, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1376,7 +1373,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square_keepdims) { std::vector ref_data = {5.0f, 50.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1384,13 +1381,13 @@ TEST(reduce_gpu, common_bfwzyx_sum_square_keepdims) { } TEST(reduce_gpu, common_bfwzyx_l1) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::l1, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1406,7 +1403,7 @@ TEST(reduce_gpu, common_bfwzyx_l1) { std::vector ref_data = {3.0f, 12.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1414,13 +1411,13 @@ TEST(reduce_gpu, common_bfwzyx_l1) { } TEST(reduce_gpu, common_bfwzyx_l1_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::l1, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1436,7 +1433,7 @@ TEST(reduce_gpu, common_bfwzyx_l1_keepdims) { std::vector ref_data = {3.0f, 12.0f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1444,13 +1441,13 @@ TEST(reduce_gpu, common_bfwzyx_l1_keepdims) { } TEST(reduce_gpu, common_bfwzyx_l2) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", 
input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::l2, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1466,7 +1463,7 @@ TEST(reduce_gpu, common_bfwzyx_l2) { std::vector ref_data = {2.236067977f, 7.071067812f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1474,13 +1471,13 @@ TEST(reduce_gpu, common_bfwzyx_l2) { } TEST(reduce_gpu, common_bfwzyx_l2_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, -2.0f, 3.0f, 4.0f, -5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::l2, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1496,7 +1493,7 @@ TEST(reduce_gpu, common_bfwzyx_l2_keepdims) { std::vector ref_data = {2.236067977f, 7.071067812f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1504,13 +1501,13 @@ TEST(reduce_gpu, common_bfwzyx_l2_keepdims) { } TEST(reduce_gpu, common_bfwzyx_log_sum) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::log_sum, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1526,7 +1523,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum) { std::vector ref_data = {1.0986122887f, 2.4849066498f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1534,13 +1531,13 @@ TEST(reduce_gpu, common_bfwzyx_log_sum) { } TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::log_sum, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1556,7 +1553,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) { std::vector ref_data = {1.0986122887f, 2.4849066498f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { 
EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1564,13 +1561,13 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) { } TEST(reduce_gpu, common_bfwzyx_log_sum_exp) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::log_sum_exp, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 0)); network network(engine, topology); @@ -1586,7 +1583,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp) { std::vector ref_data = {2.407605964f, 5.407605964f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1594,13 +1591,13 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp) { } TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}}); set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", "input", reduce_mode::log_sum_exp, {cldnn::reduce::along_f, cldnn::reduce::along_w}, 1)); network network(engine, topology); @@ -1616,7 +1613,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) { std::vector ref_data = {2.407605964f, 5.407605964f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < ref_data.size(); ++i) { EXPECT_TRUE(are_equal(ref_data[i], output_ptr[i])); @@ -1626,7 +1623,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) { template class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParam { protected: - cldnn::engine engine = get_test_engine(); + cldnn::engine& engine = get_test_engine(); int batch_num, input_f, input_w, input_z, input_y, input_x; cldnn::format input_format = format::any; cldnn::reduce_mode reduce_mode; @@ -1675,10 +1672,11 @@ public: auto input_size = tensor(batch(batch_num), feature(input_f), spatial(input_x, input_y, input_z, input_w)); auto input_data = generate_random_6d(batch_num, input_f, input_x, input_y, input_z, input_w, 1, 5, 9); auto input_lay = layout(input_dt, layout_format, input_size); - auto input_mem = memory::allocate(engine, input_lay); + auto input_mem = engine.allocate_memory(input_lay); { - auto input_ptr = input_mem.pointer(); + cldnn::mem_lock input_ptr(input_mem, get_test_stream()); + for (int fi = 0; fi < input_f; fi++) for (int wi = 0; wi < input_w; wi++) for (int zi = 0; zi < input_z; zi++) @@ -1718,7 +1716,7 @@ public: if (force_output_dt) { red.output_data_type = output_dt; } - topology.add(input_layout("input", input_mem.get_layout())); + topology.add(input_layout("input", input_mem->get_layout())); topology.add(red); build_options options; options.set_option(build_option::optimize_data(true)); @@ -1730,8 +1728,8 @@ public: 
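// Note on the parameterized big-tensor test above: the input buffer is filled under a scoped
// cldnn::mem_lock, so the host-side mapping is released before the network is built and run.
// A minimal sketch of that idiom (the float element type and the simple fill are assumptions
// for illustration; the real test is templated on the input type and uses generate_random_6d):

memory::ptr input_mem = engine.allocate_memory(input_lay);
{
    cldnn::mem_lock<float> input_ptr(input_mem, get_test_stream());
    for (size_t i = 0; i < input_ptr.size(); ++i)
        input_ptr[i] = static_cast<float>(i % 5);   // placeholder fill data
}   // lock released here, before the network executes
network network(engine, topology, options);
network.set_input_data("input", input_mem);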
network.execute(); auto out_mem = network.get_output("reduce").get_memory(); - auto out_ptr = out_mem.pointer(); - auto out_lay = out_mem.get_layout(); + cldnn::mem_lock out_ptr(out_mem, get_test_stream()); + auto out_lay = out_mem->get_layout(); ASSERT_EQ(out_lay.size.sizes()[0], reference_result.size()); // b ASSERT_EQ(out_lay.size.sizes()[1], reference_result[0].size()); // f diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/region_yolo_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/region_yolo_gpu_test.cpp index 62d02ef2abd..e1d5d9b2a71 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/region_yolo_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/region_yolo_gpu_test.cpp @@ -2,18 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -186,26 +184,26 @@ namespace internal template static void runRegionTest(internal::region_yolo_test_params& params) { - engine eng; + auto& engine = get_test_engine(); const tensor kInputTensor(params.tensor[0], params.tensor[1], params.tensor[2], params.tensor[3]); auto inputData = generate_random_1d(params.tensor[0] * params.tensor[1] * params.tensor[2] * params.tensor[3], -1, 1); - auto inputPrim = memory::allocate(eng, { params.dataType, format::bfyx, kInputTensor }); + auto inputPrim = engine.allocate_memory({ params.dataType, format::bfyx, kInputTensor }); set_values(inputPrim, inputData); topology topology; - topology.add(input_layout("InputData", inputPrim.get_layout())); + topology.add(input_layout("InputData", inputPrim->get_layout())); topology.add(reorder("reorder_pre", "InputData", params.fmt, params.dataType)); topology.add(region_yolo("region_yolo", "reorder_pre", params.coords, params.classes, params.regionNum, static_cast(params.mask.size()), params.softMax)); topology.add(reorder("reorder_post", "region_yolo", format::bfyx, params.dataType)); - network network(eng, topology); + network network(engine, topology); network.set_input_data("InputData", inputPrim); auto outputs = network.execute(); auto output = outputs.at("reorder_post").get_memory(); - auto outputData = output.pointer(); + cldnn::mem_lock outputData(output, get_test_stream()); /// reference value std::vector refOutputData(inputData.size()); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/removing_output_node_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/removing_output_node_test.cpp index 65e1c9f0ec0..f8151686b50 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/removing_output_node_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/removing_output_node_test.cpp @@ -2,22 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include +#include "test_utils.h" -#include -#include -#include -#include -#include - -#include "test_utils/test_utils.h" +#include +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace testing; TEST(removing_output_node, multiple_outputs) { @@ -28,7 +22,7 @@ TEST(removing_output_node, multiple_outputs) { // |_ // 
reshape(bfyx); - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 1; auto x_size = 1; @@ -40,10 +34,10 @@ TEST(removing_output_node, multiple_outputs) { tensor after_strided_slice = tensor(spatial(y_size, feature_num), feature(batch_num), batch(1)); tensor after_reshape = tensor(feature(batch_num * feature_num * y_size * x_size)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, initial_shape }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, initial_shape }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(begin, { 1, 0, 1, 0 @@ -56,7 +50,7 @@ TEST(removing_output_node, multiple_outputs) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(shuffle_channels("shuffle_channels", "input", group, axis)); topology.add(reshape("reshape", "shuffle_channels", after_reshape)); topology.add(data("input2", begin)); @@ -76,9 +70,9 @@ TEST(removing_output_node, multiple_outputs) { auto outputs = network.execute(); auto output = outputs.at("reshape").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - ASSERT_TRUE(output.get_layout().size == after_reshape); + ASSERT_TRUE(output->get_layout().size == after_reshape); for (size_t i = 0; i < out_vec.size(); i++) EXPECT_EQ(output_ptr[i], out_vec[i]); @@ -86,9 +80,9 @@ TEST(removing_output_node, multiple_outputs) { // checking the output node has the same name after output node deleting due to StridedSlice optimization ASSERT_TRUE(outputs.find("strided_slice") != outputs.end()); auto output2 = outputs.at("strided_slice").get_memory(); - auto output_ptr2 = output.pointer(); + cldnn::mem_lock output_ptr2(output, get_test_stream()); - ASSERT_TRUE(output2.get_layout().size == after_strided_slice); + ASSERT_TRUE(output2->get_layout().size == after_strided_slice); for (size_t i = 0; i < out_vec.size(); i++) EXPECT_EQ(output_ptr2[i], out_vec[i]); @@ -114,10 +108,10 @@ TEST(removing_output_node, output_node_optimization) { // 21 28 39 // 18 20 20 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); - auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -126,7 +120,7 @@ TEST(removing_output_node, output_node_optimization) { { 17.0f, 19.0f, 19.0f } }; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", 
input->get_layout())); topology.add(data("weights", weights)); topology.add(convolution("conv", "input", { "weights" }, { 1,1,1,2 })); topology.add(activation("relu", "conv", activation_func::relu)); @@ -140,8 +134,8 @@ TEST(removing_output_node, output_node_optimization) { EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp index f2c80c3b061..6f0b6a3dfc1 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp @@ -3,31 +3,27 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/reorder.hpp" -#include "api/crop.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" -#include + +#include "test_utils.h" + +#include +#include +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/primitives/crop.hpp" +#include #include -#include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace testing; static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, const data_types input_data_type, const data_types output_data_type, cldnn::format input_format, cldnn::format output_format, int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in = 0, int32_t w_in = 0) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); tensor ts; if (input_format.dimension() == 4) { @@ -40,11 +36,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, ts = { b_in, f_in, x_in, y_in, z_in, w_in }; } - auto input = memory::allocate(engine, { input_data_type, input_format, ts }); + auto input = engine.allocate_memory({ input_data_type, input_format, ts }); layout output_layout(output_data_type, output_format, ts); if (input_data_type == data_types::i8) { - auto input_ptr = input.pointer(); + mem_lock input_ptr{input, get_test_stream()}; unsigned char i = 1; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -54,7 +50,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, } } } else { - auto input_ptr = input.pointer(); + mem_lock input_ptr{input, get_test_stream()}; float i = 1.f; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -64,7 +60,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, } topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); // run on reference(reorder_data) kernel @@ -78,11 +74,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, std::map outputs_ref; outputs_ref = network_ref.execute(); - cldnn::event e1 = outputs_ref.at("reorder").get_event(); - e1.wait(); + cldnn::event::ptr e1 = outputs_ref.at("reorder").get_event(); + e1->wait(); auto output_ref = outputs_ref.begin()->second.get_memory(); - auto output_ref_ptr = output_ref.pointer(); + 
mem_lock output_ref_ptr{output_ref, get_test_stream()}; // run on optimized kernel cldnn::build_options options; @@ -95,11 +91,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, std::map outputs; outputs = network.execute(); - cldnn::event e2 = outputs.at("reorder").get_event(); - e2.wait(); + cldnn::event::ptr e2 = outputs.at("reorder").get_event(); + e2->wait(); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + mem_lock output_ptr{output, get_test_stream()}; // compare results const size_t output_size = output_ref_ptr.size(); @@ -206,17 +202,17 @@ TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_differen } TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int32_t b_in = 1; const int32_t f_in = 8 * 4; const int32_t y_in = 4; const int32_t x_in = 8 * 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { b_in,f_in,x_in,y_in } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { b_in,f_in,x_in,y_in } }); layout output_layout(data_types::f32, format::b_fs_yx_fsv16, { b_in,f_in,x_in,y_in }); // Set incremental input value - auto input_ptr = input.pointer(); + mem_lock input_ptr{input, get_test_stream()}; float i = 0.f; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -224,7 +220,7 @@ TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) { } topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -235,7 +231,7 @@ TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) { EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + mem_lock output_ptr{output, get_test_stream()}; auto get_fsv16_index = [](int32_t /* b_size */, int32_t f_size, int32_t y_size, int32_t x_size, int32_t b, int32_t f, int32_t y, int32_t x) { @@ -291,9 +287,9 @@ TEST(reorder_gpu_f32, basic) { // b1 f1: 12 8 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx,{ 2,2,2,2 }); set_values(input, { @@ -311,7 +307,7 @@ TEST(reorder_gpu_f32, basic) { }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -337,7 +333,7 @@ TEST(reorder_gpu_f32, basic) { 12.0f, 8.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -376,11 +372,11 @@ TEST(reorder_gpu_f32, basic_subtract) { // b1 f1: 10 7 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); layout output_layout( data_types::f32, format::bfyx, {2,2,2,2} ); - auto subtract = memory::allocate(engine, { data_types::f32, format::byxf, { 1, 2, 2, 2 } }); + auto subtract = 
engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, 2, 2 } }); set_values(input, { 1.f, 0.f, @@ -402,8 +398,8 @@ TEST(reorder_gpu_f32, basic_subtract) { }); topology topology( - input_layout("input", input.get_layout()), - input_layout("subtract", subtract.get_layout()), + input_layout("input", input->get_layout()), + input_layout("subtract", subtract->get_layout()), reorder("reorder", "input", output_layout, "subtract")); network network(engine, topology); @@ -429,7 +425,7 @@ TEST(reorder_gpu_f32, basic_subtract) { 10.0f, 7.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -465,9 +461,9 @@ TEST(reorder_gpu_f32, basic_subtract_value) { // b1 f1: 9.5 5.5 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx,{ 2,2,2,2 }); std::vector subtract_val = { 0.5, 2.5 }; @@ -486,7 +482,7 @@ TEST(reorder_gpu_f32, basic_subtract_value) { }); topology topology; - topology.add(input_layout("input", input.get_layout()), reorder("reorder", "input", output_layout, subtract_val)); + topology.add(input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout, subtract_val)); network network(engine, topology); network.set_input_data("input", input); @@ -510,7 +506,7 @@ TEST(reorder_gpu_f32, basic_subtract_value) { 9.5f, 5.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(answers[i], output_ptr[i])); @@ -549,18 +545,18 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) { // b1 f1: 10 7 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." 
<< std::endl; EXPECT_EQ(1, 1); return; } - auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb, { 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx,{ 2,2,2,2 }); - auto subtract = memory::allocate(engine, { data_types::f32, format::byxf, { 1, 2, 2, 2 } }); + auto subtract = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 2, 2, 2 } }); set_values(input, { half_t(1.f), half_t(0.f), @@ -582,7 +578,7 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("subtract", subtract)); topology.add(reorder("reorder", "input", output_layout, "subtract")); @@ -608,7 +604,7 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) { 10.0f, 7.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(answers[i], output_ptr[i])); @@ -644,15 +640,15 @@ TEST(reorder_gpu_f16, basic_subtract_value) { // b1 f1: 9.5 5.5 // - const auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + auto& engine = get_test_engine(); + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); return; } - auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb, { 2, 2, 2, 2 } }); layout output_layout(data_types::f16, format::bfyx,{ 2,2,2,2 }); std::vector subtract_val = { 0.5, 2.5 }; @@ -671,7 +667,7 @@ TEST(reorder_gpu_f16, basic_subtract_value) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("reorder", "input", output_layout, subtract_val)); network network(engine, topology); @@ -696,7 +692,7 @@ TEST(reorder_gpu_f16, basic_subtract_value) { half_t(9.5f), half_t(5.5f) }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_TRUE(are_equal(static_cast(answers[i]), static_cast(output_ptr[i]))); @@ -713,9 +709,9 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { // Output is expected to contain the same value as input in range of indices from 0x0000 to 0xF801. // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); @@ -736,14 +732,14 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { expected_values[0xF802] = half_t(0x8000, 0); // -0 expected_values[0xF803] = half_t(0xFC12, 0); // A NaN (sample: -NaN.0x12). 
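// The reorder_gpu_f16 tests in this region keep their half-precision guard, but the capability
// query now goes through engine.get_device_info() rather than engine.get_info(). The guard, as
// used by the surrounding tests:

auto& engine = get_test_engine();
if (!engine.get_device_info().supports_fp16)
{
    std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
    EXPECT_EQ(1, 1);
    return;
}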
- auto input = memory::allocate(engine, { data_types::f16, format::yxfb, { 1, static_cast(expected_values.size()) / 4, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb, { 1, static_cast(expected_values.size()) / 4, 2, 2 } }); layout interm_layout( data_types::f32, format::byxf, { 1, static_cast(expected_values.size()) / 4, 2, 2 }); - auto output_layout = input.get_layout(); + auto output_layout = input->get_layout(); set_values(input, expected_values); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("reorder_f16_f32", "input", interm_layout)); topology.add(reorder("reorder_f32_f16", "reorder_f16_f32", output_layout)); @@ -762,7 +758,7 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { EXPECT_TRUE(outputs.find("reorder_f32_f16") != outputs.end()); auto interm = outputs.at("reorder_f16_f32").get_memory(); - auto interm_ptr = interm.pointer(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); // Sample positive. EXPECT_TRUE(are_equal(interm_ptr[0x3400], 0.25f)); @@ -783,7 +779,7 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { EXPECT_TRUE(std::isnan(interm_ptr[0xF803])); auto output = outputs.at("reorder_f32_f16").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 0xF802; ++i) // NOTE: do not test for possibly ambiguous values of floating point (-0, NaNs). { EXPECT_TRUE(are_equal(static_cast(expected_values[i]), static_cast(output_ptr[i]))); @@ -792,14 +788,14 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { TEST(reorder_gpu, basic_convert_int8) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); layout in_layout = { type_to_data_type::value,format::byxf,{ 1,1,3,3 } }; layout byte_layout = { type_to_data_type::value, format::bfyx,{ 1,1,3,3 } }; std::initializer_list input_f = { 1.0f, -2.5f, 3.1f, -4.0f, 5.03f, -6.99f, 7.0f, -8.0f, 9.0f }; std::list final_results = { 1.0f, -3.0f, 3.0f, -4.0f, 5.0f, -7.0f, 7.0f, -8.0f, 9.0f }; // Allocate memory for input image. - auto input_memory = memory::allocate(engine, in_layout); + auto input_memory = engine.allocate_memory(in_layout); set_values(input_memory, input_f); // Create input_layout description @@ -830,7 +826,7 @@ TEST(reorder_gpu, basic_convert_int8) { auto outputs = network.execute(); auto interm = outputs.at("reorder2").get_memory(); - auto interm_ptr = interm.pointer(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); unsigned int cntr = 0; for (const auto& exp : final_results) { @@ -849,9 +845,9 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { // const int kernel_size = 5; const int feature_size = 4; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - if (!engine.get_info().supports_fp16) + if (!engine.get_device_info().supports_fp16) { std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl; EXPECT_EQ(1, 1); @@ -869,7 +865,7 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { layout output_layout = { type_to_data_type::value, format::bfyx, {1,4,kernel_size,kernel_size } }; // Allocate memory for input image. 
- auto input_memory = memory::allocate(engine, in_layout); + auto input_memory = engine.allocate_memory(in_layout); set_values(input_memory, input_i8); // Create input_layout description @@ -916,8 +912,8 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { EXPECT_TRUE(outputs.find("crop") != outputs.end()); auto interm = outputs.at("reorder_input").get_memory(); - auto interm_ptr = interm.pointer(); - auto interm_size = outputs.at("reorder_input").get_memory().count(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); + auto interm_size = outputs.at("reorder_input").get_memory()->count(); EXPECT_EQ(interm_size,(size_t) (1*feature_size*kernel_size*kernel_size)); // Sample positive. @@ -939,8 +935,8 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { } auto output = outputs.at("crop").get_memory(); - auto output_ptr = output.pointer(); - auto output_size = outputs.at("crop").get_memory().count(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + auto output_size = output->count(); EXPECT_EQ(output_size,(size_t) (1 * (feature_size-1)*kernel_size*kernel_size)); for (target_index = 0; target_index < output_size; target_index++) @@ -980,9 +976,9 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding) // f1: b0: 5 6 b1: 1.5 5.2 // f1: b0: 7 8 b1: 12 8 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx, { 2,2,2,2 }); set_values(input, { @@ -1000,8 +996,8 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding) }); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 1, 2 }, 0 }), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 1, 2 }, 0 }), reorder("reorder2", "reorder", output_layout)); network network(engine, topology); @@ -1026,7 +1022,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding) 1.5f, 5.2f, 12.0f, 8.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1059,9 +1055,9 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding) // b1 f1: 12 8 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::yxfb, { 2,2,2,2 }); set_values(input, { @@ -1079,8 +1075,8 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding) }); topology topology( - input_layout("input", input.get_layout()), - reorder("reorder", "input", input.get_layout().format, input.get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 2, 1 }, 0 }), + input_layout("input", input->get_layout()), + reorder("reorder", "input", input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 2, 1 }, 0 }), reorder("reorder2", "reorder", output_layout)); network network(engine, topology); @@ -1106,7 +1102,7 @@ 
TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding) 8.f, 8.f }; std::vector out; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { out.push_back(output_ptr[i]); @@ -1120,9 +1116,9 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) // Input : bfyx:2x2x2x2 // Output : bfzyx:2x2x1X2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); set_values(input, { 1.f, 0.f, @@ -1139,7 +1135,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", format::bfzyx, data_types::f32)); network network(engine, topology); @@ -1150,8 +1146,8 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - EXPECT_TRUE(output.get_layout().format == format::bfzyx); - auto sizes = output.get_layout().size; + EXPECT_TRUE(output->get_layout().format == format::bfzyx); + auto sizes = output->get_layout().size; EXPECT_TRUE(sizes.batch[0] == 2); EXPECT_TRUE(sizes.feature[0] == 2); EXPECT_TRUE(sizes.spatial[0] == 2); @@ -1172,7 +1168,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) 8.f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1184,9 +1180,9 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) // Input : yxfb:2x2x2x2 // Output : bfzyx:2x2x1X2x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); set_values(input, { 1.f, 0.f, @@ -1203,7 +1199,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", format::bfzyx, data_types::f32)); network network(engine, topology); @@ -1214,8 +1210,8 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - EXPECT_TRUE(output.get_layout().format == format::bfzyx); - auto sizes = output.get_layout().size; + EXPECT_TRUE(output->get_layout().format == format::bfzyx); + auto sizes = output->get_layout().size; EXPECT_TRUE(sizes.batch[0] == 2); EXPECT_TRUE(sizes.feature[0] == 2); EXPECT_TRUE(sizes.spatial[0] == 2); @@ -1236,7 +1232,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) 12.0f, 8.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1248,9 +1244,9 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) // Input : bfzyx:2x2x2x2x2 // Output : bfyx:2x2x4x2 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 2, 2, 2, 2 } }); set_values(input, { 1.f, 0.f, @@ -1279,7 +1275,7 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) }); topology topology( - 
input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", format::bfyx, data_types::f32)); network network(engine, topology); @@ -1290,8 +1286,8 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - EXPECT_TRUE(output.get_layout().format == format::bfyx); - auto sizes = output.get_layout().size; + EXPECT_TRUE(output->get_layout().format == format::bfyx); + auto sizes = output->get_layout().size; EXPECT_TRUE(sizes.batch[0] == 2); EXPECT_TRUE(sizes.feature[0] == 2); EXPECT_TRUE(sizes.spatial[0] == 2); @@ -1324,7 +1320,7 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) 8.f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]); @@ -1333,11 +1329,11 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) TEST(reorder_gpu_opt, basic_remove_redundant) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), reorder("r1", "in", format::bfyx, data_types::f32), reorder("r2", "r1", format::yxfb, data_types::f32) }; @@ -1345,26 +1341,26 @@ TEST(reorder_gpu_opt, basic_remove_redundant) build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); EXPECT_TRUE(executed_primitives.count("r1") == 0); ASSERT_TRUE(outputs.count("r2") == 1); - EXPECT_TRUE(outputs.at("r2").get_memory().get_layout().format == format::yxfb); + EXPECT_TRUE(outputs.at("r2").get_memory()->get_layout().format == format::yxfb); } TEST(reorder_gpu_opt, remove_redundant_activation_fuse) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 1, 2, 1 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 1, 2, 1 } }); set_values(in, { -1.0f, -1.0f }); - memory scale_mem = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{1, 1, 1, 1 } }); + memory::ptr scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{1, 1, 1, 1 } }); set_values(scale_mem, { 2.0f }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), reorder("r1", "in", format::bfyx, data_types::f32), activation("relu", "r1", activation_func::relu_negative_slope, {0.01f, 0.0f}), data("scale_data", scale_mem), @@ -1374,22 +1370,22 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse) build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); - auto out_ptr = outputs.begin()->second.get_memory().pointer(); + cldnn::mem_lock out_ptr(outputs.begin()->second.get_memory(), get_test_stream()); EXPECT_FLOAT_EQ(out_ptr[0], -0.02f); EXPECT_FLOAT_EQ(out_ptr[1], -0.02f); } TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { 
data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } }); - memory weights = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } }); + memory::ptr weights = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), convolution("conv", "in",{ "weights" }), data("weights", weights), reorder("r1", "conv", format::bfyx, data_types::f32) //optimize data should add conversion from yxfb to bfyx and 'conv' should output data in bfyx as well (IE case) @@ -1401,23 +1397,23 @@ TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders) opts.set_option(build_option::outputs({ "r1" })); opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); EXPECT_TRUE(outputs.count("conv") == 0); ASSERT_TRUE(outputs.count("r1") == 1); - EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx); + EXPECT_TRUE(outputs.at("r1").get_memory()->get_layout().format == format::bfyx); } TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } }); - memory weights = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } }); + memory::ptr weights = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), convolution("conv", "in",{ "weights" }), data("weights", weights), reorder("r1", "conv", format::bfyx, data_types::f32), //optimize data should add conversion from yxfb to bfyx and 'conv' should output data in bfyx as well (IE case) @@ -1427,7 +1423,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto executed_primitives = net.get_executed_primitives(); @@ -1436,16 +1432,16 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) EXPECT_TRUE(executed_primitives.count("r1") == 0); //all pirmitives in this test needs to be executed ASSERT_TRUE(outputs.count("output") == 1); - EXPECT_TRUE(outputs.at("output").get_memory().get_layout().format == format::bfyx); + EXPECT_TRUE(outputs.at("output").get_memory()->get_layout().format == format::bfyx); } TEST(reorder_gpu_opt, non_trivial_remove_redundant) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::f32, format::yxfb, tensor{ 1, 1, 5, 2 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::yxfb, tensor{ 1, 1, 5, 2 } }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), reorder("r1", "in", format::bfyx, data_types::f32) }; @@ -1453,7 +1449,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = 
net.execute(); auto executed_primitives = net.get_executed_primitives(); @@ -1463,15 +1459,15 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) //ASSERT_TRUE(all_primitives.at("r1") == "_optimized_"); EXPECT_TRUE(executed_primitives.at("in") != outputs.at("r1").get_event()); ASSERT_TRUE(outputs.count("r1") == 1); - EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx); + EXPECT_TRUE(outputs.at("r1").get_memory()->get_layout().format == format::bfyx); } TEST(reorder_gpu_opt, mean_mul) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); - memory mul = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{1, 3, 1, 2 } }); + memory::ptr in = engine.allocate_memory({ data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); + memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{1, 3, 1, 2 } }); set_values(in, { 1, 2, @@ -1481,7 +1477,7 @@ TEST(reorder_gpu_opt, mean_mul) { 0.5f, 2.5f, -5.0f, 4.3f, 1.2f, -3.5f }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), data("mul",mul), reorder("r1", "in", format::bfyx, data_types::f32,"mul", reorder_mean_mode::mul) }; @@ -1489,12 +1485,12 @@ TEST(reorder_gpu_opt, mean_mul) float answers[] = { 0.5f, 5.0f, -15.0f, 17.2f, 6.0f, -21.0f }; build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto output = outputs.begin()->second.get_memory(); - auto ptr = output.pointer(); + cldnn::mem_lock ptr(output, get_test_stream()); float* a_ptr = answers; for (auto& val : ptr) EXPECT_FLOAT_EQ(*(a_ptr++), val);; @@ -1503,10 +1499,10 @@ TEST(reorder_gpu_opt, mean_mul) TEST(reorder_gpu_opt, mean_div) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); - memory mul = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } }); + memory::ptr in = engine.allocate_memory({ data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); + memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } }); set_values(in, { 1, 2, @@ -1516,7 +1512,7 @@ TEST(reorder_gpu_opt, mean_div) { 0.5f, 2.0f, -3.0f, 8.0f, 1.25f, -3.0f }); topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), data("mul",mul), reorder("r1", "in", format::bfyx, data_types::f32,"mul", reorder_mean_mode::div) }; @@ -1524,12 +1520,12 @@ TEST(reorder_gpu_opt, mean_div) float answers[] = { 2.0f, 1.0f, -1.0f, 0.5f, 4.0f, -2.0f }; build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto output = outputs.begin()->second.get_memory(); - auto ptr = output.pointer(); + cldnn::mem_lock ptr(output, get_test_stream()); float* a_ptr = answers; for (auto& val : ptr) EXPECT_FLOAT_EQ(*(a_ptr++), val);; @@ -1538,9 +1534,9 @@ TEST(reorder_gpu_opt, mean_div) TEST(reorder_gpu_opt, mean_mul_val) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); + memory::ptr in = engine.allocate_memory({ data_types::i8, format::bfyx, tensor{ 1, 3, 1, 2 } }); set_values(in, { 1, 2, @@ -1548,19 +1544,19 @@ 
TEST(reorder_gpu_opt, mean_mul_val) 5, 60 }); std::vector mul_val = { 2.0f, 0.5f, 10.0f }; topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), reorder("r1", "in", format::bfyx, data_types::f32, mul_val, reorder_mean_mode::mul) }; float answers[] = { 2.0f, 4.0f, 1.5f, 2.0f, 50.0f, 600.0f }; build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto output = outputs.begin()->second.get_memory(); - auto ptr = output.pointer(); + cldnn::mem_lock ptr(output, get_test_stream()); float* a_ptr = answers; for (auto& val : ptr) EXPECT_FLOAT_EQ(*(a_ptr++), val);; @@ -1568,9 +1564,9 @@ TEST(reorder_gpu_opt, mean_mul_val) TEST(reorder_gpu_opt, mean_mul_val_float_to_int) { - engine eng; + auto& engine = get_test_engine(); - memory in = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } }); + memory::ptr in = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } }); set_values(in, { 0.6f, 1.5f, @@ -1578,19 +1574,19 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int) 5.0f, 60.0f }); std::vector mul_val = { 1.4f, 0.5f, 5.0f }; topology tpl{ - input_layout("in", in.get_layout()), + input_layout("in", in->get_layout()), reorder("r1", "in", format::bfyx, data_types::i8, mul_val, reorder_mean_mode::mul) }; char answers[] = { 1, 2, 2, 2, 25, 127 }; build_options opts; opts.set_option(build_option::optimize_data(true)); - network net(eng, tpl, opts); + network net(engine, tpl, opts); net.set_input_data("in", in); auto outputs = net.execute(); auto output = outputs.begin()->second.get_memory(); - auto ptr = output.pointer(); + cldnn::mem_lock ptr(output, get_test_stream()); char* a_ptr = answers; for (auto& val : ptr) EXPECT_EQ(*(a_ptr++), val); @@ -1599,9 +1595,9 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int) TEST(reorder_gpu_i32, basic) { // Test for converting data types f32->i32 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); layout output_layout(data_types::i32, format::bfyx, { 2,2,2,2 }); set_values(input, { @@ -1612,7 +1608,7 @@ TEST(reorder_gpu_i32, basic) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -1632,7 +1628,7 @@ TEST(reorder_gpu_i32, basic) }; int32_t* a_ptr = answers; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto& val : output_ptr) EXPECT_EQ(*(a_ptr++), val); } @@ -1640,9 +1636,9 @@ TEST(reorder_gpu_i32, basic) TEST(reorder_gpu_i64, basic) { // Test for converting data types f32->i64 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); layout output_layout(data_types::i64, format::bfyx, { 2,2,2,2 }); set_values(input, { @@ -1653,7 +1649,7 @@ TEST(reorder_gpu_i64, basic) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, 
topology); @@ -1673,19 +1669,19 @@ TEST(reorder_gpu_i64, basic) }; int64_t* a_ptr = answers; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (auto& val : output_ptr) EXPECT_EQ(*(a_ptr++), val); } TEST(reorder_gpu_binary, binary_output) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); layout output_layout(data_types::bin, format::b_fs_yx_32fp, { 2, 2, 2, 2 }); // Data is supposed to be quantized to {0,1} values @@ -1698,7 +1694,7 @@ TEST(reorder_gpu_binary, binary_output) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -1709,17 +1705,17 @@ TEST(reorder_gpu_binary, binary_output) EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1, 2, 3, 1, 1, 1, 0, 3 }; // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), input.get_layout().count()); - ASSERT_EQ(output.get_layout().count(), input.get_layout().count()); + ASSERT_EQ(output->count(), input->get_layout().count()); + ASSERT_EQ(output->get_layout().count(), input->get_layout().count()); // Check that memory physical size consider binary pack - ASSERT_EQ(output.size(), answers.size() * sizeof(uint32_t)); + ASSERT_EQ(output->size(), answers.size() * sizeof(uint32_t)); for (size_t i = 0; i < answers.size(); ++i) { EXPECT_EQ(answers[i], output_ptr[i]) << "index: " << i; @@ -1728,12 +1724,12 @@ TEST(reorder_gpu_binary, binary_output) TEST(reorder_gpu_binary, binary_input) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); - auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); layout output_layout(data_types::f32, format::bfyx, { 2, 2, 2, 2 }); // Data is supposed to be quantized to {0,1} values @@ -1749,7 +1745,7 @@ TEST(reorder_gpu_binary, binary_input) 1, 1, 0, 3 }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -1760,13 +1756,13 @@ TEST(reorder_gpu_binary, binary_input) EXPECT_EQ(outputs.begin()->first, "reorder"); auto output = outputs.begin()->second.get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Check that layout and memory contains logical size of tensor - ASSERT_EQ(output.count(), input.get_layout().count()); - ASSERT_EQ(output.get_layout().count(), input.get_layout().count()); + ASSERT_EQ(output->count(), input->get_layout().count()); + ASSERT_EQ(output->get_layout().count(), input->get_layout().count()); - ASSERT_EQ(output.size(), answers.size() * sizeof(float)); + ASSERT_EQ(output->size(), answers.size() * sizeof(float)); for (size_t i = 0; i < answers.size(); ++i) { 
EXPECT_EQ(answers[i], output_ptr[i]) << "index: " << i; @@ -1791,9 +1787,9 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) // Expected output: // 0 1 -1 0 8 9 7 8 // 2 3 1 2 10 11 9 10 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, layout{ data_types::f32, format::bfyx, tensor{ batch(1), feature(4), spatial(2, 2) } }); + auto input = engine.allocate_memory(layout{ data_types::f32, format::bfyx, tensor{ batch(1), feature(4), spatial(2, 2) } }); std::vector data = { 1.f, 2.f, 3.f, 4.f, @@ -1813,7 +1809,7 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) }; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder1", "input", format::bfwzyx, data_types::f32), reshape("reshape1", "reorder1", tensor(batch(2), feature(2), spatial(1, 1, 2, 2) )), reorder("reorder2", "reshape1", format::bfwzyx, data_types::f32, sub_bfwzyx), @@ -1832,8 +1828,8 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) EXPECT_EQ(outputs.begin()->first, "out_reorder"); auto output = outputs.begin()->second.get_memory(); - EXPECT_TRUE(output.get_layout().format == format::bfwzyx); - auto sizes = output.get_layout().size; + EXPECT_TRUE(output->get_layout().format == format::bfwzyx); + auto sizes = output->get_layout().size; EXPECT_EQ(sizes.batch[0], 1); EXPECT_EQ(sizes.feature[0], 4); EXPECT_EQ(sizes.spatial[0], 2); @@ -1841,7 +1837,7 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) EXPECT_EQ(sizes.spatial[2], 1); EXPECT_EQ(sizes.spatial[2], 1); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), expected.size()); for (size_t i = 0; i < expected.size(); i++) @@ -1852,20 +1848,20 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int32_t b_in = 2; const int32_t f_in = 2; const int32_t x_in = 2; const int32_t y_in = 2; const int32_t z_in = 2; - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { b_in,f_in,x_in,y_in,z_in } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { b_in,f_in,x_in,y_in,z_in } }); layout output_layout(data_types::f32, format::bs_fs_zyx_bsv16_fsv16,{ b_in,f_in,x_in,y_in,z_in }); tests::set_random_values(input); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -1906,8 +1902,8 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16) return output_offset; }; - auto input_ptr = input.pointer(); - auto output_ptr = output.pointer(); + cldnn::mem_lock input_ptr(input, get_test_stream()); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t linear_index = 0; for (int32_t b = 0; b < b_in; b++) { for (int32_t f = 0; f < f_in; f++) { @@ -1930,7 +1926,7 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16) TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); const int32_t b_in = 2; const int32_t f_in = 2; const int32_t x_in = 2; @@ -1941,13 +1937,13 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) const int32_t y_pad= 2; const int32_t x_pad= 1; - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { b_in,f_in,x_in,y_in,z_in } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { b_in,f_in,x_in,y_in,z_in 
} }); layout output_layout(data_types::f32, format::bs_fs_zyx_bsv16_fsv16,{ b_in,f_in,x_in,y_in,z_in }); tests::set_random_values(input); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout.with_padding(padding({0, 0, x_pad, y_pad, 0}, 0.f)))); network network(engine, topology); @@ -1988,8 +1984,8 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) return output_offset; }; - auto input_ptr = input.pointer(); - auto output_ptr = output.pointer(); + cldnn::mem_lock input_ptr(input, get_test_stream()); + cldnn::mem_lock output_ptr(output, get_test_stream()); int32_t linear_index = 0; for (int32_t b = 0; b < b_in; b++) { for (int32_t f = 0; f < f_in; f++) { @@ -2011,16 +2007,16 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { 2, 12, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 2, 12, 1, 1 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f }); const std::string reorder_name = "reorder_prim"; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), activation("first_activation", "input", activation_func::abs), reorder(reorder_name, "first_activation", format::bfyx, data_types::f32), activation("second_activation", reorder_name, activation_func::abs)); @@ -2045,7 +2041,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), 24); for (size_t i = 0; i < output_ptr.size(); i++) { @@ -2055,19 +2051,19 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::b_fs_yx_fsv16, { 1, 8, 1, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::oiyx, { 1, 8, 3, 3 } }); + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 1, 8, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx, { 1, 8, 3, 3 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f }); - set_values(weights, std::vector(weights.count(), 1)); + set_values(weights, std::vector(weights->count(), 1)); const std::string reorder_name = "reorder"; const std::string reorder_primitive_name = "reorder:" + reorder_name; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), data("weights", weights), reorder(reorder_name, "input", format::bfyx, data_types::f32), convolution("convolution", reorder_name, {"weights"}, {1,1,1,1}, {0,0,-1,-1}, {1,1,1,1})); @@ -2089,7 +2085,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) float answers[1] = { 28.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 1; i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]) << 
"i=" << i; @@ -2098,9 +2094,9 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, + auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { 2, 4, 1, 1 }, padding({1, 16, 0, 0}, {1, 0, 0, 0}) }); @@ -2124,7 +2120,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) const std::string reorder_name = "reorder_prim"; topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder(reorder_name, "input", format::bfyx, data_types::f32), activation("activation", reorder_name, activation_func::abs)); @@ -2148,7 +2144,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) 16.f, 17.f, 18.f, 19.f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), 8); for (size_t i = 0; i < output_ptr.size(); i++) { EXPECT_FLOAT_EQ(answers[i], output_ptr[i]) << "i=" << i; @@ -2158,21 +2154,21 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) TEST(reorder_gpu, any_format) { auto& engine = get_test_engine(); - auto input = memory::allocate(engine, layout(data_types::f32, format::yxfb, tensor(5, 7, 13, 9))); + auto input = engine.allocate_memory(layout(data_types::f32, format::yxfb, tensor(5, 7, 13, 9))); topology topo; - topo.add(input_layout("in", input.get_layout())); + topo.add(input_layout("in", input->get_layout())); topo.add(reorder("out", "in", format::any, data_types::f32)); network net(engine, topo); - auto data = generate_random_1d(input.count(), -1, 1); + auto data = generate_random_1d(input->count(), -1, 1); set_values(input, data); net.set_input_data("in", input); auto outputs = net.execute(); auto out_mem = outputs.at("out").get_memory(); - auto output = out_mem.pointer(); + cldnn::mem_lock output(out_mem, get_test_stream()); for (size_t i = 0; i < data.size(); ++i) { EXPECT_EQ(output[i], data[i]) << "i = " << i; @@ -2181,9 +2177,9 @@ TEST(reorder_gpu, any_format) { TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::image_2d_rgba, { 1, 3, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::image_2d_rgba, { 1, 3, 2, 2 } }); layout output_layout(data_types::f16, format::bfyx, { 1, 3, 2, 2 }); set_values(input, { @@ -2194,7 +2190,7 @@ TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -2217,7 +2213,7 @@ TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) 50.0f, 253.0f, }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr (output, get_test_stream()); for (int i = 0; i < 12; i++) { EXPECT_NEAR(FLOAT16(answers[i] / 255.f), output_ptr[i], 1e-3f); @@ -2227,9 +2223,9 @@ TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) TEST(reorder_bfyx_to_image2d_rgba_gpu, basic) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 3, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 3, 2, 2 } }); layout output_layout(data_types::u8, format::image_2d_rgba, { 
1, 3, 2, 2 }); set_values(input, { @@ -2244,7 +2240,7 @@ TEST(reorder_bfyx_to_image2d_rgba_gpu, basic) }); topology topology( - input_layout("input", input.get_layout()), + input_layout("input", input->get_layout()), reorder("reorder", "input", output_layout)); network network(engine, topology); @@ -2263,7 +2259,7 @@ TEST(reorder_bfyx_to_image2d_rgba_gpu, basic) 251, 252, 253, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { EXPECT_EQ(answers[i], output_ptr[i]); @@ -2329,7 +2325,7 @@ public: } template - memory generate_reference_typed(const std::vector& inputs) + memory::ptr generate_reference_typed(const std::vector& inputs) { auto reorder = std::static_pointer_cast(layer_params); primitive_id mean = reorder->mean; @@ -2337,12 +2333,12 @@ public: assert(mean == ""); assert(subtract_per_feature.size() == 0); - auto output = memory::allocate(engine, cldnn::layout(*reorder->output_data_type, inputs[0].get_layout().format, inputs[0].get_layout().size)); + auto output = engine.allocate_memory(cldnn::layout(*reorder->output_data_type, inputs[0]->get_layout().format, inputs[0]->get_layout().size)); - cldnn::pointer input_mem = inputs[0].pointer(); - cldnn::pointer output_mem = output.pointer(); + cldnn::mem_lock input_mem(inputs[0], get_test_stream()); + cldnn::mem_lock output_mem(output, get_test_stream()); - for (size_t i = 0; i < inputs[0].get_layout().get_linear_size(); i++) + for (size_t i = 0; i < inputs[0]->get_layout().get_linear_size(); i++) { // Write the output in the same order as the input with type conversion as needed. // The correct order will be checked in generic_test::compare_buffers. @@ -2352,7 +2348,7 @@ public: return output; } - virtual memory generate_reference(const std::vector& inputs) + virtual memory::ptr generate_reference(const std::vector& inputs) { if (generic_params->data_type == data_types::f32) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp index 1a02c57d79c..ca7da7f9e83 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/resample_gpu_test.cpp @@ -3,19 +3,16 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/resample.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include -#include + +#include "test_utils.h" + +#include +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(resample_gpu, basic_in2x3x2x2_nearest) { // Input : 2x2x3x2 @@ -29,15 +26,15 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) { // f1: b0: 7 8 -16 b1: 12 9 -17 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); auto output_size = tensor(batch(2), feature(2), spatial(6, 4)); uint32_t num_filter = 0u; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, num_filter, resample_type::nearest)); set_values(input, { @@ -58,7 +55,7 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) 
{ auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 1.f, 1.f, 2.f, 2.f, -10.f, -10.f, @@ -101,15 +98,15 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(4, 4)); uint32_t num_filter = 1u; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, num_filter, resample_type::caffe_bilinear)); set_values(input, { @@ -123,9 +120,9 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t) 16); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t) 16); float answers[16] = { 1.f, 1.25f, 1.75f, 2.f, @@ -152,14 +149,14 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(4, 4)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); set_values(input, { @@ -173,9 +170,9 @@ TEST(resample_gpu, basic_in1x1x2x2_interp) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t) 16); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t) 16); float answers[16] = { 1.0f, 1.5f, 2.0f, 2.0f, @@ -202,14 +199,14 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(4, 4)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("input_to_b_fs_yx_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32)); topology.add(resample("resample", "input_to_b_fs_yx_fsv16", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32)); @@ -228,12 +225,12 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_f16) { auto outputs = net.execute(); auto resample_out = outputs.at("resample").get_memory(); - ASSERT_EQ(resample_out.get_layout().format, format::b_fs_yx_fsv16); + 
ASSERT_EQ(resample_out->get_layout().format, format::b_fs_yx_fsv16); auto output = outputs.at("res_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t) 16); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t) 16); float answers[16] = { 1.0f, 1.5f, 2.0f, 2.0f, @@ -260,14 +257,14 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(4, 4)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reorder("input_to_fs_b_yx_fsv32", "input", format::fs_b_yx_fsv32, data_types::f16)); topology.add(resample("resample", "input_to_fs_b_yx_fsv32", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 0, resample_type::bilinear)); topology.add(reorder("res_to_bfyx", "resample", format::bfyx, data_types::f32)); @@ -286,12 +283,12 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_fsv32) { auto outputs = net.execute(); auto resample_out = outputs.at("resample").get_memory(); - ASSERT_EQ(resample_out.get_layout().format, format::fs_b_yx_fsv32); + ASSERT_EQ(resample_out->get_layout().format, format::fs_b_yx_fsv32); auto output = outputs.at("res_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t) 16); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t) 16); float answers[16] = { 1.0f, 1.5f, 2.0f, 2.0f, @@ -319,14 +316,14 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(4, 4)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, {0, 0, 0, 0}, {0, 0, 0, 0}, 1, resample_type::bilinear)); set_values(input, { @@ -340,9 +337,9 @@ TEST(resample_gpu, basic_in1x1x2x2_interp_align_1) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t) 16); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t) 16); float answers[16] = { 1.000000f, 1.333333f, 1.666667f, 2.000000f, @@ -369,15 +366,15 @@ TEST(resample_gpu, nearest_asymmetric) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(5, 4)); topology topology; uint32_t num_filter = 1u; - topology.add(input_layout("input", input.get_layout())); + 
topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, num_filter, resample_type::nearest)); set_values(input, { @@ -391,9 +388,9 @@ TEST(resample_gpu, nearest_asymmetric) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t)20); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t)20); float answers[20] = { 1.f, 1.f, 1.f, 2.f, 2.f, @@ -420,15 +417,15 @@ TEST(resample_gpu, nearest_asymmetric_i8) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(5, 4)); topology topology; uint32_t num_filter = 1u; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, num_filter, resample_type::nearest)); set_values(input, { @@ -442,9 +439,9 @@ TEST(resample_gpu, nearest_asymmetric_i8) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t)20); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t)20); int8_t answers[20] = { 1, 1, 1, 2, 2, @@ -471,15 +468,15 @@ TEST(resample_gpu, bilinear_asymmetric) { // f0: b0: 3 4 // - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto output_size = tensor(batch(1), feature(1), spatial(6, 4)); topology topology; uint32_t num_filter = 1u; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(resample("upsampling", "input", output_size, num_filter, resample_type::caffe_bilinear)); set_values(input, { @@ -493,9 +490,9 @@ TEST(resample_gpu, bilinear_asymmetric) { auto outputs = net.execute(); auto output = outputs.at("upsampling").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); - EXPECT_EQ(output.get_layout().get_linear_size(), (size_t)24); + EXPECT_EQ(output->get_layout().get_linear_size(), (size_t)24); float answers[24] = { 1.f, 1.f, 1.33333f, 1.66667f, 2.f, 2.f, @@ -525,21 +522,21 @@ struct resample_random_test_params { struct resample_random_test : testing::TestWithParam{ template - void fill_random_typed(memory& mem, int min, int max, int k) { - auto size = mem.get_layout().size; + void fill_random_typed(memory::ptr mem, int min, int max, int k) { + auto size = mem->get_layout().size; size_t b = size.batch[0]; size_t f = size.feature[0]; size_t x = size.spatial[0]; size_t y = size.spatial[1]; auto data = generate_random_4d(b, f, y, x, min, max, k); - auto ptr = mem.pointer(); + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = 
tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[bi][fi][yi][xi]; } } @@ -547,8 +544,8 @@ struct resample_random_test : testing::TestWithParamget_layout().data_type; switch (dt) { case data_types::f32: fill_random_typed(mem, -127, 127, 2); @@ -568,19 +565,19 @@ struct resample_random_test : testing::TestWithParam - void compare_nearest_typed(const memory& input, const memory& output, uint32_t align_corners) { - auto output_lay = output.get_layout(); + void compare_nearest_typed(const memory::ptr input, const memory::ptr output, uint32_t align_corners) { + auto output_lay = output->get_layout(); size_t b = output_lay.size.batch[0]; size_t f = output_lay.size.feature[0]; size_t x = output_lay.size.spatial[0]; size_t y = output_lay.size.spatial[1]; - size_t in_x = input.get_layout().size.spatial[0]; - size_t in_y = input.get_layout().size.spatial[1]; + size_t in_x = input->get_layout().size.spatial[0]; + size_t in_y = input->get_layout().size.spatial[1]; float x_ratio = x > align_corners ? static_cast(in_x - align_corners) / static_cast(x - align_corners) : 0.f; float y_ratio = y > align_corners ? static_cast(in_y - align_corners) / static_cast(y - align_corners) : 0.f; - auto in_ptr = input.pointer(); - auto out_ptr = output.pointer(); + cldnn::mem_lock in_ptr(input, get_test_stream()); + cldnn::mem_lock out_ptr(output, get_test_stream()); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { @@ -588,10 +585,10 @@ struct resample_random_test : testing::TestWithParam(floor(x_ratio * xi)); auto in_yi = static_cast(floor(y_ratio * yi)); auto in_coords = tensor(batch(bi), feature(fi), spatial(in_xi, in_yi, 0, 0)); - auto in_offset = input.get_layout().get_linear_offset(in_coords); + auto in_offset = input->get_layout().get_linear_offset(in_coords); auto in_val = in_ptr[in_offset]; auto out_coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto out_offset = output.get_layout().get_linear_offset(out_coords); + auto out_offset = output->get_layout().get_linear_offset(out_coords); auto out_val = out_ptr[out_offset]; EXPECT_EQ(in_val, out_val) << " at bi=" << bi << ", fi=" << fi << ", xi=" << xi << ", yi=" << yi; } @@ -601,20 +598,20 @@ struct resample_random_test : testing::TestWithParam - void compare_bilinear_typed(const memory& input, const memory& output, uint32_t align_corners) { - auto output_lay = output.get_layout(); + void compare_bilinear_typed(const memory::ptr input, const memory::ptr output, uint32_t align_corners) { + auto output_lay = output->get_layout(); size_t b = output_lay.size.batch[0]; size_t f = output_lay.size.feature[0]; size_t x = output_lay.size.spatial[0]; size_t y = output_lay.size.spatial[1]; - auto input_lay = input.get_layout(); + auto input_lay = input->get_layout(); size_t in_x = input_lay.size.spatial[0]; size_t in_y = input_lay.size.spatial[1]; float x_ratio = x > align_corners ? static_cast(in_x - align_corners) / static_cast(x - align_corners) : 0.f; float y_ratio = y > align_corners ? 
static_cast(in_y - align_corners) / static_cast(y - align_corners) : 0.f; - auto in_ptr = input.pointer(); - auto out_ptr = output.pointer(); + cldnn::mem_lock in_ptr(input, get_test_stream()); + cldnn::mem_lock out_ptr(output, get_test_stream()); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { @@ -658,8 +655,8 @@ struct resample_random_test : testing::TestWithParamget_layout().data_type; if (operation == resample_type::nearest) { // Nearest resampling implicitly ignores align_corners if (dt == data_types::f32) { @@ -691,7 +688,7 @@ struct resample_random_test : testing::TestWithParam { template - void fill_random_typed(memory& mem, int min, int max, int k) { - auto size = mem.get_layout().size; + void fill_random_typed(memory::ptr mem, int min, int max, int k) { + auto size = mem->get_layout().size; size_t b = size.batch[0]; size_t f = size.feature[0]; size_t x = size.spatial[0]; size_t y = size.spatial[1]; auto data = generate_random_4d(b, f, y, x, min, max, k); - auto ptr = mem.pointer(); + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { for (size_t xi = 0; xi < x; ++xi) { auto coords = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto offset = mem.get_layout().get_linear_offset(coords); + auto offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = data[bi][fi][yi][xi]; } } @@ -805,8 +802,8 @@ struct caffe_resample_random_test : testing::TestWithParamget_layout().data_type; switch (dt) { case data_types::f32: fill_random_typed(mem, -127, 127, 2); @@ -826,16 +823,16 @@ struct caffe_resample_random_test : testing::TestWithParam - bool compare_outputs(const memory& out_ref, const memory& out_opt) { - auto output_lay = out_ref.get_layout(); - auto opt_output_lay = out_opt.get_layout(); + bool compare_outputs(const memory::ptr out_ref, const memory::ptr out_opt) { + auto output_lay = out_ref->get_layout(); + auto opt_output_lay = out_opt->get_layout(); size_t b = output_lay.size.batch[0]; size_t f = output_lay.size.feature[0]; size_t x = output_lay.size.spatial[0]; size_t y = output_lay.size.spatial[1]; - auto ref_ptr = out_ref.pointer(); - auto opt_ptr = out_opt.pointer(); + cldnn::mem_lock ref_ptr(out_ref, get_test_stream()); + cldnn::mem_lock opt_ptr(out_opt, get_test_stream()); for (size_t bi = 0; bi < b; ++bi) { for (size_t fi = 0; fi < f; ++fi) { for (size_t yi = 0; yi < y; ++yi) { @@ -859,10 +856,10 @@ struct caffe_resample_random_test : testing::TestWithParamget_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1009,7 +1004,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 0.f, 1.f, 1.f, 1.f, @@ -1058,19 +1053,19 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); topology 
topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1098,7 +1093,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 0.f, 0.f, 1.f, 1.f, @@ -1147,19 +1142,19 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1187,7 +1182,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 0.f, 0.f, 1.f, 1.f, @@ -1236,19 +1231,19 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1276,7 +1271,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 0.f, 0.f, 0.f, 1.f, @@ -1325,19 +1320,19 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); auto output_size = tensor(batch(b), feature(f), spatial(x*2, y*2)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1365,7 +1360,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { auto outputs = net.execute(); auto output = 
outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float answers[96] = { 0.f, 0.f, 0.f, 1.f, @@ -1414,21 +1409,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1456,7 +1451,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.f, 0.f, 1.f, @@ -1483,21 +1478,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 1; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1525,7 +1520,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.f, 0.f, 1.f, @@ -1546,21 +1541,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1588,7 +1583,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.f, 1.f, 1.f, @@ -1615,21 +1610,21 @@ TEST(resample_gpu, 
interpolate_in2x2x3x2_coord_transform_mode4) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1657,7 +1652,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 2.f, 3.f, 3.f, @@ -1684,21 +1679,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::nearest; @@ -1726,7 +1721,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.f, 0.f, 1.f, @@ -1753,21 +1748,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::cubic; @@ -1793,7 +1788,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.29600694f, 0.8828125f, 1.46961806f, @@ -1820,20 +1815,20 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 1; int f = 1; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = 
memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::cubic; @@ -1853,7 +1848,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 5.34722222f, 3.f, 0.65277778f, @@ -1872,21 +1867,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 2; int f = 2; int y = 3; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 2; x = 3; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::caffe_bilinear; @@ -1912,7 +1907,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 0.5f, 1.f, 1.5f, @@ -1939,21 +1934,21 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear_onnx) { // Output : 2x2x6x4 // Sample Type: Nearest - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 1; int f = 1; int y = 2; int x = 2; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 4; x = 4; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::bilinear; @@ -1974,7 +1969,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear_onnx) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 1.f, 1.33333f, 1.66667f, 2.f, @@ -1994,21 +1989,21 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { // Output : 1x1x1x2 // Sample Type: Linear - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); int b = 1; int f = 1; int y = 2; int x = 4; tensor shape = tensor{batch(b), feature(f), spatial(x, y)}; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, shape }); y = 1; x = 2; auto output_size = tensor(batch(b), feature(f), spatial(x, y)); topology topology; - topology.add(input_layout("input", input.get_layout())); + 
topology.add(input_layout("input", input->get_layout())); int32_t antialias = 0; float cube_coeff = -0.75f; resample_type mode = resample_type::caffe_bilinear; @@ -2031,7 +2026,7 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { auto outputs = net.execute(); auto output = outputs.at("interpolate").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector answers = { 2.6666665f, 4.3333331f diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp index 69ad0f2be1f..ff35e58e75e 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp @@ -3,19 +3,15 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" -#include "test_utils/test_utils.h" +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; using namespace testing; void verify_float(const float& output_value, const float& value) { @@ -30,7 +26,7 @@ template void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size, bool /* in_place */, padding const& input_padd = padding(), padding const& output_padd = padding()) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); //allocate input memory auto data_type = data_types::f32; @@ -43,10 +39,10 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re else if (std::is_same::value) data_type = data_types::i64; - auto input = memory::allocate(engine, {data_type, fmt, input_size}); + auto input = engine.allocate_memory({data_type, fmt, input_size}); { - auto input_ptr = input.cldnn::memory::pointer(); + cldnn::mem_lock input_ptr(input, get_test_stream()); auto input_itr = input_ptr.begin(); auto elements = input_size.count(); @@ -59,9 +55,9 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re topology tpl; std::string reshape_input = "input"; - tpl.add(input_layout("input", input.get_layout())); + tpl.add(input_layout("input", input->get_layout())); if (input_padd) { - auto padded_input_layout = input.get_layout(); + auto padded_input_layout = input->get_layout(); padded_input_layout.data_padding = input_padd; tpl.add(reorder("reorder", "input", padded_input_layout)); reshape_input = "reorder"; @@ -79,15 +75,15 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re auto net_input = outputs.at(reshape_input).get_memory(); auto output = outputs.at("reshape").get_memory(); - EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type); //reshape should not change data_type - EXPECT_TRUE(output.get_layout().format.value == input.get_layout().format.value); //reshape should not change format + EXPECT_TRUE(output->get_layout().data_type == input->get_layout().data_type); //reshape should not change data_type + EXPECT_TRUE(output->get_layout().format.value == input->get_layout().format.value); //reshape should not change format //output size should be equal to requested plus output padding - ASSERT_TRUE(output.get_layout().size == reshape_size); - ASSERT_TRUE(output.get_layout().get_buffer_size() == reshape_size.add(output_padd.lower_size()).add(output_padd.upper_size())); + 
ASSERT_TRUE(output->get_layout().size == reshape_size); + ASSERT_TRUE(output->get_layout().get_buffer_size() == reshape_size.add(output_padd.lower_size()).add(output_padd.upper_size())); { - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); auto output_itr = output_ptr.begin(); auto sizes = reshape_size.sizes(fmt); @@ -439,15 +435,15 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) { // b1f0: 0.0 // b1f1: 4.0 - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; auto x_size = 1; auto y_size = 1; - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(reshape("reshape", "relu", tensor(batch(4)))); topology.add(reorder("reorder1", "reshape", format::yxfb, data_types::f32)); @@ -464,13 +460,13 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) { auto outputs = network.execute(); auto output = outputs.at("relu1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out1.size(); i++) EXPECT_EQ(output_ptr[i], out1[i]); auto output_2 = outputs.at("relu2").get_memory(); - auto output_ptr_2 = output_2.pointer(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); for (size_t i = 0; i < out2.size(); i++) EXPECT_EQ(output_ptr_2[i], out2[i]); @@ -487,12 +483,12 @@ TEST(reshape_gpu_f32, calc_output_shape) { // // output_shape (1, 1, 1, 4) - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 2, 1, 1}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {2, 2, 1, 1}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reshape("reshape", "input", tensor(1, 1, 0, -1))); set_values(input, {-1.f, 2.f, -3.f, 4.f}); @@ -506,14 +502,14 @@ TEST(reshape_gpu_f32, calc_output_shape) { auto output = outputs.at("reshape").get_memory(); - EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type); - EXPECT_TRUE(output.get_layout().format == input.get_layout().format); + EXPECT_TRUE(output->get_layout().data_type == input->get_layout().data_type); + EXPECT_TRUE(output->get_layout().format == input->get_layout().format); - ASSERT_TRUE(output.get_layout().size == tensor(1, 1, 1, 4)); + ASSERT_TRUE(output->get_layout().size == tensor(1, 1, 1, 4)); float answers[4] = {-1.f, 2.f, -3.f, 4.f}; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { EXPECT_TRUE(are_equal(answers[i], output_ptr[i])); } @@ -523,12 +519,12 @@ TEST(reshape_gpu_f32, basic_bfwzyx) { // input: bfwzyx, (3, 3, 2, 2, 1, 1) // reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1) - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, layout{data_types::f32, format::bfwzyx, tensor{batch(3), feature(3), spatial(1, 1, 2, 2)}}); + auto input = 
engine.allocate_memory(layout{data_types::f32, format::bfwzyx, tensor{batch(3), feature(3), spatial(1, 1, 2, 2)}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(reshape("reshape", "input", tensor(batch(1), feature(1), spatial(2, 2, 3, 3)), padding({0, 0, 0, 0, 0, 1}, 0.f))); // clang-format off @@ -573,10 +569,10 @@ TEST(reshape_gpu_f32, basic_bfwzyx) { auto output = outputs.at("reshape").get_memory(); - EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type); - EXPECT_TRUE(output.get_layout().format == input.get_layout().format); + EXPECT_TRUE(output->get_layout().data_type == input->get_layout().data_type); + EXPECT_TRUE(output->get_layout().format == input->get_layout().format); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), expected_out.size()); for (size_t i = 0; i < expected_out.size(); i++) { @@ -585,14 +581,14 @@ TEST(reshape_gpu_f32, basic_bfwzyx) { } TEST(reshape_gpu_f32, shrink_chain_partial) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 2; auto feature_num = 2; auto x_size = 1; auto y_size = 1; - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}); - auto scale_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); - auto shift_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}); + auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); std::vector scale_vals = {0.f, 1.f, 2.f, 3.f}; std::vector scale_shifts = {5.f, 10.f, 15.f, 20.0f}; @@ -600,7 +596,7 @@ TEST(reshape_gpu_f32, shrink_chain_partial) { set_values(shift_in, scale_shifts); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("scale_in", scale_in)); topology.add(data("shift_in", shift_in)); topology.add(activation("relu", "input", activation_func::relu)); @@ -621,17 +617,17 @@ TEST(reshape_gpu_f32, shrink_chain_partial) { auto outputs = network.execute(); auto output = outputs.at("out_reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out.size(); i++) EXPECT_EQ(output_ptr[i], out[i]) << " i=" << i; } TEST(reshape_gpu_f32, shrink_chain_full) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); - auto scale_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); - auto shift_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); std::vector scale_vals = {0.f, 1.f, 
2.f, 3.f}; std::vector scale_shifts = {5.f, 10.f, 15.f, 20.0f}; @@ -639,7 +635,7 @@ TEST(reshape_gpu_f32, shrink_chain_full) { set_values(shift_in, scale_shifts); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("scale_in", scale_in)); topology.add(data("shift_in", shift_in)); topology.add(activation("relu", "input", activation_func::relu)); @@ -660,17 +656,17 @@ TEST(reshape_gpu_f32, shrink_chain_full) { auto outputs = network.execute(); auto output = outputs.at("out_reorder").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out.size(); i++) EXPECT_EQ(output_ptr[i], out[i]) << " i=" << i; } TEST(reshape_gpu_f32, shrink_chain_out) { - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); - auto scale_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); - auto shift_in = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); + auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }}); std::vector scale_vals = {0.f, 1.f, 2.f, 3.f}; std::vector scale_shifts = {5.f, 10.f, 15.f, 20.0f}; @@ -678,7 +674,7 @@ TEST(reshape_gpu_f32, shrink_chain_out) { set_values(shift_in, scale_shifts); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(reshape("reshape", "relu", tensor(spatial(2, 2)))); topology.add(reorder("reorder", "reshape", format::bfyx, data_types::f32)); @@ -695,7 +691,7 @@ TEST(reshape_gpu_f32, shrink_chain_out) { auto outputs = network.execute(); auto output = outputs.at("reshape1").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < out.size(); i++) EXPECT_EQ(output_ptr[i], out[i]) << " i=" << i; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp index b7de3b16fa6..cb674704b29 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp @@ -3,25 +3,22 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include #include -#include using namespace cldnn; using namespace ::tests; TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t 
batch_axis = 1;
     int32_t seq_axis = 0;
@@ -34,8 +31,8 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
     });
 
     topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths->get_layout()));
     topology.add(
         reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
     );
@@ -48,7 +45,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
     auto outputs = network.execute();
 
     auto output = outputs.at("reverse_sequence").get_memory();
-    auto output_ptr = output.pointer<float>();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
 
     std::vector<float> expected_results = {
         0.0f, 3.0f, 2.0f, 1.0f
@@ -60,10 +57,10 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
 }
 
 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
-    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
 
     int32_t batch_axis = 0;
     int32_t seq_axis = 1;
@@ -78,8 +75,8 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
     });
 
     topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths->get_layout()));
     topology.add(
         reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
     );
@@ -92,7 +89,7 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
     auto outputs = network.execute();
 
     auto output = outputs.at("reverse_sequence").get_memory();
-    auto output_ptr = output.pointer<float>();
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
 
     std::vector<float> expected_results = {
         3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
@@ -106,10 +103,10 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
 }
 
 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
-    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
 
     int32_t batch_axis = 2;
     int32_t seq_axis = 0;
@@ -124,8 +121,8 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
     });
 
     topology topology;
-    topology.add(input_layout("input", input.get_layout()));
-    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths->get_layout()));
     topology.add(
         reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
     );
@@ -138,11 +135,11 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
     auto outputs = network.execute();
 
     auto output = outputs.at("reverse_sequence").get_memory();
-    auto output_ptr = output.pointer<float>();
+    cldnn::mem_lock<float> output_ptr(output,
get_test_stream()); std::vector expected_results = { - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f }; @@ -152,10 +149,10 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) { } TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 0; int32_t seq_axis = 3; @@ -170,8 +167,8 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -184,12 +181,12 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, - 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f, + 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, + 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f, 19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f }; @@ -199,10 +196,10 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { } TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 0; int32_t seq_axis = 2; @@ -217,8 +214,8 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -231,12 +228,12 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f, - 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f, - 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f, + 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f, 
+ 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f, + 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f, 20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f }; @@ -246,10 +243,10 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { } TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); int32_t batch_axis = 2; int32_t seq_axis = 0; @@ -264,8 +261,8 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -278,12 +275,12 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f, - 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f, + 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f, + 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f, + 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f, 18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f }; @@ -293,10 +290,10 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { } TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 1; int32_t seq_axis = 0; @@ -309,8 +306,8 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -323,7 +320,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 2.0f, 1.0f @@ -335,10 +332,10 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { } TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); 
+ auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 1; int32_t seq_axis = 0; @@ -351,8 +348,8 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -365,7 +362,7 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 2.0f, 1.0f @@ -377,10 +374,10 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { } TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 1, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); int32_t batch_axis = 0; int32_t seq_axis = 1; @@ -395,8 +392,8 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -409,7 +406,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f, @@ -423,10 +420,10 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { } TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 1, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); int32_t batch_axis = 2; int32_t seq_axis = 0; @@ -441,8 +438,8 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -455,7 +452,7 @@ 
TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, @@ -469,10 +466,10 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { } TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 0; int32_t seq_axis = 3; @@ -487,8 +484,8 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -501,7 +498,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, @@ -516,10 +513,10 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { } TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); int32_t batch_axis = 0; int32_t seq_axis = 2; @@ -534,8 +531,8 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -548,7 +545,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f, @@ -563,10 +560,10 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { } TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) { - engine engine; + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); - auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); + auto input = 
engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); + auto seq_lengths = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); int32_t batch_axis = 2; int32_t seq_axis = 0; @@ -581,8 +578,8 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("seq_lengths", seq_lengths.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("seq_lengths", seq_lengths->get_layout())); topology.add( reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis) ); @@ -595,7 +592,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) { auto outputs = network.execute(); auto output = outputs.at("reverse_sequence").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp deleted file mode 100644 index 1e0359f1dad..00000000000 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp +++ /dev/null @@ -1,1994 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/scale.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include "api/reorder.hpp" -#include "api/data.hpp" - -#include - -using namespace cldnn; -using namespace tests; - -TEST(scale_gpu, basic_in2x3x2x2_mixed_types_in_fp32_out_fp16) { - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); - auto shift_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); - - std::vector input_vec = { 1.0f, 0.0f, 5.0f, 1.5f, 2.0f, 0.0f, - 6.0f, 5.0f, -10.0f, -11.0f, -12.0f, -13.0f, - - 3.0f, 0.5f, 7.0f, 12.0f, 4.0f, -0.5f, - 8.0f, 8.0f, -14.0f, -15.0f, -16.0f, -17.0f }; - set_values(input, input_vec); - set_values(scale_input, { 2.0f, -1.0f }); - set_values(shift_input, { -5.0f, 10.0f }); - - std::vector result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f, - 4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f, - - 1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f, - 2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f }; - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(data("scale_input", scale_input)); - topology.add(data("shift_input", shift_input)); - topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f16})); - topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32)); - - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("reorder").get_memory(); - auto output_ptr = output.pointer(); - - ASSERT_EQ(result_vec.size(), output.count()); - - for (unsigned int i = 0; i < result_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, 
basic_in2x3x2x2_mixed_types_in_fp16_out_fp32) { - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); - auto shift_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); - - std::vector input_vec = { half_t(1.0f), half_t(0.0f), half_t(5.0f), half_t(1.5f), half_t(2.0f), half_t(0.0f), - half_t(6.0f), half_t(5.0f), half_t(-10.0f), half_t(-11.0f), half_t(-12.0f), half_t(-13.0f), - - half_t(3.0f), half_t(0.5f), half_t( 7.0f), half_t(12.0f), half_t(4.0f), half_t(-0.5f), - half_t(8.0f), half_t(8.0f), half_t(-14.0f), half_t(-15.0f), half_t(-16.0f), half_t(-17.0f) }; - set_values(input, input_vec); - set_values(scale_input, { half_t(2.0f), half_t(-1.0f) }); - set_values(shift_input, { half_t(-5.0f), half_t(10.0f) }); - - std::vector result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f, - 4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f, - - 1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f, - 2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f }; - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(data("scale_input", scale_input)); - topology.add(data("shift_input", shift_input)); - topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f32})); - topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32)); - - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("reorder").get_memory(); - auto output_ptr = output.pointer(); - - ASSERT_EQ(result_vec.size(), output.count()); - - for (unsigned int i = 0; i < result_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_mixed_types_in_fp32_scale_fp16_out_fp16) { - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); - auto shift_input = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); - - std::vector input_vec = { 1.0f, 0.0f, 5.0f, 1.5f, 2.0f, 0.0f, - 6.0f, 5.0f, -10.0f, -11.0f, -12.0f, -13.0f, - - 3.0f, 0.5f, 7.0f, 12.0f, 4.0f, -0.5f, - 8.0f, 8.0f, -14.0f, -15.0f, -16.0f, -17.0f }; - set_values(input, input_vec); - set_values(scale_input, { half_t(2.0f), half_t(-1.0f) }); - set_values(shift_input, { half_t(-5.0f), half_t(10.0f) }); - - std::vector result_vec = { -3.0f, -5.0f, 5.0f, -2.0f, -1.0f, -5.0f, - 4.0f, 5.0f, 20.0f, 21.0f, 22.0f, 23.0f, - - 1.0f, -4.0f, 9.0f, 19.0f , 3.0f, -6.0f, - 2.0f, 2.0f, 24.0f, 25.0f, 26.0f, 27.0f }; - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(data("scale_input", scale_input)); - topology.add(data("shift_input", shift_input)); - topology.add(scale("scale", "input", "scale_input", "shift_input", optional_data_type{data_types::f16})); - topology.add(reorder("reorder", "scale", format::bfyx, data_types::f32)); - - build_options bo; - bo.set_option(build_option::optimize_data(true)); - network network(engine, topology, bo); - - network.set_input_data("input", input); - - auto outputs = network.execute(); - - auto output = outputs.at("reorder").get_memory(); - auto 
output_ptr = output.pointer(); - - ASSERT_EQ(result_vec.size(), output.count()); - - for (unsigned int i = 0; i < result_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], result_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_same_size) { - // Scale : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5 - // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1 - // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5 - // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.3f, 1.1f, 1.3f, - 0.2f, 0.4f, 1.2f, 1.4f, - 0.25f, 0.5f, 1.25f, 1.5f, - 0.6f, 0.8f, 1.6f, 1.8f, - 0.7f, 0.9f, 1.7f, 1.9f, - 0.75f, 1.f, 1.75f, 2.f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bfyx) { - // Scale : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5 - // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1 - // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5 - // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 2.f, -10.f, 0.f, 0.f, -11.f, - 3.f, 4.f, -14.f, 0.5f, -0.5f, -15.f, - 5.f, 6.f, -12.f, 1.5f, 5.2f, -13.f, - 7.f, 8.f, -16.f, 12.f, 8.f, -17.f - }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, - 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f, - 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f, - 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto 
output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_scale_bfyx) { - // Scale : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5 - // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1 - // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5 - // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, - 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f, - 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f, - 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = l + x_size * (k + y_size * (j + i * feature_num)); - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bias_term) { - // Scale : 2x3x2x2 - // Bias : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5 - // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1 - // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5 - // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2 - // - // Bias: - // f0: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5 - // f0: b0: 2.6 2.7 2.75 b1: 2.8 2.9 2 - // f1: b0: 3.1 3.2 3.25 b1: 3.3 3.4 3.5 - // f1: b0: 4.6 4.7 4.75 b1: 4.8 4.9 4 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } }); - auto bias = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - 
topology.add(input_layout("bias", bias.get_layout())); - topology.add(scale("scale", "input", "scale_input", "bias")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.3f, 1.1f, 1.3f, - 0.2f, 0.4f, 1.2f, 1.4f, - 0.25f, 0.5f, 1.25f, 1.5f, - 0.6f, 0.8f, 1.6f, 1.8f, - 0.7f, 0.9f, 1.7f, 1.9f, - 0.75f, 1.f, 1.75f, 2.f - }; - set_values(scale_input, scale_input_vec); - - std::vector bias_vec = { - 1.1f, 2.3f, 3.1f, 4.3f, - 1.2f, 2.4f, 3.2f, 4.4f, - 1.25f, 2.5f, 3.25f, 4.5f, - 1.6f, 2.8f, 3.6f, 4.8f, - 1.7f, 2.9f, 3.7f, 4.9f, - 1.75f, 2.f, 3.75f, 4.f - }; - set_values(bias, bias_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - network.set_input_data("bias", bias); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i] + bias_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_scalar) { - // Scale : 1 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // 0.1 0.2 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = 0; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_y) { - // Scale : 2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // 0.1 0.2 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { 
data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,1,1,y_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, - 0.2f, - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = k; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_fb) { - // Scale : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: per feature per batch - // f0b0: 0.1 f0b1: 0.2 - // f1b0: 0.5 f1b1: 2.0 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, 1, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.2f, 0.5f, 2.0f, - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = i + feature_num * j; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_f) { - // Scale : 2x3x2x2 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 
12 8 -17 - // - // Scale: per feature - // f0bx: 0.1 f1bx: 0.2 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, 1, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - //f0bx //f1bx - 0.1f, 0.2f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = j; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_x) { - // Scale : 3 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // 0.1 0.2 0.25 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, - 0.2f, - 0.25f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = l; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_xy) { - // Scale : 2x3x1x1 - // Input : 2x3x2x2 - 
// Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: 0.1 0.2 0.25 - // f0: 0.6 0.7 0.75 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, y_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, - 0.2f, - 0.25f, - 0.6f, - 0.7f, - 0.75f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = l + x_size * k; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_batch1) { - // Scale : 2x3x2x1 - // Input : 2x3x2x2 - // Output : 2x3x2x2 - - // Input: - // f0: b0: 1 2 -10 b1: 0 0 -11 - // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15 - // f1: b0: 5 6 -12 b1: 1.5 5.2 -13 - // f1: b0: 7 8 -16 b1: 12 8 -17 - // - // Scale: - // f0: b0: 0.1 0.2 0.25 - // f0: b0: 0.6 0.7 0.75 - // f1: b0: 1.1 1.2 1.25 - // f1: b0: 1.6 1.7 1.75 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto x_size = 3; - auto y_size = 2; - - auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, x_size, y_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 1.1f, - 0.2f, 1.2f, - 0.25f, 1.25f, - 0.6f, 1.6f, - 0.7f, 1.7f, - 0.75f, 1.75f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int j = 0; j < feature_num; ++j) { //F - for (int i = 0; i < batch_num; ++i) { //B - for (int k = 
0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = i + batch_num * (j + feature_num * (l + x_size * k)); - int linear_id_scale = j + feature_num * (l + x_size * k); - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } -} - -TEST(scale_gpu, basic_in2x3_scale_same_size_bx) { - // Scale : 2x3 - // Bias : 2x3 - // Input : 2x3 - // Output : 2x3 - - // Input: - // b0: 1 2 -0.75 - // b1: 0 -1.5 -3 - // - // Scale: - // b0: 3.1 0.2 0.17 - // b1: 10 -3 1 - - // Bias: - // b0: -0.1 3.2 7 - // b1: 0 1 -1 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(input_layout("bias_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input", "bias_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, 0.2f, 0.17f, - 10.f, -3.f, 1.f, - }; - set_values(scale_input, scale_vec); - - std::vector bias_vec = { - -0.1f, 3.2f, 7.f, - 0.f, 1.f, -1.f, - }; - set_values(bias_input, bias_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - network.set_input_data("bias_input", bias_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3_scale_same_size_xb) { - // Scale : 2x3 - // Bias : 2x3 - // Input : 2x3 - // Output : 2x3 - - // Input: - // x0: 1 2 -0.75 - // x1: 0 -1.5 -3 - // - // Scale: - // x0: 3.1 0.2 0.17 - // x1: 10 -3 1 - - // Bias: - // x0: -0.1 3.2 7 - // x1: 0 1 -1 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(input_layout("bias_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input", "bias_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, 0.2f, 0.17f, - 10.f, -3.f, 1.f, - }; - set_values(scale_input, scale_vec); - - std::vector bias_vec = { - -0.1f, 3.2f, 7.f, - 0.f, 1.f, -1.f, - }; - set_values(bias_input, bias_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - network.set_input_data("bias_input", bias_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = 
output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3_scale_single_value_bx) { - // Scale : 1x1 - // Bias : 1x1 - // Input : 2x3 - // Output : 2x3 - - // Input: - // b0: 1 2 -0.75 - // b1: 0 -1.5 -3 - // - // Scale: - // 3.1 - - // Bias: - // -0.1 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); - auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(input_layout("bias_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input", "bias_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, - }; - set_values(scale_input, scale_vec); - - std::vector bias_vec = { - -0.1f, - }; - set_values(bias_input, bias_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - network.set_input_data("bias_input", bias_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3_scale_single_value_xb) { - // Scale : 1x1 - // Bias : 1x1 - // Input : 2x3 - // Output : 2x3 - - // Input: - // x0: 1 2 -0.75 - // x1: 0 -1.5 -3 - // - // Scale: - // 3.1 - - // Bias: - // -0.1 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); - auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(input_layout("bias_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input", "bias_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, - }; - set_values(scale_input, scale_vec); - - std::vector bias_vec = { - -0.1f, - }; - set_values(bias_input, bias_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - network.set_input_data("bias_input", bias_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_bx) { - // Scale : 2x3 - // Input : 2x3 - // Output : 2x3 - - // Input: - // b0: 1 2 -0.75 - // b1: 0 -1.5 -3 - // - // Scale: - // b0: 3.1 0.2 0.17 - // b1: 10 -3 1 - - 
const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, 0.2f, 0.17f, - 10.f, -3.f, 1.f, - }; - set_values(scale_input, scale_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_xb) { - // Scale : 2x3 - // Input : 2x3 - // Output : 2x3 - - // Input: - // x0: 1 2 -0.75 - // x1: 0 -1.5 -3 - // - // Scale: - // x0: 3.1 0.2 0.17 - // x1: 10 -3 1 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 2.f, -0.75f, - 0.f, -1.5f, -3.f, - }; - set_values(input, input_vec); - - std::vector scale_vec = { - 3.1f, 0.2f, 0.17f, - 10.f, -3.f, 1.f, - }; - set_values(scale_input, scale_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x3x2x2_scale_yxfb_bfyx_same_size_padding) { - // Scale : 2x2x1x1 - // Input : 2x2x1x1 - // Output : 2x2x1x1 - // Output Padding: 2x2 - // Input Padding: 2x1 (with reorder) - - // Input: - // 1 2 - // 3 4 - - // - // Scale: - // 0.1 0.2 - // 0.6 0.5 - - const auto& engine = get_test_engine(); - std::vector formats_to_test = { format::yxfb , format::bfyx }; - - for (std::vector::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it) - { - std::cout << "Testing format: " << format::order(*it) << std::endl; - - tensor input_tensor(1, 1, 2, 2); - - auto input = memory::allocate(engine, { data_types::f32, *it, input_tensor }); - auto scale_input = memory::allocate(engine, { data_types::f32, *it, input_tensor }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "reorder", "scale_input", {}, padding( { 0, 0, 2, 2 }, 0 ))); - - std::vector input_vec = { 1.f, 2.f, 3.f, 4.f }; - set_values(input, input_vec); - - 
std::vector scale_input_vec = { 0.1f, 0.2f, 0.6f, 0.5f }; - set_values(scale_input, scale_input_vec); - - std::vector expected = { - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.1f, 0.4f, 0.f, 0.f, - 0.f, 0.f, 1.8f, 2.0f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - }; - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < expected.size(); ++i) { - EXPECT_NEAR(output_ptr[i], expected[i], 1e-05F); - } - } -} - -TEST(scale_gpu, basic_in2x2x2x3x2_scale_same_size_bfzyx) { - // Scale : 2x2x2x3x2 - // Input : 2x2x2x3x2 - // Output : 2x2x2x3x2 - - const auto& engine = get_test_engine(); - - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 3, 2 } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 2, 2, 2, 3, 2 } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 2.f, -10.f, 0.f, 0.f, -11.f, - 3.f, 4.f, -14.f, 0.5f, -0.5f, -15.f, - 5.f, 6.f, -12.f, 1.5f, 5.2f, -13.f, - 7.f, 8.f, -16.f, 12.f, 8.f, -17.f, - 1.f, 2.f, -10.f, 0.f, 0.f, -11.f, - 3.f, 4.f, -14.f, 0.5f, -0.5f, -15.f, - 5.f, 6.f, -12.f, 1.5f, 5.2f, -13.f, - 7.f, 8.f, -16.f, 12.f, 8.f, -17.f - }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, - 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f, - 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f, - 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f, - 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f, - 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f, - 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f, - 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (unsigned int i = 0; i < input_vec.size(); ++i) { - EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F); - } -} - -TEST(scale_gpu, basic_in2x2x2x2x3_scale_z) { - // Scale : 2 - // Input : 2x2x2x2x3 - // Output : 2x2x2x2x3 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto z_size = 2; - auto y_size = 2; - auto x_size = 3; - - auto input = memory::allocate(engine, { data_types::f32,format::bfzyx,{ batch_num, feature_num, x_size, y_size, z_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1,1,1,1,z_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f, - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f - }; - set_values(input, input_vec); - - std::vector 
scale_input_vec = { - 0.1f, - 0.2f, - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int i = 0; i < batch_num; ++i) { //B - for (int j = 0; j < feature_num; ++j) { //F - for (int m = 0; m < z_size; ++m) { //Z - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = l + x_size * (k + y_size * (m + z_size * (j + feature_num*i))); - int linear_id_scale = m; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } - } -} - -TEST(scale_gpu, basic_in2x2x2x2x3_scale_xyz) { - // Scale : 1x1x2x2x3 - // Input : 2x2x2x2x3 - // Output : 2x2x2x2x3 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto z_size = 2; - auto y_size = 2; - auto x_size = 3; - - auto input = memory::allocate(engine, { data_types::f32,format::bfzyx,{ batch_num, feature_num, x_size, y_size, z_size } }); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1,1, x_size, y_size, z_size } }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = { - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f, - 1.f, 0.f, 5.f, 1.5f, - 2.f, 0.f, 6.f, 5.2f, - -10.f, -11.f, -12.f, -13.f, - 3.f, 0.5f, 7.f, 12.f, - 4.f, -0.5f, 8.f, 8.f, - -14.f, -15.f, -16.f, -17.f - }; - set_values(input, input_vec); - - std::vector scale_input_vec = { - 0.1f, - 0.2f, - 0.25f, - 0.6f, - 0.7f, - 0.75f, - 0.1f, - 0.2f, - 0.25f, - 0.6f, - 0.7f, - 0.75f - }; - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int i = 0; i < batch_num; ++i) { //B - for (int j = 0; j < feature_num; ++j) { //F - for (int m = 0; m < z_size; ++m) { //Z - for (int k = 0; k < y_size; ++k) { //Y - for (int l = 0; l < x_size; ++l) { //X - int linear_id = l + x_size * (k + y_size * (m + z_size * (j + feature_num*i))); - int linear_id_scale = l + x_size * (k + y_size * m); - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F); - } - } - } - } - } -} - -TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_4d) { - // Scale : 1x2x1x1 - // Input : 2x2x2x2x2x3 - // Output : 2x2x2x2x2x3 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto w_size = 2; - auto z_size = 2; - auto y_size = 2; - auto x_size = 3; - - tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size }); - tensor scale_size = tensor(format::bfyx, { 1, feature_num, 1, 1 }); - - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size}); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, scale_size }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - 
topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = generate_random_1d(in_size.count(), -10, 10); - set_values(input, input_vec); - - std::vector scale_input_vec = generate_random_1d(scale_input.count(), -10, 10); - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int b = 0; b < batch_num; ++b) { - for (int f = 0; f < feature_num; ++f) { - for (int w = 0; w < w_size; ++w) { - for (int z = 0; z < z_size; ++z) { - for (int y = 0; y < y_size; ++y) { - for (int x = 0; x < x_size; ++x) { - int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b)))); - int linear_id_scale = f; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f); - } - } - } - } - } - } -} - -TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_6d) { - // Scale : 1x2x1x1x1x1 - // Input : 2x2x2x2x2x3 - // Output : 2x2x2x2x2x3 - - const auto& engine = get_test_engine(); - - auto batch_num = 2; - auto feature_num = 2; - auto w_size = 2; - auto z_size = 2; - auto y_size = 2; - auto x_size = 3; - - tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size }); - tensor scale_size = tensor(format::bfwzyx, { 1, feature_num, 1, 1, 1, 1 }); - - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size}); - auto scale_input = memory::allocate(engine, { data_types::f32, format::bfwzyx, scale_size }); - - topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("scale_input", scale_input.get_layout())); - topology.add(scale("scale", "input", "scale_input")); - - std::vector input_vec = generate_random_1d(in_size.count(), -10, 10); - set_values(input, input_vec); - - std::vector scale_input_vec = generate_random_1d(scale_input.count(), -10, 10); - set_values(scale_input, scale_input_vec); - - network network(engine, topology); - - network.set_input_data("input", input); - network.set_input_data("scale_input", scale_input); - - auto outputs = network.execute(); - - auto output = outputs.at("scale").get_memory(); - auto output_ptr = output.pointer(); - - for (int b = 0; b < batch_num; ++b) { - for (int f = 0; f < feature_num; ++f) { - for (int w = 0; w < w_size; ++w) { - for (int z = 0; z < z_size; ++z) { - for (int y = 0; y < y_size; ++y) { - for (int x = 0; x < x_size; ++x) { - int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b)))); - int linear_id_scale = f; - EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f); - } - } - } - } - } - } -} - -////////////////////////////////////////////////////////////////////////////// -// // -// Exhaustive Negative Matrix tests // -// // -////////////////////////////////////////////////////////////////////////////// - -//TODO: this should be done using TEST_P or some equivallent construct -static network setup_scale_network( - const data_types dt, - const tensor input_tensor, - const tensor scale_tensor, - const tensor bias_tensor, - const format f, - const format of, - bool pass_bias //TODO: a WA for lack of std::optional bias -) -{ - const auto& engine = get_test_engine(); - 
topology topology; - - auto input_mem = memory::allocate(engine, { dt, f, input_tensor }); - auto scale_mem = memory::allocate(engine, { dt, of, scale_tensor }); - topology.add(input_layout("input", input_mem.get_layout())); - topology.add(input_layout("scale_input", scale_mem.get_layout())); - - if (pass_bias) - { - auto bias_mem = memory::allocate(engine, { dt, f, bias_tensor }); - topology.add(input_layout("bias_input", bias_mem.get_layout())); - - topology.add(scale("scale", "input", "scale_input", "bias_input" )); - } - else - { - topology.add(scale("scale", "input", "scale_input")); - } -//TODO: this will be supported after the API change -// else -// { -// assert(!pass_bias); -// -// topology.add(scale("scale", "input", "scale_input")); -// } - - return network(engine, topology); -} - -TEST(NegativeScaleTest, TestAll) { - auto d = data_types::f32; - auto f = format::bfyx; - auto of = format::yxfb; - - std::vector t { 3, 4, 5, 6 }; - std::vector t2 { 5, 6, 4, 3 }; - - // broadcast rules mean that either the dim size is equal to input dim or is 1 - std::vector> good_ts = - { - { 1, 4, 5, 6 }, { 3, 1, 5, 6 }, { 3, 4, 1, 6 }, { 3, 4, 5, 1 }, - { 1, 1, 5, 6 }, { 1, 4, 1, 6 }, { 1, 4, 5, 1 }, { 3, 1, 1, 6 }, { 3, 1, 5, 1 }, { 3, 4, 1, 1 }, - { 1, 1, 1, 6 }, { 1, 1, 5, 1 }, { 1, 4, 1, 1 }, { 3, 1, 1, 1 } - }; - std::vector> bad_ts = { { 2, 4, 5, 6 }, { 3, 2, 5, 6 }, { 3, 4, 2, 6 }, { 3, 4, 5, 2 } }; - - //TODO: should be ASSERT_THROW(statement, exception_type) - but what exception type? - ASSERT_ANY_THROW(setup_scale_network(d, tensor{ }, tensor{ }, tensor{ }, f, of, false)); - ASSERT_ANY_THROW(setup_scale_network(d, tensor{ }, tensor{ }, tensor{ }, f, of, true)); - - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, true)); - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, false)); - - // make sure that it's the input that's masked in the scale/bias with a "1", not the other way around - for (const auto & good : good_ts) - { - ASSERT_ANY_THROW(setup_scale_network(d, tensor(good), tensor(t), tensor(t), f, of, true)); - } - - // sizes must either be equal to the input dims or be 1 (broadcast) in a given dimension - for (const auto & bad : bad_ts) - { - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(t), f, of, true)); - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t), tensor(bad), f, of, true)); - - for (const auto & good : good_ts) - { - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(good), f, of, true)); - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good), tensor(bad), f, of, true)); - } - } - - // we expect the broadcast mask to be identical for scale and bias, when present - for (unsigned i = 0; i < good_ts.size(); ++i) - for (unsigned j = 0; j < good_ts.size(); ++j) - if (i != j) - { - ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good_ts[i]), tensor(good_ts[j]), f, of, true)); - } - -} - -////////////////////////////////////////////////////////////////////////////// -//                                                                          // -//                     Exhaustive Positive Matrix tests                     // -//                                                                          // -////////////////////////////////////////////////////////////////////////////// - -using namespace cldnn; - -class scale_test : public tests::generic_test -{ -public: - static void TearDownTestCase() - { - all_generic_params.clear(); - all_layer_params.clear(); - } - - //TODO: use an enum instead of int i - static std::vector> generate_specific_test_params(int variant) - { - std::vector> all_layer_params; - - switch(variant) - { - case 0: 
all_layer_params.emplace_back(new scale("scale", "input0", "input1")); break; - case 1: all_layer_params.emplace_back(new scale("scale", "input0", "input1", "input2")); break; - // case 3: all_layer_params.push_back(new scale("scale", "input0", "input1", true)); // This case should be checked by negative_scale_test - // case 4: all_layer_params.push_back(new scale("scale", "input0", "input1", false)); // This case should be checked by negative_scale_test - default: assert(0); - } - - return all_layer_params; - } - - static std::vector> generate_generic_test_params(int variant) - { - assert(!variant || variant == 1); - - std::vector> all_generic_params; - - auto data_types = test_data_types(); - - for (cldnn::data_types dt : data_types) - for (tensor & t : test_input_sizes) - { - std::vector> attempted_dims; - - for (int32_t b : test_batch_sizes) - for (auto f : test_feature_sizes) - for (int mask = 0; mask < 16; ++mask) //TODO: do we want to restrict it to some smaller subset like for (auto mask : { 0, 1, 3, 7, 15, 5, 10})? the problem is that because of the layout we might miss some interesting combinations since this is effectively hardcoded in the kernels - { - const int w = t.spatial[0]; - const int h = t.spatial[1]; - - const auto mb = mask & 0x8 ? b : 1; - const auto mf = mask & 0x4 ? f : 1; - const auto mh = mask & 0x2 ? h : 1; - const auto mw = mask & 0x1 ? w : 1; - - // avoid adding test cases with different masks leading to the same dimensions - if(attempted_dims.end() == std::find_if(attempted_dims.begin(), attempted_dims.end(), [=](const std::vector & arr) { return arr[0] == mb && arr[1] == mf && arr[2] == mh && arr[3] == mw; })) - { - std::vector tmp { mb, mf, mh, mw }; - attempted_dims.push_back(tmp); - - std::shared_ptr tp = std::make_shared(); - tp->data_type = dt; - - tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f, w, h ))); - tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( mb, mf, mw, mh ))); - if (variant) - tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( mb, mf, mw, mh ))); - - all_generic_params.emplace_back(tp); - } - } - } - - return all_generic_params; - } - - static std::vector, std::shared_ptr>> generate_all_test_params() - { - std::vector, std::shared_ptr>> res; - - for (int variant = 0; variant <= 1; ++variant) - { - auto tpv = generate_generic_test_params(variant); - auto pv = generate_specific_test_params(variant); - - for (auto & tp : tpv) - all_generic_params.emplace_back(tp); - - for (auto & p : pv) - all_layer_params.emplace_back(p); - - for (auto & tp : tpv) - for (auto & p: pv) - res.emplace_back(tp, p); - } - - return res; - } - - virtual bool is_format_supported(cldnn::format format) override - { - return format == cldnn::format::bfyx; - } - - template - memory generate_reference_typed(const std::vector & inputs) - { - assert(inputs.size() == 3 || inputs.size() == 2); - const bool bias_input_present = inputs.size() == 3; - - const memory & input = inputs[0]; - const memory & scale = inputs[1]; - const memory * bias = bias_input_present ? &inputs[2] : nullptr; - assert(!bias_input_present || bias); - - //Output is bfyx - auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, cldnn::format::bfyx, input.get_layout().size )); - - const auto in0_mem = input.pointer(); - const auto in1_mem = scale.pointer(); - const auto in2_mem_ptr = bias ? std::make_shared>(*bias) : nullptr; - const Type * const in2_mem = in2_mem_ptr ? 
in2_mem_ptr->data() : nullptr; //TODO: is the condition needed or is it nullptr anyway? - auto out_mem = output.pointer(); - - const auto input_sizes = input.get_layout().size.sizes(cldnn::format::bfyx); - - const int in0_b = input_sizes[0]; - const int in0_f = input_sizes[1]; - const int in0_h = input_sizes[2]; - const int in0_w = input_sizes[3]; - - { // asserting dims - const auto output_sizes = output.get_layout().size.sizes(cldnn::format::bfyx); - const int out_b = output_sizes[0]; (void) out_b; - const int out_f = output_sizes[1]; (void) out_f; - const int out_h = output_sizes[2]; (void) out_h; - const int out_w = output_sizes[3]; (void) out_w; - - const auto scale_sizes = scale.get_layout().size.sizes(cldnn::format::bfyx); - const int in1_b = scale_sizes[0]; (void) in1_b; - const int in1_f = scale_sizes[1]; (void) in1_f; - const int in1_h = scale_sizes[2]; (void) in1_h; - const int in1_w = scale_sizes[3]; (void) in1_w; - // input and output dims must match - assert(in0_b == out_b && in0_f == out_f && in0_h == out_h && in0_w == out_w); - - // scale/bias dims must be equal to in/out or be 1 for broadcast - assert(in1_b == 1 || in1_b == in0_b); - assert(in1_f == 1 || in1_f == in0_f); - assert(in1_h == 1 || in1_h == in0_h); - assert(in1_w == 1 || in1_w == in0_w); - - if (bias) - { - const auto bias_sizes = bias->get_layout().size.sizes(cldnn::format::bfyx); - const int in2_b = bias_sizes[0]; (void) in2_b; - const int in2_f = bias_sizes[1]; (void) in2_f; - const int in2_h = bias_sizes[2]; (void) in2_h; - const int in2_w = bias_sizes[3]; (void) in2_w; - - // scale/bias dims must be equal to in/out or be 1 for broadcast - assert(in2_b == 1 || in2_b == in1_b); - assert(in2_b == 1 || in2_f == in1_f); - assert(in2_b == 1 || in2_h == in1_h); - assert(in2_b == 1 || in2_w == in1_w); - } - } - - const auto input_desc = get_linear_memory_desc(input.get_layout()); - const auto output_desc = get_linear_memory_desc(output.get_layout()); - const auto scale_desc = get_linear_memory_desc(scale.get_layout()); - const auto bias_desc = - bias ? - get_linear_memory_desc(bias->get_layout()) : - memory_desc(); - - for (int n = 0; n < in0_b; ++n) - for (int c = 0; c < in0_f; ++c) - for (int y = 0; y < in0_h; ++y) - for (int x = 0; x < in0_w; ++x) - { - const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc); - const size_t in1_idx = get_linear_index_with_broadcast(scale.get_layout(), n, c, y, x, scale_desc); - const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, output_desc); - - out_mem[out_idx] = in0_mem[in0_idx] * in1_mem[in1_idx]; - - if (bias) - { - const size_t in2_idx = get_linear_index_with_broadcast(bias->get_layout(), n, c, y, x, bias_desc); - out_mem[out_idx] += in2_mem[in2_idx]; - } - } - return output; - } - - virtual memory generate_reference(const std::vector & inputs) override - { - if (generic_params->data_type == data_types::f32) - { - return generate_reference_typed(inputs); - } - else - { - return generate_reference_typed(inputs); - } - } - - static std::string custom_param_name(const ::testing::TestParamInfo, std::shared_ptr>>& info) - { - std::stringstream res; - - const auto & p = std::get<0>(info.param); - - assert (p->data_type == data_types::f32 || - p->data_type == data_types::f16); - - res << info.index - << "_" << (p->data_type == data_types::f32 ? 
"f32" : "f16"); - - for (unsigned i = 0; i < p->input_layouts.size(); ++i) - { - if (i == 0) res << "_Input"; - if (i == 1) res << "_ScaleInput"; - if (i == 2) res << "_BiasInput"; - - const auto chans = format::traits(p->fmt).order; - - for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j) - { - res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j]; - } - } - return res.str(); - } - -private: - static std::vector> all_generic_params; - static std::vector> all_layer_params; -}; - -std::vector> scale_test::all_layer_params = {}; -std::vector> scale_test::all_generic_params = {}; - -TEST_P(scale_test, SCALE) -{ - run_single_test(); -} - -INSTANTIATE_TEST_CASE_P(DISABLED_SCALE, - scale_test, - ::testing::ValuesIn(scale_test::generate_all_test_params()), - scale_test::custom_param_name); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp index e190e590241..02d5089f9c5 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_elements_update_gpu_test.cpp @@ -2,17 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -42,11 +40,11 @@ TEST(scatter_elements_update_gpu_fp16, d2411_axisF) { // 10.f, 11.f, 5.f, 4.f, // 1.f, 7.f, 12.f, 13.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Updates auto axis = cldnn::scatter_elements_update::scatter_elements_update_axis::along_f; set_values(input1, { @@ -65,9 +63,9 @@ TEST(scatter_elements_update_gpu_fp16, d2411_axisF) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_elements_update("scatter_elements_update", "InputData", "InputIndices", "InputUpdates", axis) ); @@ -81,7 +79,7 @@ TEST(scatter_elements_update_gpu_fp16, d2411_axisF) { auto outputs = network.execute(); auto output = outputs.at("scatter_elements_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 10.f, 11.f, 5.f, 4.f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_nd_update_gpu_test.cpp 
b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_nd_update_gpu_test.cpp index 26dc684f5e5..9977777fa0e 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_nd_update_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_nd_update_gpu_test.cpp @@ -2,29 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 2, 4, 3 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 2, 2, 4, 3, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 2, 4, 3 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 2, 2, 4, 3, 2 } }); // updates set_values(input1, { // 0 @@ -90,9 +88,9 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 3) ); @@ -107,7 +105,7 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -115,11 +113,11 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { } TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 2, 4, 3 } }); // data 2x2x3x4x2 (bfzyx) - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 4, 1, 1, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 2, 4, 3 } }); // data 2x2x3x4x2 (bfzyx) + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 4, 1, 1, 2 } }); // updates set_values(input1, { // 0 @@ -174,9 +172,9 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - 
topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -191,7 +189,7 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -199,11 +197,11 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { } TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // data 2x3x2x4 (bfyx) - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // data 2x3x2x4 (bfyx) + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // updates set_values(input1, { FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), @@ -238,9 +236,9 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -255,7 +253,7 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -263,11 +261,11 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { } TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 4 } }); // data 3x3x4 (bfy) - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 3, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 1, 4 } }); // data 3x3x4 (bfy) + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 3, 1, 1 } 
}); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // updates set_values(input1, { FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), @@ -309,9 +307,9 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -326,7 +324,7 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -334,11 +332,11 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { } TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 3, 4, 2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 1, 2, 2, 3, 4, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 1, 2, 2, 3, 4, 2 } }); // updates set_values(input1, { // 0, 0, 0 @@ -440,9 +438,9 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -457,7 +455,7 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -465,11 +463,11 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { } TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 3, 4, 2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { 
data_types::f16, format::bfzyx, { 2, 2, 3, 4, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 3, 4, 2 } }); // updates set_values(input1, { // 0 @@ -537,9 +535,9 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -554,7 +552,7 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -562,11 +560,11 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { } TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 4, 2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // updates set_values(input1, { // 0 @@ -616,9 +614,9 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -633,7 +631,7 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -641,11 +639,11 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { } TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 1, 2, 2, 3, 4, 
2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 3, 4, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 1, 2, 2, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 1, 3, 4, 2 } }); // updates set_values(input1, { //0,0 @@ -714,9 +712,9 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -731,7 +729,7 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -739,11 +737,11 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { } TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 1, 2, 3, 4, 2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 1, 3, 4 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 1, 2, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 1, 3, 4 } }); // updates set_values(input1, { @@ -782,9 +780,9 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -799,7 +797,7 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -808,11 +806,11 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { 
TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 4, 1, 2 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 4, 2 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 4, 1, 2 } }); // updates set_values(input1, { @@ -848,9 +846,9 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -865,7 +863,7 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -873,11 +871,11 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { } TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 4 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // updates set_values(input1, { @@ -913,9 +911,9 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -930,7 +928,7 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { 
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -938,11 +936,11 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { } TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 1, 1, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 1, 1, 1 } }); // updates set_values(input1, { @@ -968,9 +966,9 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -985,7 +983,7 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -993,11 +991,11 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { } TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 4, 1 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 4, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 3, 4, 1 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 4, 1 } }); // updates set_values(input1, { @@ -1043,9 +1041,9 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1060,7 +1058,7 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto 
output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -1069,11 +1067,11 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // data - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 4, 1, 1 } }); // data + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // indices + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // updates set_values(input1, { @@ -1098,9 +1096,9 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1115,7 +1113,7 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -1123,11 +1121,11 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { } TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) { - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 8, 1, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 8, 1, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // Updates set_values(input1, { @@ -1147,9 +1145,9 @@ TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) { }; topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1164,7 +1162,7 @@ 
TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) { auto outputs = network.execute(); auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); @@ -1181,11 +1179,11 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Updates set_values(input1, { FLOAT16(100.f), FLOAT16(101.f), FLOAT16(102.f), FLOAT16(103.f), FLOAT16(104.f), FLOAT16(105.f), @@ -1242,9 +1240,9 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) { topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1260,7 +1258,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, 103.f, 104.f, 105.f, @@ -1319,11 +1317,11 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 6, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 6, 1, 1 } }); // Updates set_values(input1, { @@ -1381,9 +1379,9 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); 
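// ---------------------------------------------------------------------------------------------
// Reviewer note (illustrative sketch, not part of the patch): every hunk in this file applies
// the same mechanical migration — get_test_engine() instead of a local `engine engine;`,
// engine.allocate_memory() instead of memory::allocate(), `->get_layout()` because memory is
// now a shared pointer, and cldnn::mem_lock instead of output.pointer(). A minimal migrated
// test is sketched below for reference. It reuses only helpers already present in these tests
// (get_test_engine(), get_test_stream(), set_values(), float16_to_float32()); the <uint16_t>
// element type for cldnn::mem_lock is an assumption, since template arguments are not visible
// in this diff.
// ---------------------------------------------------------------------------------------------
TEST(scatter_nd_update_gpu_fp16, migrated_pattern_sketch) {
    auto& engine = get_test_engine();   // shared test engine, replaces the local `engine engine;`

    auto data    = engine.allocate_memory({ data_types::f16, format::bfyx, { 8, 1, 1, 1 } }); // data
    auto indices = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // indices
    auto updates = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 1, 1, 1 } }); // updates

    set_values(data,    { FLOAT16(0.f), FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
                          FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f), FLOAT16(7.f) });
    set_values(indices, { FLOAT16(4.f), FLOAT16(3.f), FLOAT16(1.f), FLOAT16(7.f) });    // positions to overwrite
    set_values(updates, { FLOAT16(9.f), FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f) });

    topology topology;
    topology.add(input_layout("InputData",    data->get_layout()));     // memory objects are pointers now,
    topology.add(input_layout("InputIndices", indices->get_layout()));  // hence -> instead of .
    topology.add(input_layout("InputUpdates", updates->get_layout()));
    topology.add(scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2));

    network network(engine, topology);
    network.set_input_data("InputData",    data);
    network.set_input_data("InputIndices", indices);
    network.set_input_data("InputUpdates", updates);

    auto output = network.execute().at("scatter_nd_update").get_memory();
    cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());    // replaces output.pointer<...>()

    // data with positions 4, 3, 1, 7 overwritten by 9, 10, 11, 12
    std::vector<float> expected_results = { 0.f, 11.f, 2.f, 10.f, 9.f, 5.f, 6.f, 12.f };
    for (size_t i = 0; i < expected_results.size(); ++i) {
        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
    }
}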
@@ -1399,7 +1397,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, 103.f, 104.f, 105.f, @@ -1458,11 +1456,11 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 6, 1, 6 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 6, 6, 1, 6 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 6, 1, 6 } }); // Updates set_values(input1, { @@ -1531,9 +1529,9 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1549,7 +1547,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 777.f, 999.f, 999.f, 999.f, 999.f, 999.f, @@ -1609,11 +1607,11 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Updates set_values(input1, { @@ -1652,9 +1650,9 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1670,7 +1668,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); 
+ cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -1712,11 +1710,11 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Updates set_values(input1, { @@ -1755,9 +1753,9 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1773,7 +1771,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -1815,11 +1813,11 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Updates set_values(input1, { @@ -1864,9 +1862,9 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1882,7 +1880,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -1924,11 +1922,11 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) { // Input values in fp16 // - engine engine; + 
auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 3, 2, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 3 } }); // Updates set_values(input1, { @@ -1981,9 +1979,9 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -1999,7 +1997,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 666.f, 666.f, @@ -2040,11 +2038,11 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 5, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 1, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 5, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 1, 1, 1, 1 } }); // Updates set_values(input1, { @@ -2115,9 +2113,9 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -2133,7 +2131,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, @@ -2207,11 +2205,11 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, 
{ data_types::f16, format::bfzyx, { 2, 4, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 4, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 1, 1, 1 } }); // Updates set_values(input1, { @@ -2283,9 +2281,9 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -2301,7 +2299,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, @@ -2375,11 +2373,11 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 1, 1, 3 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 1, 1, 3 } }); // Updates set_values(input1, { @@ -2454,9 +2452,9 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -2472,7 +2470,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, @@ -2546,11 +2544,11 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 3, 
1, 3, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 3, 1, 3, 2 } }); // Updates set_values(input1, { @@ -2637,9 +2635,9 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -2655,7 +2653,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, 102.f, @@ -2729,11 +2727,11 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 2, 3, 2, 3 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 3, 2, 3, 2, 3 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 2, 3, 2, 3 } }); // Updates set_values(input1, { @@ -2838,9 +2836,9 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -2856,7 +2854,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 555.f, 555.f, 555.f, @@ -2930,12 +2928,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 6, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 1, 1, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 
2, 2, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 6, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 1, 1, 1, 1, 1 } }); // Updates set_values(input1, { @@ -2998,9 +2996,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3016,7 +3014,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -3082,12 +3080,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 5, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 5, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 1, 1 } }); // Updates set_values(input1, { @@ -3151,9 +3149,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3169,7 +3167,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -3235,12 +3233,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 4, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 1, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary + auto input2 = 
engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 4, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 1, 2 } }); // Updates set_values(input1, { @@ -3307,9 +3305,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3325,7 +3323,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -3392,12 +3390,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 3, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 3, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 2, 2 } }); // Updates set_values(input1, { @@ -3470,9 +3468,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3488,7 +3486,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -3554,12 +3552,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 1, 2, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, 
format::bfwzyx, { 2, 2, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 1, 2, 2, 2 } }); // Updates set_values(input1, { @@ -3644,9 +3642,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3662,7 +3660,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 100.f, 101.f, @@ -3727,12 +3725,12 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i211111) { // Input values in fp16 // - engine engine; + auto& engine = get_test_engine(); // memory order is bfxyzw - auto input1 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 1, 1, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 1, 1, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, { 2, 2, 2, 2, 2, 2 } }); // Updates set_values(input1, { @@ -3841,9 +3839,9 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i211111) { }); topology topology; - topology.add(input_layout("InputData", input1.get_layout())); - topology.add(input_layout("InputIndices", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputData", input1->get_layout())); + topology.add(input_layout("InputIndices", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_nd_update("scatter_nd_update", "InputData", "InputIndices", "InputUpdates", 2) ); @@ -3859,7 +3857,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i211111) { auto output = outputs.at("scatter_nd_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 777.f, 777.f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_update_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_update_gpu_test.cpp index 01f2c7f02ef..937695760e3 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_update_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scatter_update_gpu_test.cpp @@ -2,17 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include +#include "test_utils.h" -#include -#include -#include -#include -#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -38,14 +33,14 @@ 
TEST(scatter_update_gpu_fp16, d2411_axisB) { // 0.f, 0.f, 0.f, 0.f // // Output: - // 3.f, 6.f, 5.f, 4.f, + // 3.f, 6.f, 5.f, 4.f, // 1.f, 7.f, 2.f, 9.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 4, 1, 1 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_b; set_values(input1, { @@ -63,34 +58,34 @@ TEST(scatter_update_gpu_fp16, d2411_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - - + + network network(engine, topology); + + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); - + auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 3.f, 6.f, 5.f, 4.f, + 3.f, 6.f, 5.f, 4.f, 1.f, 7.f, 2.f, 9.f }; for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); - } + } } TEST(scatter_update_gpu_fp32, d8111_axisB) { @@ -114,11 +109,11 @@ TEST(scatter_update_gpu_fp32, d8111_axisB) { // 1.f, 11.f, 3.f, 10.f, 9.f, 6.f, 7.f, 12.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 8, 1, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 4, 1, 1, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 8, 1, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 4, 1, 1, 1 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_b; set_values(input1, { @@ -134,32 +129,32 @@ TEST(scatter_update_gpu_fp32, d8111_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + 
topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - - + + network network(engine, topology); + + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - - auto outputs = network.execute(); + + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1.f, 11.f, 3.f, 10.f, 9.f, 6.f, 7.f, 12.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); - } + } } TEST(scatter_update_gpu_fp16, d4311_axisB) { @@ -193,11 +188,11 @@ TEST(scatter_update_gpu_fp16, d4311_axisB) { // 6.f, 6.f, 6.f, // 7.f, 7.f, 7.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 4, 3, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 4, 3, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_b; set_values(input1, { @@ -221,34 +216,34 @@ TEST(scatter_update_gpu_fp16, d4311_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - - auto outputs = network.execute(); + + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 9.f, 10.f, 11.f, + 9.f, 10.f, 11.f, 8.f, 8.f, 8.f, - 6.f, 6.f, 6.f, + 6.f, 6.f, 6.f, 7.f, 7.f, 7.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); - } + } } TEST(scatter_update_gpu_fp16, d2521_axisF) { @@ -302,24 +297,24 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) { // 16.f, 17.f, // 141.f, 151.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 5, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 2 } 
}); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 5, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_f; set_values(input1, { - FLOAT16(0.0f), FLOAT16(1.0f), - FLOAT16(2.0f), FLOAT16(3.0f), + FLOAT16(0.0f), FLOAT16(1.0f), + FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), - FLOAT16(6.0f), FLOAT16(7.0f), + FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f), FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), - FLOAT16(14.0f), FLOAT16(15.0f), - FLOAT16(16.0f), FLOAT16(17.0f), + FLOAT16(14.0f), FLOAT16(15.0f), + FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f) }); @@ -329,35 +324,35 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) { }); set_values(input3, { - FLOAT16(21.0f), FLOAT16(31.0f), + FLOAT16(21.0f), FLOAT16(31.0f), FLOAT16(41.0f), FLOAT16(51.0f), - FLOAT16(61.0f), FLOAT16(71.0f), + FLOAT16(61.0f), FLOAT16(71.0f), FLOAT16(81.0f), FLOAT16(91.0f), FLOAT16(101.0f), FLOAT16(111.0f), FLOAT16(121.0f), FLOAT16(131.0f), - FLOAT16(141.0f), FLOAT16(151.0f), + FLOAT16(141.0f), FLOAT16(151.0f), FLOAT16(161.0f), FLOAT16(171.0f) }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 21.f, 31.f, @@ -372,7 +367,7 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) { 16.f, 17.f, 141.f, 151.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); } @@ -415,11 +410,11 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) { // 90.f, 120.f, 100.f, 110.f, // 130.f, 160.f, 140.f, 150.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 4 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 1, 4 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 2, 2, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_y; set_values(input1, { @@ -435,35 +430,35 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) { }); 
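    // Editor's note (descriptive comment, not part of the original patch): with
    // axis = along_y, each value fed through "InputText"/input2 roughly names a
    // y-position in the dictionary, and the matching slice of the updates below
    // (taken in index order) overwrites that position; all other y-positions keep
    // their original dictionary values, which is what the expected output encodes.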
set_values(input3, { - FLOAT16(0.0f), FLOAT16(20.0f), + FLOAT16(0.0f), FLOAT16(20.0f), FLOAT16(30.0f), FLOAT16(40.0f), - FLOAT16(50.0f), FLOAT16(60.0f), + FLOAT16(50.0f), FLOAT16(60.0f), FLOAT16(70.0f), FLOAT16(80.0f), FLOAT16(90.0f), FLOAT16(100.0f), FLOAT16(110.0f), FLOAT16(120.0f), - FLOAT16(130.0f), FLOAT16(140.0f), + FLOAT16(130.0f), FLOAT16(140.0f), FLOAT16(150.0f), FLOAT16(160.0f) }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 40.f, 20.f, 30.f, @@ -472,7 +467,7 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) { 130.f, 160.f, 140.f, 150.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); } @@ -486,11 +481,11 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { // Output : 8x2x20x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 8, 2, 1, 20 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 20, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 8, 2, 1, 20 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 3, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfyx, { 2, 3, 20, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_b; set_values(input1, { @@ -510,8 +505,8 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f), - FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), - + FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), 
FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), + FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f),FLOAT16(0.0f), FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f),FLOAT16(1.0f), FLOAT16(1.0f), @@ -525,43 +520,43 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { }); set_values(input3, { - FLOAT16(0), FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6), FLOAT16(7), FLOAT16(8), FLOAT16(9), FLOAT16(10), FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18), FLOAT16(19), + FLOAT16(0), FLOAT16(1), FLOAT16(2), FLOAT16(3), FLOAT16(4), FLOAT16(5), FLOAT16(6), FLOAT16(7), FLOAT16(8), FLOAT16(9), FLOAT16(10), FLOAT16(11), FLOAT16(12), FLOAT16(13), FLOAT16(14), FLOAT16(15), FLOAT16(16), FLOAT16(17), FLOAT16(18), FLOAT16(19), FLOAT16(20), FLOAT16(21), FLOAT16(22), FLOAT16(23), FLOAT16(24), FLOAT16(25), FLOAT16(26), FLOAT16(27), FLOAT16(28), FLOAT16(29), FLOAT16(30), FLOAT16(31), FLOAT16(32), FLOAT16(33), FLOAT16(34), FLOAT16(35), FLOAT16(36), FLOAT16(37), FLOAT16(38), FLOAT16(39), - + FLOAT16(40), FLOAT16(41), FLOAT16(42), FLOAT16(43), FLOAT16(44), FLOAT16(45), FLOAT16(46), FLOAT16(47), FLOAT16(48), FLOAT16(49), FLOAT16(50), FLOAT16(51), FLOAT16(52), FLOAT16(53), FLOAT16(54), FLOAT16(55), FLOAT16(56), FLOAT16(57), FLOAT16(58), FLOAT16(59), FLOAT16(60), FLOAT16(61), FLOAT16(62), FLOAT16(63), FLOAT16(64), FLOAT16(65), FLOAT16(66), FLOAT16(67), FLOAT16(68), FLOAT16(69), FLOAT16(70), FLOAT16(71), FLOAT16(72), FLOAT16(73), FLOAT16(74), FLOAT16(75), FLOAT16(76), FLOAT16(77), FLOAT16(78), FLOAT16(79), - + FLOAT16(80), FLOAT16(81), FLOAT16(82), FLOAT16(83), FLOAT16(84), FLOAT16(85), FLOAT16(86), FLOAT16(87), FLOAT16(88), FLOAT16(89), FLOAT16(90), FLOAT16(91), FLOAT16(92), FLOAT16(93), FLOAT16(94), FLOAT16(95), FLOAT16(96), FLOAT16(97), FLOAT16(98), FLOAT16(99), FLOAT16(100), FLOAT16(101), FLOAT16(102), FLOAT16(103), FLOAT16(104), FLOAT16(105), FLOAT16(106), FLOAT16(107), FLOAT16(108), FLOAT16(109), FLOAT16(110), FLOAT16(111), FLOAT16(112), FLOAT16(113), FLOAT16(114), FLOAT16(115), FLOAT16(116), FLOAT16(117), FLOAT16(118), FLOAT16(119), - + FLOAT16(120), FLOAT16(121), FLOAT16(122), FLOAT16(123), FLOAT16(124), FLOAT16(125), FLOAT16(126), FLOAT16(127), FLOAT16(128), FLOAT16(129), FLOAT16(130), FLOAT16(131), FLOAT16(132), FLOAT16(133), FLOAT16(134), FLOAT16(135), FLOAT16(136), FLOAT16(137), FLOAT16(138), FLOAT16(139), FLOAT16(140), FLOAT16(141), FLOAT16(142), FLOAT16(143), FLOAT16(144), FLOAT16(145), FLOAT16(146), FLOAT16(147), FLOAT16(148), FLOAT16(149), FLOAT16(150), FLOAT16(151), FLOAT16(152), FLOAT16(153), FLOAT16(154), FLOAT16(155), FLOAT16(156), FLOAT16(157), FLOAT16(158), FLOAT16(159), - + FLOAT16(160), FLOAT16(161), FLOAT16(162), FLOAT16(163), FLOAT16(164), FLOAT16(165), FLOAT16(166), FLOAT16(167), FLOAT16(168), FLOAT16(169), FLOAT16(170), FLOAT16(171), FLOAT16(172), FLOAT16(173), FLOAT16(174), FLOAT16(175), FLOAT16(176), 
FLOAT16(177), FLOAT16(178), FLOAT16(179), - FLOAT16(180), FLOAT16(181), FLOAT16(182), FLOAT16(183), FLOAT16(184), FLOAT16(185), FLOAT16(186), FLOAT16(187), FLOAT16(188), FLOAT16(189), FLOAT16(190), FLOAT16(191), FLOAT16(192), FLOAT16(193), FLOAT16(194), FLOAT16(195), FLOAT16(196), FLOAT16(197), FLOAT16(198), FLOAT16(199), + FLOAT16(180), FLOAT16(181), FLOAT16(182), FLOAT16(183), FLOAT16(184), FLOAT16(185), FLOAT16(186), FLOAT16(187), FLOAT16(188), FLOAT16(189), FLOAT16(190), FLOAT16(191), FLOAT16(192), FLOAT16(193), FLOAT16(194), FLOAT16(195), FLOAT16(196), FLOAT16(197), FLOAT16(198), FLOAT16(199), FLOAT16(200), FLOAT16(201), FLOAT16(202), FLOAT16(203), FLOAT16(204), FLOAT16(205), FLOAT16(206), FLOAT16(207), FLOAT16(208), FLOAT16(209), FLOAT16(210), FLOAT16(211), FLOAT16(212), FLOAT16(213), FLOAT16(214), FLOAT16(215), FLOAT16(216), FLOAT16(217), FLOAT16(218), FLOAT16(219), FLOAT16(220), FLOAT16(221), FLOAT16(222), FLOAT16(223), FLOAT16(224), FLOAT16(225), FLOAT16(226), FLOAT16(227), FLOAT16(228), FLOAT16(229), FLOAT16(230), FLOAT16(231), FLOAT16(232), FLOAT16(233), FLOAT16(234), FLOAT16(235), FLOAT16(236), FLOAT16(237), FLOAT16(238), FLOAT16(239) }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - + network network(engine, topology); - + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); - + auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, @@ -569,26 +564,26 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { 40.f, 41.f, 42.f, 43.f, 44.f, 45.f, 46.f, 47.f, 48.f, 49.f, 50.f, 51.f, 52.f, 53.f, 54.f, 55.f, 56.f, 57.f, 58.f, 59.f, 60.f, 61.f, 62.f, 63.f, 64.f, 65.f, 66.f, 67.f, 68.f, 69.f, 70.f, 71.f, 72.f, 73.f, 74.f, 75.f, 76.f, 77.f, 78.f, 79.f, - + 120.f, 121.f, 122.f, 123.f, 124.f, 125.f, 126.f, 127.f, 128.f, 129.f,130.f, 131.f, 132.f, 133.f, 134.f, 135.f, 136.f, 137.f, 138.f, 139.f, 140.f, 141.f, 142.f, 143.f, 144.f, 145.f, 146.f, 147.f, 148.f, 149.f,150.f, 151.f, 152.f, 153.f, 154.f, 155.f, 156.f, 157.f, 158.f, 159.f, 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f, 26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f, 33.f, 34.f , 35.f, 36.f, 37.f, 38.f, 39.f, - - 200.f, 201.f, 202.f, 203.f, 204.f, 205.f, 206.f, 207.f, 208.f, 209.f,210.f, 211.f, 212.f, 213.f, 214.f, 215.f, 216.f, 217.f, 218.f, 219.f, + + 200.f, 201.f, 202.f, 203.f, 204.f, 205.f, 206.f, 207.f, 208.f, 209.f,210.f, 211.f, 212.f, 213.f, 214.f, 215.f, 216.f, 217.f, 218.f, 219.f, 220.f, 221.f, 222.f, 223.f, 224.f, 225.f, 226.f,227.f, 228.f, 229.f, 230.f, 231.f, 232.f, 233.f, 234.f, 235.f, 236.f, 237.f, 238.f, 239.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 
0.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f , 1.f, 1.f, 1.f, 1.f, 1.f, - 80.f, 81.f, 82.f, 83.f, 84.f, 85.f, 86.f, 87.f, 88.f, 89.f, 90.f, 91.f, 92.f, 93.f, 94.f, 95.f, 96.f, 97.f, 98.f, 99.f, + 80.f, 81.f, 82.f, 83.f, 84.f, 85.f, 86.f, 87.f, 88.f, 89.f, 90.f, 91.f, 92.f, 93.f, 94.f, 95.f, 96.f, 97.f, 98.f, 99.f, 100.f, 101.f, 102.f, 103.f, 104.f, 105.f, 106.f, 107.f, 108.f, 109.f,110.f, 111.f, 112.f, 113.f, 114.f, 115.f, 116.f, 117.f, 118.f, 119.f, - 160.f, 161.f, 162.f, 163.f, 164.f, 165.f, 166.f, 167.f, 168.f, 169.f, 170.f, 171.f, 172.f, 173.f, 174.f, 175.f, 176.f, 177.f, 178.f, 179.f, + 160.f, 161.f, 162.f, 163.f, 164.f, 165.f, 166.f, 167.f, 168.f, 169.f, 170.f, 171.f, 172.f, 173.f, 174.f, 175.f, 176.f, 177.f, 178.f, 179.f, 180.f, 181.f, 182.f, 183.f, 184.f, 185.f, 186.f, 187.f, 188.f, 189.f, 190.f, 191.f, 192.f, 193.f, 194.f, 195.f, 196.f, 197.f, 198.f, 199.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); } @@ -626,11 +621,11 @@ TEST(scatter_update_gpu_fp32, d2214_axisX) { // 90.f, 9.f, 80.f, 100.f, // 120.f, 13.f, 110.f, 130.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 4, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 4, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 1 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_x; set_values(input1, { @@ -652,23 +647,23 @@ TEST(scatter_update_gpu_fp32, d2214_axisX) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 30.f, 1.f, 20.f, 40.f, @@ -677,7 +672,7 @@ TEST(scatter_update_gpu_fp32, d2214_axisX) { 120.f, 13.f, 110.f, 130.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); } @@ -718,11 +713,11 @@ TEST(scatter_update_gpu_int32, d6211_axisB) { // 9, 10, // 60, 70 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 6, 2, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 2, 1, 2 } }); // Indexes 
- auto input3 = memory::allocate(engine, { data_types::i32, format::bfyx, { 1, 2, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 6, 2, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 2, 1, 2 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 2, 2, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_b; set_values(input1, { @@ -747,23 +742,23 @@ TEST(scatter_update_gpu_int32, d6211_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 1, 2, @@ -811,11 +806,11 @@ TEST(scatter_update_gpu_int32, d3151_axisY) { // 70, 6, 60, 50, 80, // 110, 11, 100, 90, 120 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 5 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 5 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 2, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_y; set_values(input1, { @@ -839,30 +834,30 @@ TEST(scatter_update_gpu_int32, d3151_axisY) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 30, 1, 20, 200, 40, 70, 6, 60, 50, 80, 110, 11, 100, 90, 120 }; - + for 
(size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); } @@ -880,7 +875,7 @@ TEST(scatter_update_gpu_fp32, d24111_axisF_bfzyx) { // 2.f, 0.f // // Updates: - // 1.f, 2.f, + // 1.f, 2.f, // 3.f, 4.f // // Dictionary: @@ -888,14 +883,14 @@ TEST(scatter_update_gpu_fp32, d24111_axisF_bfzyx) { // 0.f, 0.f, 0.f, 0.f // // Output: - // 2.f, 0.f, 1.f, 0.f, + // 2.f, 0.f, 1.f, 0.f, // 4.f, 0.f, 3.f, 0.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 1, 1, 2, 1 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 4, 1, 1 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 1, 1, 2, 1 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_f; set_values(input1, { @@ -913,29 +908,29 @@ TEST(scatter_update_gpu_fp32, d24111_axisF_bfzyx) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - - auto outputs = network.execute(); + + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { - 2.f, 0.f, 1.f, 0.f, + 2.f, 0.f, 1.f, 0.f, 4.f, 0.f, 3.f, 0.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); } @@ -970,19 +965,19 @@ TEST(scatter_update_gpu_int32, d121251_bfwzyx_axisB) { // 0, 1, 2, 3, 4, // 5, 6, 7, 8, 9, // 10, 11, 12, 13, 14, - // 15, 16, 17, 18, 19 + // 15, 16, 17, 18, 19 // // Output: // 40, 30, 20, 3, 50, // 80, 70, 60, 8, 90, // 120, 110, 100, 13, 130, - // 160, 150, 140, 18, 170 + // 160, 150, 140, 18, 170 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfwzyx, tensor{ batch(1), feature(2), spatial(1, 5, 2, 1) }}); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::i32, format::bfwzyx, tensor{ batch(1), feature(2), spatial(2, 2, 2, 1) }}); // Updates + auto input1 = engine.allocate_memory({ data_types::i32, format::bfwzyx, tensor{ batch(1), feature(2), spatial(1, 5, 2, 1) }}); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::i32, 
format::bfwzyx, tensor{ batch(1), feature(2), spatial(2, 2, 2, 1) }}); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_y; set_values(input1, { @@ -1009,23 +1004,23 @@ TEST(scatter_update_gpu_int32, d121251_bfwzyx_axisB) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); - + auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 40, 30, 20, 3, 50, @@ -1052,7 +1047,7 @@ TEST(scatter_update_gpu_fp32, d21511_bfzyx_axisX) { // 0.f, 1.f // // Updates: - // 10.f, 20.f, + // 10.f, 20.f, // 30.f, 40.f, // 50.f, 60.f, // 70.f, 80.f @@ -1066,11 +1061,11 @@ TEST(scatter_update_gpu_fp32, d21511_bfzyx_axisX) { // 70.f, 80.f, 7.f, 50.f, 60.f // - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 1, 1, 1, 5 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indices - auto input3 = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 1, 1, 2, 2 } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 1, 1, 1, 5 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indices + auto input3 = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 1, 1, 2, 2 } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_z; set_values(input1, { @@ -1084,31 +1079,31 @@ TEST(scatter_update_gpu_fp32, d21511_bfzyx_axisX) { }); set_values(input3, { - 10.f, 20.f, + 10.f, 20.f, 30.f, 40.f, 50.f, 60.f, 70.f, 80.f }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - + network network(engine, topology); - - + + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 30.f, 40.f, 2.f, 10.f, 20.f, @@ -1133,16 +1128,16 @@ TEST(scatter_update_gpu_fp32, d1252_axisY_bfwzyx) { // 3.f, 4.f // // 
Updates: - // 20.f, 30.f, + // 20.f, 30.f, // 40.f, 50.f // - // 60.f, 70.f, + // 60.f, 70.f, // 80.f, 90.f // - // 100.f, 110.f, + // 100.f, 110.f, // 120.f, 130.f // - // 140.f, 150.f, + // 140.f, 150.f, // 160.f, 170.f // // Dictionary: @@ -1153,11 +1148,11 @@ TEST(scatter_update_gpu_fp32, d1252_axisY_bfwzyx) { // 40.f, 50.f, 2.f, 3.f, 20.f, 30.f, 60.f, 70.f, 80.f, 90.f, // 120.f, 130.f, 12.f, 13.f, 100.f, 110.f, 140.f, 150.f, 160.f, 170.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 5 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); // Indices - auto input3 = memory::allocate(engine, { data_types::f32, format::bfwzyx, tensor{ batch(1), feature(2), spatial(2, 2, 1, 2) } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 5 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); // Indices + auto input3 = engine.allocate_memory({ data_types::f32, format::bfwzyx, tensor{ batch(1), feature(2), spatial(2, 2, 1, 2) } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_y; set_values(input1, { @@ -1171,43 +1166,43 @@ TEST(scatter_update_gpu_fp32, d1252_axisY_bfwzyx) { }); set_values(input3, { - 20.f, 30.f, + 20.f, 30.f, 40.f, 50.f, - 60.f, 70.f, + 60.f, 70.f, 80.f, 90.f, - - 100.f, 110.f, + + 100.f, 110.f, 120.f, 130.f, - - 140.f, 150.f, + + 140.f, 150.f, 160.f, 170.f }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 40.f, 50.f, 2.f, 3.f, 20.f, 30.f, 60.f, 70.f, 80.f, 90.f, 120.f, 130.f, 12.f, 13.f, 100.f, 110.f, 140.f, 150.f, 160.f, 170.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); } @@ -1240,11 +1235,11 @@ TEST(scatter_update_gpu_int32, d2115_axisX_bfwzyx) { // 0, 30, 20, 50, 40, // 5, 70, 60, 90, 80 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 1, 5, 1 }}); // Dictionary - auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::i32, format::bfwzyx, tensor{ batch(2), feature(1), spatial(1, 2, 2, 1) }}); // Updates + auto input1 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 5, 1 }}); // Dictionary + auto input2 = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes + auto input3 = 
engine.allocate_memory({ data_types::i32, format::bfwzyx, tensor{ batch(2), feature(1), spatial(1, 2, 2, 1) }}); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_x; set_values(input1, { @@ -1265,29 +1260,29 @@ TEST(scatter_update_gpu_int32, d2115_axisX_bfwzyx) { }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0, 30, 20, 50, 40, 5, 70, 60, 90, 80 }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], output_ptr[i]); } @@ -1322,11 +1317,11 @@ TEST(scatter_update_gpu_fp16, d21214_bfzyx_axisX_bfwzyx) { // 8.f, 100.f, 90.f, 80.f, // 12.f, 130.f, 120.f, 110.f - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfzyx, { 2, 1, 4, 1, 2 } }); // Dictionary - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); // Indexes - auto input3 = memory::allocate(engine, { data_types::f16, format::bfwzyx, tensor{ batch(2), feature(1), spatial(3, 1, 1, 2) } }); // Updates + auto input1 = engine.allocate_memory({ data_types::f16, format::bfzyx, { 2, 1, 4, 1, 2 } }); // Dictionary + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); // Indexes + auto input3 = engine.allocate_memory({ data_types::f16, format::bfwzyx, tensor{ batch(2), feature(1), spatial(3, 1, 1, 2) } }); // Updates auto axis = cldnn::scatter_update::scatter_update_axis::along_x; set_values(input1, { @@ -1341,30 +1336,30 @@ TEST(scatter_update_gpu_fp16, d21214_bfzyx_axisX_bfwzyx) { }); set_values(input3, { - FLOAT16(20.0f), FLOAT16(30.0f), FLOAT16(40.0f), + FLOAT16(20.0f), FLOAT16(30.0f), FLOAT16(40.0f), FLOAT16(50.0f), FLOAT16(60.0f), FLOAT16(70.0f), - FLOAT16(80.0f), FLOAT16(90.0f), FLOAT16(100.0f), + FLOAT16(80.0f), FLOAT16(90.0f), FLOAT16(100.0f), FLOAT16(110.0f), FLOAT16(120.0f), FLOAT16(130.0f) }); topology topology; - topology.add(input_layout("InputDictionary", input1.get_layout())); - topology.add(input_layout("InputText", input2.get_layout())); - topology.add(input_layout("InputUpdates", input3.get_layout())); + topology.add(input_layout("InputDictionary", input1->get_layout())); + topology.add(input_layout("InputText", input2->get_layout())); + topology.add(input_layout("InputUpdates", input3->get_layout())); topology.add( scatter_update("scatter_update", "InputDictionary", "InputText", "InputUpdates", axis) ); - - network network(engine, topology); - + + network network(engine, topology); + network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); 
network.set_input_data("InputUpdates", input3); - + auto outputs = network.execute(); auto output = outputs.at("scatter_update").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 40.f, 30.f, 20.f, @@ -1372,7 +1367,7 @@ TEST(scatter_update_gpu_fp16, d21214_bfzyx_axisX_bfwzyx) { 8.f, 100.f, 90.f, 80.f, 12.f, 130.f, 120.f, 110.f }; - + for (size_t i = 0; i < expected_results.size(); ++i) { EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i])); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp index fff6997834b..c4e698f8210 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp @@ -3,30 +3,27 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/select.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" + +#include "test_utils.h" + +#include +#include "cldnn/primitives/select.hpp" using namespace cldnn; -using namespace tests; +using namespace ::tests; // select_gpu_f32 TEST(select_gpu_f32, select_basic) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -62,7 +59,7 @@ TEST(select_gpu_f32, select_basic) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -71,16 +68,16 @@ TEST(select_gpu_f32, select_basic) { } TEST(select_gpu_f32, select_basic_negative) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - 
topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -116,7 +113,7 @@ TEST(select_gpu_f32, select_basic_negative) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -125,16 +122,16 @@ TEST(select_gpu_f32, select_basic_negative) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 1, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -202,7 +199,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x2x1x2) { -0.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -211,16 +208,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x2x1x2) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_1x1x1x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -278,7 +275,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_1x1x1x1) { -0.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -287,16 +284,16 @@ TEST(select_gpu_f32, 
select_basic_bfyx_2x2x2x2_bcast_mask_1x1x1x1) { } TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x2x2x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 1 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 1 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -358,7 +355,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x2x2x1) { -0.5f, 8.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -367,16 +364,16 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x2x2x1) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -444,7 +441,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_2x2x1x2) { 8.f, -0.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -453,16 +450,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_2x2x1x2) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x2x1_bcast_in2_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 
2, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -526,7 +523,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x2x1_bcast_in2_2x2x1 4.f, -0.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -535,16 +532,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x2x1_bcast_in2_2x2x1 } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x1x2x2_in1_1x2x2x2_in2_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -600,7 +597,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x1x2x2_in1_1x2x2x2_in -0.5f, 5.2f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -609,16 +606,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x1x2x2_in1_1x2x2x2_in } TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x2x1_in2_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 1 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf ,{ 2, 2, 1, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf ,{ 2, 2, 1, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + 
topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -644,7 +641,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x 0.f, 0.1f, - 0.5f, + 0.5f, -0.f, -0.5f, @@ -676,7 +673,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -2.f, 6.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -685,16 +682,16 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_1x1x1x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -752,7 +749,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_1x1x1x1) { 8.f, 1.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -761,16 +758,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_1x1x1x1) { } TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf ,{ 2, 2, 2, 1 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf ,{ 2, 2, 2, 1 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -800,7 +797,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { 0.5f, 0.7f, 0.f, 0.f, - 0.f, 0.f, + 0.f, 0.f, -0.f, -0.1f, -0.f, -0.5f, @@ -832,7 +829,7 @@ TEST(select_gpu_f32, 
select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { 8.f, 10.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -841,16 +838,16 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x1x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -918,7 +915,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x1x2) { 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -927,16 +924,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x1x2) { } TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_1x1x1x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -994,7 +991,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_1x1x1x1) { 1.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1003,16 +1000,16 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_1x1x1x1) { } TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 1 } }); - auto input2 = memory::allocate(engine, { 
data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -1042,7 +1039,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { 0.f, 0.f, 0.1f, 0.3f, - 0.5f, 0.7f, + 0.5f, 0.7f, -0.f, -0.1f, -0.f, -0.5f, @@ -1074,7 +1071,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { 7.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1083,16 +1080,16 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { } TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x2x1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 2, 2, 1 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf, { 2, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 2, 2, 1 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf ,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf, { 2, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input1", input1.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input1", input1->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input1", "input2")); set_values(input1, { @@ -1122,7 +1119,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x 0.f, 0.1f, - 0.5f, + 0.5f, -0.f, -0.5f, @@ -1154,7 +1151,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -0.5f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1163,16 +1160,16 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x } TEST(select_gpu_f32, select_basic_comma) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto 
input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1208,7 +1205,7 @@ TEST(select_gpu_f32, select_basic_comma) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1217,79 +1214,79 @@ TEST(select_gpu_f32, select_basic_comma) { } TEST(select_gpu_f32, select_basic_error_input_sizes) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 3, 4, 5, 6 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); EXPECT_ANY_THROW(network(engine, topology)); } TEST(select_gpu_f32, select_basic_error_mask_sizes) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 3, 4, 5, 6 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); EXPECT_ANY_THROW(network(engine, topology)); } TEST(select_gpu_f32, select_basic_error_input_types) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::i8, 
format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); EXPECT_ANY_THROW(network(engine, topology)); } TEST(select_gpu_f32, select_basic_error_input_formats) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); EXPECT_ANY_THROW(network(engine, topology)); } TEST(select_gpu_f32, select_basic_byxf) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1325,7 +1322,7 @@ TEST(select_gpu_f32, select_basic_byxf) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1334,16 +1331,16 @@ TEST(select_gpu_f32, select_basic_byxf) { } TEST(select_gpu_f32, select_basic_mask_f16) { - const auto& engine = 
get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f16, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1379,7 +1376,7 @@ TEST(select_gpu_f32, select_basic_mask_f16) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1388,16 +1385,16 @@ TEST(select_gpu_f32, select_basic_mask_f16) { } TEST(select_gpu_f32, select_basic_mask_i8) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::i8, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1433,7 +1430,7 @@ TEST(select_gpu_f32, select_basic_mask_i8) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1442,16 +1439,16 @@ TEST(select_gpu_f32, select_basic_mask_i8) { } TEST(select_gpu_f32, select_basic_mask_u8) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::u8, format::yxfb,{ 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - 
topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1487,7 +1484,7 @@ TEST(select_gpu_f32, select_basic_mask_u8) { 15.f, 0.5f, 8.f, 12.f, 4.f, 6.5f, 8.f, -2.5f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 16; i++) { @@ -1496,16 +1493,16 @@ TEST(select_gpu_f32, select_basic_mask_u8) { } TEST(select_gpu_f32, select_basic_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1529,11 +1526,11 @@ TEST(select_gpu_f32, select_basic_1x1x2x2) { auto output = outputs.at("select").get_memory(); - float answers[4] = { + float answers[4] = { 0.5f, 2.5f, 2.f, 0.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1542,16 +1539,16 @@ TEST(select_gpu_f32, select_basic_1x1x2x2) { } TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1583,7 +1580,7 @@ TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) { 2.f, 0.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1592,16 
+1589,16 @@ TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) { } TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1633,7 +1630,7 @@ TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) { 2.f, 0.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1643,16 +1640,16 @@ TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) { // select_gpu_f16 TEST(select_gpu_f16, select_basic_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1684,7 +1681,7 @@ TEST(select_gpu_f16, select_basic_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1693,16 +1690,16 @@ TEST(select_gpu_f16, select_basic_1x1x2x2) { } TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 
2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1734,7 +1731,7 @@ TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1743,16 +1740,16 @@ TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) { } TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1784,7 +1781,7 @@ TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1793,16 +1790,16 @@ TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) { } TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1834,7 +1831,7 @@ TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1844,16 +1841,16 @@ 
TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) { // select_gpu_i8 TEST(select_gpu_i8, select_basic_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1885,7 +1882,7 @@ TEST(select_gpu_i8, select_basic_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1894,16 +1891,16 @@ TEST(select_gpu_i8, select_basic_1x1x2x2) { } TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1935,7 +1932,7 @@ TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1944,16 +1941,16 @@ TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) { } TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); topology 
topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -1985,7 +1982,7 @@ TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -1994,16 +1991,16 @@ TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) { } TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -2035,7 +2032,7 @@ TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) { 2, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -2045,16 +2042,16 @@ TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) { // select_gpu_u8 TEST(select_gpu_u8, select_basic_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -2086,7 +2083,7 @@ TEST(select_gpu_u8, select_basic_1x1x2x2) { 255, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -2095,16 +2092,16 @@ TEST(select_gpu_u8, select_basic_1x1x2x2) { } 
TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -2136,7 +2133,7 @@ TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) { 255, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -2145,16 +2142,16 @@ TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) { } TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -2186,7 +2183,7 @@ TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) { 255, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { @@ -2195,16 +2192,16 @@ TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) { } TEST(select_gpu_u8, select_basic_mask_i8_1x1x2x2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); - auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto input2 = engine.allocate_memory({ data_types::u8, format::yxfb,{ 1, 1, 2, 2 } }); + auto mask = engine.allocate_memory({ data_types::i8, format::yxfb,{ 1, 1, 2, 2 } }); topology topology; - 
topology.add(input_layout("input", input.get_layout())); - topology.add(input_layout("input2", input2.get_layout())); - topology.add(input_layout("mask", mask.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("input2", input2->get_layout())); + topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", "mask", "input", "input2")); set_values(input, { @@ -2236,7 +2233,7 @@ TEST(select_gpu_u8, select_basic_mask_i8_1x1x2x2) { 255, 0 }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (int i = 0; i < 4; i++) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp index a1bbf198771..55a5e8fa897 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp @@ -3,24 +3,21 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include #include -#include using namespace cldnn; using namespace ::tests; TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 15, 2, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 15, 2, 2 } }); int32_t axis = 1; int32_t group = 5; @@ -34,7 +31,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -46,7 +43,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, @@ -60,9 +57,9 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) { } TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 15, 2, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 15, 2, 2 } }); int32_t axis = -3; int32_t group = 5; @@ -76,7 +73,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -88,7 +85,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, @@ -102,9 +99,9 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { } 
TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 15, 2, 1, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 15, 2, 1, 2 } }); int32_t axis = 0; int32_t group = 5; @@ -118,7 +115,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -130,7 +127,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, @@ -144,9 +141,9 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { } TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 15, 2, 1, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 15, 2, 1, 2 } }); int32_t axis = -4; int32_t group = 5; @@ -160,7 +157,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -172,7 +169,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, @@ -186,9 +183,9 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { } TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 6 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 6 } }); int32_t axis = -2; int32_t group = 3; @@ -199,7 +196,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -211,7 +208,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, @@ -224,9 +221,9 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { } TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 6, 1, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 6, 1, 2 } }); int32_t axis = -3; 
int32_t group = 3; @@ -237,7 +234,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -249,7 +246,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, @@ -262,9 +259,9 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { } TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 6 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 6 } }); int32_t axis = -2; int32_t group = 2; @@ -275,7 +272,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -287,7 +284,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, @@ -300,9 +297,9 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { } TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 6, 1, 2 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 6, 1, 2 } }); int32_t axis = -3; int32_t group = 2; @@ -313,7 +310,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -325,7 +322,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { auto outputs = network.execute(); auto output = outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, @@ -338,9 +335,9 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { } TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) { - engine engine; + auto& engine = get_test_engine(); - auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 6, 1, 1, 1 } }); + auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx, { 6, 1, 1, 1 } }); int32_t axis = 0; int32_t group = 2; @@ -349,7 +346,7 @@ TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) { }); topology topology; - topology.add(input_layout("Input0", input0.get_layout())); + topology.add(input_layout("Input0", input0->get_layout())); topology.add( shuffle_channels("shuffle_channels", "Input0", group, axis) ); @@ -361,7 +358,7 @@ TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) { auto outputs = network.execute(); auto output = 
outputs.at("shuffle_channels").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp index 803ca0b3ae9..2ecc932b41e 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp @@ -2,18 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "api/memory.hpp" -#include -#include "api/softmax.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include using namespace cldnn; using namespace std; -using namespace tests; +using namespace ::tests; class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { public: @@ -27,14 +23,14 @@ public: float out_buffer[out_size]; float expected_buffer[out_size]; - const cldnn::engine& engine; - cldnn::memory input; + cldnn::engine& engine; + cldnn::memory::ptr input; //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}}); softmax_gpu_xb_f32_test_fixture() : engine(get_test_engine()) - ,input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}})) + , input(engine.allocate_memory({ data_types::f32, format::yxfb, { input_b, 1, input_x, 1}})) {} void compare_out_buffer_with_expected() { @@ -72,7 +68,7 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) { set_values(input, in_b); - network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input"))); + network network(engine, topology(input_layout("input", input->get_layout()), softmax("softmax", "input"))); network.set_input_data("input", input); auto outputs = network.execute(); @@ -81,10 +77,10 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (uint32_t i = 0; i < out_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } compare_out_buffer_with_expected(); } @@ -102,7 +98,7 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) { for(size_t i = 0; i < out_size; ++i) expected_buffer[i] = 0.1f; - network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input"))); + network network(engine, topology(input_layout("input", input->get_layout()), softmax("softmax", "input"))); network.set_input_data("input", input); auto outputs = network.execute(); @@ -111,10 +107,10 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, input_same_values_batch_wise) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (uint32_t i = 0; i < out_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } compare_out_buffer_with_expected_batch_wise(); } @@ -157,7 +153,7 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) { for(size_t i = 0; i < out_size; ++i) out_buffer[i] = NAN; - network network(engine, topology(input_layout("input", input.get_layout()), softmax("softmax", "input"))); + network network(engine, topology(input_layout("input", 
input->get_layout()), softmax("softmax", "input"))); network.set_input_data("input", input); auto outputs = network.execute(); @@ -166,10 +162,10 @@ TEST_F(softmax_gpu_xb_f32_test_fixture, values_batch_wise) { auto output_prim = outputs.begin()->second.get_memory(); - auto output_ptr = output_prim.pointer(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); for (uint32_t i = 0; i < out_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } compare_out_buffer_with_expected_batch_wise(); } @@ -178,11 +174,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_fyx) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input")); set_values(input, { //bfyx @@ -208,11 +204,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_fyx) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[buf_size]; for (uint32_t i = 0; i < buf_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } float sum = 0; @@ -251,11 +247,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_y)); vector input_vec = { @@ -299,11 +295,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[buf_size]; for (uint32_t i = 0; i < buf_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } float temp_max = 0; @@ -345,11 +341,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_f) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); 
topology.add(softmax("softmax", "input", softmax::normalize_f)); vector input_vec = { @@ -387,11 +383,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_f) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[buf_size]; for (uint32_t i = 0; i < buf_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } float temp_max = 0; @@ -433,11 +429,11 @@ TEST(softmax_gpu_yxfb_f32, normalize_f) { static const int32_t x_size = 1, y_size = 2, feature_num = 1, batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_fyx)); set_values(input, { //yxfb @@ -470,11 +466,11 @@ TEST(softmax_gpu_yxfb_f32, normalize_f) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[buf_size]; for (uint32_t i = 0; i < buf_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } float expected_sum = 1.0f; @@ -510,11 +506,11 @@ TEST(softmax_gpu_bfzyx_f32, normalize_z) { // Input : 2x3x2x2x2 static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size *y_size * z_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ batch_num, feature_num, x_size , y_size, z_size } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ batch_num, feature_num, x_size , y_size, z_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_z)); vector input_vec = { @@ -553,11 +549,11 @@ TEST(softmax_gpu_bfzyx_f32, normalize_z) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float out_buffer[buf_size]; for (uint32_t i = 0; i < buf_size; i++) { - out_buffer[i] = get_value(output_ptr, i); + out_buffer[i] = output_ptr[i]; } float temp_max = 0; @@ -602,11 +598,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_all) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {batch_num, feature_num, x_size, y_size}}); + auto input = engine.allocate_memory({data_types::f32, format::bfyx, {batch_num, feature_num, x_size, y_size}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", 
input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {//bfyx @@ -627,11 +623,11 @@ TEST(softmax_gpu_bfyx_f32, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += get_value(output_ptr, i); + sum += output_ptr[i]; } EXPECT_EQ(true, are_equal(sum, expected_sum)); } @@ -640,11 +636,11 @@ TEST(softmax_gpu_yxfb_f32, normalize_all) { // Input : 2x2x3x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {y_size, x_size, feature_num, batch_num}}); + auto input = engine.allocate_memory({data_types::f32, format::yxfb, {y_size, x_size, feature_num, batch_num}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {//yxfb @@ -665,11 +661,11 @@ TEST(softmax_gpu_yxfb_f32, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += get_value(output_ptr, i); + sum += output_ptr[i]; } EXPECT_EQ(true, are_equal(sum, expected_sum)); } @@ -678,11 +674,11 @@ TEST(softmax_gpu_bfzyx_f32, normalize_all) { // Input : 2x3x2x2x2 static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * z_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f32, format::bfzyx, {batch_num, feature_num, x_size, y_size, z_size}}); + auto input = engine.allocate_memory({data_types::f32, format::bfzyx, {batch_num, feature_num, x_size, y_size, z_size}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {// z0y0x0 z0y0x1 z0y1x0 z0y1x1 z1y0x0 z1y0x1 z1y1x0 z1y1x1 @@ -702,11 +698,11 @@ TEST(softmax_gpu_bfzyx_f32, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += get_value(output_ptr, i); + sum += output_ptr[i]; } EXPECT_EQ(true, are_equal(sum, expected_sum)); } @@ -715,11 +711,11 @@ TEST(softmax_gpu_bfyx_f16, normalize_all) { // Input : 2x3x2x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {batch_num, feature_num, x_size, y_size}}); + auto input = engine.allocate_memory({data_types::f16, 
format::bfyx, {batch_num, feature_num, x_size, y_size}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {//bfyx @@ -740,11 +736,11 @@ TEST(softmax_gpu_bfyx_f16, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += float16_to_float32(get_value(output_ptr, i)); + sum += float16_to_float32(output_ptr[i]); } ASSERT_NEAR(sum, expected_sum, 0.001); } @@ -753,11 +749,11 @@ TEST(softmax_gpu_yxfb_f16, normalize_all) { // Input : 2x2x3x2 static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::yxfb, {y_size, x_size, feature_num, batch_num}}); + auto input = engine.allocate_memory({data_types::f16, format::yxfb, {y_size, x_size, feature_num, batch_num}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {//yxfb @@ -778,11 +774,11 @@ TEST(softmax_gpu_yxfb_f16, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += float16_to_float32(get_value(output_ptr, i)); + sum += float16_to_float32(output_ptr[i]); } ASSERT_NEAR(sum, expected_sum, 0.001); } @@ -791,11 +787,11 @@ TEST(softmax_gpu_bfzyx_f16, normalize_all) { // Input : 2x3x2x2x2 static const int32_t x_size = 2, y_size = 2, z_size = 2, feature_num = 3, batch_num = 2, buf_size = x_size * y_size * z_size * batch_num * feature_num; - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, {data_types::f16, format::bfzyx, {batch_num, feature_num, x_size, y_size, z_size}}); + auto input = engine.allocate_memory({data_types::f16, format::bfzyx, {batch_num, feature_num, x_size, y_size, z_size}}); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", "input", softmax::normalize_all)); set_values(input, {// z0y0x0 z0y0x1 z0y1x0 z0y1x1 z1y0x0 z1y0x1 z1y1x0 z1y1x1 @@ -815,11 +811,11 @@ TEST(softmax_gpu_bfzyx_f16, normalize_all) { EXPECT_EQ(outputs.begin()->first, "softmax"); auto output = outputs.at("softmax").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); float sum = 0.0f; float expected_sum = 1.0f; for (uint32_t i = 0; i < buf_size; i++) { - sum += float16_to_float32(get_value(output_ptr, i)); + sum += float16_to_float32(output_ptr[i]); } ASSERT_NEAR(sum, expected_sum, 0.001); } @@ -884,28 +880,26 @@ public: } template - memory generate_reference_typed(const std::vector & inputs) + memory::ptr generate_reference_typed(const std::vector & inputs) { assert(inputs.size() == 1); - 
const memory & input = inputs[0]; + const memory::ptr input = inputs[0]; //Output is bfyx - auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, input.get_layout().format, input.get_layout().size)); + auto output = engine.allocate_memory(cldnn::layout(input->get_layout().data_type, input->get_layout().format, input->get_layout().size)); -// const auto params = static_cast(layer_parmas); + cldnn::mem_lock in0_mem(input, get_test_stream()); + cldnn::mem_lock out_mem(output, get_test_stream()); - const auto in0_mem = input.pointer(); - auto out_mem = output.pointer(); + const int in0_b = input->get_layout().size.sizes()[0]; + const int in0_f = input->get_layout().size.sizes()[1]; + const int in0_h = input->get_layout().size.sizes()[3]; + const int in0_w = input->get_layout().size.sizes()[2]; - const int in0_b = input.get_layout().size.sizes()[0]; - const int in0_f = input.get_layout().size.sizes()[1]; - const int in0_h = input.get_layout().size.sizes()[3]; - const int in0_w = input.get_layout().size.sizes()[2]; - -// const int out_b = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0]; -// const int out_f = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1]; -// const int out_h = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2]; -// const int out_w = output.get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3]; +// const int out_b = output->get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[0]; +// const int out_f = output->get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[1]; +// const int out_h = output->get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[2]; +// const int out_w = output->get_layout().size.transform(cldnn::format::bfyx, 0).sizes()[3]; // assert(in0_b == out_b); // assert(in0_f == out_f); @@ -915,7 +909,7 @@ public: std::vector cached_exp_vals; cached_exp_vals.resize(in0_f); - const auto input_desc = get_linear_memory_desc(input.get_layout()); + const auto input_desc = get_linear_memory_desc(input->get_layout()); for (int n = 0; n < in0_b; ++n) for (int y = 0; y < in0_h; ++y) @@ -925,7 +919,7 @@ public: for (int c = 0; c < in0_f; ++c) { - const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc); + const size_t in0_idx = get_linear_index(input->get_layout(), n, c, y, x, input_desc); max_val = std::max(max_val, static_cast(in0_mem[in0_idx])); } @@ -934,7 +928,7 @@ public: for (int c = 0; c < in0_f; ++c) { - const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc); + const size_t in0_idx = get_linear_index(input->get_layout(), n, c, y, x, input_desc); float tmp = static_cast((Type)std::exp(static_cast(in0_mem[in0_idx]) - max_val)); Z += tmp; @@ -943,7 +937,7 @@ public: for (int c = 0; c < in0_f; ++c) { - const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, input_desc); + const size_t out_idx = get_linear_index(output->get_layout(), n, c, y, x, input_desc); out_mem[out_idx] = (Type)(cached_exp_vals[c] / Z); } } @@ -951,7 +945,7 @@ public: return output; } - virtual memory generate_reference(const std::vector & inputs) override + virtual memory::ptr generate_reference(const std::vector & inputs) override { if (generic_params->data_type == data_types::f32) { @@ -1008,4 +1002,3 @@ INSTANTIATE_TEST_CASE_P(DISABLED_SOFTMAX, softmax_test, ::testing::Combine(::testing::ValuesIn(softmax_test::generate_generic_test_params()), 
::testing::ValuesIn(softmax_test::generate_specific_test_params())), softmax_test::custom_param_name); - diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_batch_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_batch_gpu_test.cpp index 9dec5cf8c73..cb46b013207 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_batch_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_batch_gpu_test.cpp @@ -3,17 +3,14 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -26,8 +23,8 @@ TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) { // Output : 8x1x1x1 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,2} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, {1,2,2,2} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), @@ -37,7 +34,7 @@ TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -49,7 +46,7 @@ TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f @@ -70,8 +67,8 @@ TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) { // Output : 4x1x3x2 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,2,2,4} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, {1,2,2,4} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -81,7 +78,7 @@ TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1), tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -93,7 +90,7 @@ TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 1.f, 4.f, 5.f, @@ -117,8 +114,8 @@ TEST(space_to_batch_fp16_gpu, i2132_bs1222_pb0010_pe0100) { // Output : 16x1x2x1 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {2,1,2,3} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, {2,1,2,3} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -127,7 +124,7 @@ TEST(space_to_batch_fp16_gpu, 
i2132_bs1222_pb0010_pe0100) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), @@ -139,7 +136,7 @@ TEST(space_to_batch_fp16_gpu, i2132_bs1222_pb0010_pe0100) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f, @@ -163,8 +160,8 @@ TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) { // Output : 8x1x1x2x1 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfzyx, {1,2,2,3,1} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfzyx, {1,2,2,3,1} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -173,7 +170,7 @@ TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1), tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), @@ -185,7 +182,7 @@ TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f, @@ -207,9 +204,9 @@ TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) { // Output : 16x1x2x2x1x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) }; - auto input = memory::allocate(engine, { data_types::f16, format::bfwzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f16, format::bfwzyx, input_shape }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -221,7 +218,7 @@ TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1), tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), @@ -233,7 +230,7 @@ TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, @@ -261,8 +258,8 @@ TEST(space_to_batch_fp16_gpu, i11611_bs1222_pb0010_pe0001_b_fs_yx_fsv16) { // Output : 8x8x1x1 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,16,1,1} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, {1,16,1,1} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), 
FLOAT16(2.0f), FLOAT16(3.0f), @@ -272,7 +269,7 @@ TEST(space_to_batch_fp16_gpu, i11611_bs1222_pb0010_pe0001_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f16)); topology.add(space_to_batch("space_to_batch", "input_fsv", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,1,0}, 0), @@ -286,7 +283,7 @@ TEST(space_to_batch_fp16_gpu, i11611_bs1222_pb0010_pe0001_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("stb_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, @@ -314,8 +311,8 @@ TEST(space_to_batch_fp16_gpu, i1812_bs1221_pb0010_pe0200_b_fs_yx_fsv16) { // Output : 4x5x1x2 // Input values in fp16 - engine engine; - auto input = memory::allocate(engine, { data_types::f16, format::bfyx, {1,8,2,1} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f16, format::bfyx, {1,8,2,1} }); set_values(input, { FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), @@ -325,7 +322,7 @@ TEST(space_to_batch_fp16_gpu, i1812_bs1221_pb0010_pe0200_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f16)); topology.add(space_to_batch("space_to_batch", "input_fsv", tensor(format::bfyx, {1,2,2,1}, 1), tensor(format::bfyx, {0,0,1,0}, 0), @@ -339,7 +336,7 @@ TEST(space_to_batch_fp16_gpu, i1812_bs1221_pb0010_pe0200_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("stb_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, @@ -363,8 +360,8 @@ TEST(space_to_batch_fp32_gpu, i1222_bs1222_pb0000_pe0000) { // Output : 8x1x1x1 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,2,2,2} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -372,7 +369,7 @@ TEST(space_to_batch_fp32_gpu, i1222_bs1222_pb0000_pe0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -384,7 +381,7 @@ TEST(space_to_batch_fp32_gpu, i1222_bs1222_pb0000_pe0000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f @@ -405,8 +402,8 @@ TEST(space_to_batch_fp32_gpu, i1242_bs1221_pb0020_pe0000) { // Output : 4x1x3x2 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,4} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,2,2,4} }); 
set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -416,7 +413,7 @@ TEST(space_to_batch_fp32_gpu, i1242_bs1221_pb0020_pe0000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,1}, 1), tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), @@ -428,7 +425,7 @@ TEST(space_to_batch_fp32_gpu, i1242_bs1221_pb0020_pe0000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 1.f, 4.f, 5.f, @@ -452,8 +449,8 @@ TEST(space_to_batch_fp32_gpu, i2132_bs1222_pb0010_pe0100) { // Output : 16x1x2x1 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2,1,2,3} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {2,1,2,3} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -462,7 +459,7 @@ TEST(space_to_batch_fp32_gpu, i2132_bs1222_pb0010_pe0100) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfyx, {1,2,2,2}, 1), tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), @@ -474,7 +471,7 @@ TEST(space_to_batch_fp32_gpu, i2132_bs1222_pb0010_pe0100) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 2.f, 0.f, 8.f, 0.f, 3.f, 0.f, 9.f, @@ -498,8 +495,8 @@ TEST(space_to_batch_fp32_gpu, i12132_bs12122_pb00010_pe00000) { // Output : 8x1x1x2x1 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, {1,2,2,3,1} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, {1,2,2,3,1} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -508,7 +505,7 @@ TEST(space_to_batch_fp32_gpu, i12132_bs12122_pb00010_pe00000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfzyx, {1,2,1,2,2}, 1), tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), @@ -520,7 +517,7 @@ TEST(space_to_batch_fp32_gpu, i12132_bs12122_pb00010_pe00000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 2.f, 0.f, 3.f, 0.f, 4.f, 1.f, 5.f, @@ -542,9 +539,9 @@ TEST(space_to_batch_fp32_gpu, i134121_bs142121_pb010100_pe000000) { // Output : 16x1x2x2x1x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); tensor input_shape = tensor{ batch(1), feature(3), spatial(1, 2, 1, 4) }; - auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, input_shape }); + auto input = engine.allocate_memory({ data_types::f32, format::bfwzyx, input_shape }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, @@ -554,7 +551,7 @@ 
TEST(space_to_batch_fp32_gpu, i134121_bs142121_pb010100_pe000000) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(space_to_batch("space_to_batch", "Input", tensor(format::bfwzyx, {1,4,2,1,2,1}, 1), tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), @@ -566,7 +563,7 @@ TEST(space_to_batch_fp32_gpu, i134121_bs142121_pb010100_pe000000) { auto outputs = network.execute(); auto output = outputs.at("space_to_batch").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, @@ -594,8 +591,8 @@ TEST(space_to_batch_fp32_gpu, i11622_bs1421_pb0000_pe0000_b_fs_yx_fsv16) { // Output : 8x4x1x2 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,16,2,2} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,16,2,2} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, @@ -609,7 +606,7 @@ TEST(space_to_batch_fp32_gpu, i11622_bs1421_pb0000_pe0000_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f32)); topology.add(space_to_batch("space_to_batch", "input_fsv", tensor(format::bfyx, {1,4,2,1}, 1), tensor(format::bfyx, {0,0,0,0}, 0), @@ -623,7 +620,7 @@ TEST(space_to_batch_fp32_gpu, i11622_bs1421_pb0000_pe0000_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("stb_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 16.f, 17.f, 32.f, 33.f, 48.f, 49.f, @@ -651,8 +648,8 @@ TEST(space_to_batch_fp32_gpu, i1623_bs1312_pb0001_pe0000_b_fs_yx_fsv16) { // Output : 6x2x2x2 // Input values in fp32 - engine engine; - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,6,3,2} }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,6,3,2} }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, @@ -663,7 +660,7 @@ TEST(space_to_batch_fp32_gpu, i1623_bs1312_pb0001_pe0000_b_fs_yx_fsv16) { }); topology topology; - topology.add(input_layout("Input", input.get_layout())); + topology.add(input_layout("Input", input->get_layout())); topology.add(reorder("input_fsv", "Input", format::b_fs_yx_fsv16, data_types::f32)); topology.add(space_to_batch("space_to_batch", "input_fsv", tensor(format::bfyx, {1,3,1,2}, 1), tensor(format::bfyx, {0,0,0,1}, 0), @@ -677,7 +674,7 @@ TEST(space_to_batch_fp32_gpu, i1623_bs1312_pb0001_pe0000_b_fs_yx_fsv16) { auto outputs = network.execute(); auto output = outputs.at("stb_to_bfyx").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 0.f, 4.f, 0.f, 19.f, 0.f, 22.f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_depth_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_depth_gpu_test.cpp index 76611b0f60f..6614d4d6b75 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_depth_gpu_test.cpp +++ 
b/inference-engine/thirdparty/clDNN/tests/test_cases/space_to_depth_gpu_test.cpp @@ -3,16 +3,13 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include -#include -#include -#include +#include "test_utils.h" + +#include +#include #include -#include using namespace cldnn; using namespace ::tests; @@ -27,9 +24,9 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mbf) { // Output : 1x4x1x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 2, 2 } }); size_t block_size = 2; set_values(input1, { @@ -38,7 +35,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -50,7 +47,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f @@ -67,9 +64,9 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mbf) { // Output : 1x4x2x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); size_t block_size = 2; set_values(input1, { @@ -80,7 +77,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -92,7 +89,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 4.0f, 1.0f, 5.0f, 2.0f, 6.0f, 3.0f, 7.0f @@ -109,9 +106,9 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mbf) { // Output : 1x8x3x2 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 4, 6 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 2, 4, 6 } }); size_t block_size = 2; set_values(input1, { @@ -128,7 +125,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -140,7 +137,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 2.0f, 8.0f, 10.0f, 16.0f, 18.0f, @@ -164,9 +161,9 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mbf) { // Output : 1x9x3x3 // Input values in 
fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 9, 9 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 9, 9 } }); size_t block_size = 3; set_values(input1, { @@ -190,7 +187,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -202,7 +199,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f, @@ -227,9 +224,9 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mbf) { // Output : 1x4x1x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); size_t block_size = 2; set_values(input1, { @@ -237,7 +234,7 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -249,7 +246,7 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f @@ -266,9 +263,9 @@ TEST(space_to_depth_fp32_gpu, d1142_bs2_mbf) { // Output : 1x4x2x1 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 4 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 4 } }); size_t block_size = 2; set_values(input1, { @@ -276,7 +273,7 @@ TEST(space_to_depth_fp32_gpu, d1142_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -288,7 +285,7 @@ TEST(space_to_depth_fp32_gpu, d1142_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 4.0f, 1.0f, 5.0f, 2.0f, 6.0f, 3.0f, 7.0f @@ -305,9 +302,9 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mbf) { // Output : 1x8x3x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 6 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 6 } }); size_t block_size = 2; set_values(input1, { @@ -324,7 +321,7 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + 
topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -336,7 +333,7 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 2.0f, 8.0f, 10.0f, 16.0f, 18.0f, @@ -360,9 +357,9 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mbf) { // Output : 1x9x3x3 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); size_t block_size = 3; set_values(input1, { @@ -378,7 +375,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mbf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::blocks_first, block_size) ); @@ -390,7 +387,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mbf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f, @@ -419,9 +416,9 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mdf) { // Output : 1x4x1x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 2, 2 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 2, 2 } }); size_t block_size = 2; set_values(input1, { @@ -430,7 +427,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mdf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size) ); @@ -442,7 +439,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mdf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.f, 1.f, 2.f, 3.f @@ -459,9 +456,9 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mdf) { // Output : 1x4x2x1 // Input values in fp16 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); + auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 2, 4 } }); size_t block_size = 2; set_values(input1, { @@ -472,7 +469,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mdf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size) ); @@ -484,7 +481,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mdf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 
        4.0f, 1.0f, 5.0f, 2.0f, 6.0f, 3.0f, 7.0f
@@ -501,9 +498,9 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mdf) {
     // Output : 1x8x3x2
     // Input values in fp16
 
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 2, 4, 6 } });
+    auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 2, 4, 6 } });
     size_t block_size = 2;
 
     set_values(input1, {
@@ -520,7 +517,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mdf) {
     });
 
     topology topology;
-    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(input_layout("Input0", input1->get_layout()));
     topology.add(
         space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size)
     );
@@ -532,7 +529,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mdf) {
     auto outputs = network.execute();
 
     auto output = outputs.at("space_to_depth").get_memory();
-    auto output_ptr = output.pointer();
+    cldnn::mem_lock output_ptr(output, get_test_stream());
 
     std::vector expected_results = {
         0.0f, 2.0f, 8.0f, 10.0f, 16.0f, 18.0f,
@@ -556,9 +553,9 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mdf) {
     // Output : 1x9x3x3
     // Input values in fp16
 
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 1, 9, 9 } });
+    auto input1 = engine.allocate_memory({ data_types::f16, format::bfyx, { 1, 1, 9, 9 } });
     size_t block_size = 3;
 
     set_values(input1, {
@@ -582,7 +579,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mdf) {
     });
 
     topology topology;
-    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(input_layout("Input0", input1->get_layout()));
     topology.add(
         space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size)
     );
@@ -594,7 +591,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mdf) {
     auto outputs = network.execute();
 
     auto output = outputs.at("space_to_depth").get_memory();
-    auto output_ptr = output.pointer();
+    cldnn::mem_lock output_ptr(output, get_test_stream());
 
     std::vector expected_results = {
         0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f,
@@ -619,9 +616,9 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mdf) {
     // Output : 1x4x1x1
     // Input values in fp32
 
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
+    auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
     size_t block_size = 2;
 
     set_values(input1, {
@@ -629,7 +626,7 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mdf) {
     });
 
     topology topology;
-    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(input_layout("Input0", input1->get_layout()));
    topology.add(
         space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size)
     );
@@ -641,7 +638,7 @@ TEST(space_to_depth_fp32_gpu, d1122_bs2_mdf) {
     auto outputs = network.execute();
 
     auto output = outputs.at("space_to_depth").get_memory();
-    auto output_ptr = output.pointer();
+    cldnn::mem_lock output_ptr(output, get_test_stream());
 
     std::vector expected_results = {
         0.f, 1.f, 2.f, 3.f
@@ -658,9 +655,9 @@ TEST(space_to_depth_fp32_gpu, d1142_bs2_mdf) {
     // Output : 1x4x2x1
     // Input values in fp32
 
-    engine engine;
+    auto& engine = get_test_engine();
 
-    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 4 } });
+    auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 4 } });
     size_t block_size = 2;
 
     set_values(input1, {
@@ -668,7 +665,7 @@
TEST(space_to_depth_fp32_gpu, d1142_bs2_mdf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size) ); @@ -680,7 +677,7 @@ TEST(space_to_depth_fp32_gpu, d1142_bs2_mdf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 4.0f, 1.0f, 5.0f, 2.0f, 6.0f, 3.0f, 7.0f @@ -697,9 +694,9 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mdf) { // Output : 1x8x3x2 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 6 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 6 } }); size_t block_size = 2; set_values(input1, { @@ -716,7 +713,7 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mdf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size) ); @@ -728,7 +725,7 @@ TEST(space_to_depth_fp32_gpu, d1264_bs2_mdf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 2.0f, 8.0f, 10.0f, 16.0f, 18.0f, @@ -752,9 +749,9 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf) { // Output : 1x9x3x3 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); size_t block_size = 3; set_values(input1, { @@ -770,7 +767,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add( space_to_depth("space_to_depth", "Input0", space_to_depth::depth_first, block_size) ); @@ -782,7 +779,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf) { auto outputs = network.execute(); auto output = outputs.at("space_to_depth").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f, @@ -807,9 +804,9 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv16) { // Output : 1x9x3x3 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); size_t block_size = 3; set_values(input1, { @@ -825,7 +822,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv16) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add(reorder("reorder", "Input0", format::b_fs_yx_fsv16, data_types::f32)); topology.add(space_to_depth("space_to_depth", "reorder", space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", 
"space_to_depth", format::bfyx, data_types::f32)); @@ -837,7 +834,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv16) { auto outputs = network.execute(); auto output = outputs.at("reorder_out").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f, @@ -862,9 +859,9 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv4) { // Output : 1x9x3x3 // Input values in fp32 - engine engine; + auto& engine = get_test_engine(); - auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); + auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 9, 9 } }); size_t block_size = 3; set_values(input1, { @@ -880,7 +877,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv4) { }); topology topology; - topology.add(input_layout("Input0", input1.get_layout())); + topology.add(input_layout("Input0", input1->get_layout())); topology.add(reorder("reorder", "Input0", format::b_fs_yx_fsv4, data_types::f32)); topology.add(space_to_depth("space_to_depth", "reorder", space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", "space_to_depth", format::bfyx, data_types::f32)); @@ -892,7 +889,7 @@ TEST(space_to_depth_fp32_gpu, d1199_bs3_mdf_fsv4) { auto outputs = network.execute(); auto output = outputs.at("reorder_out").get_memory(); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); std::vector expected_results = { 0.0f, 3.0f, 6.0f, 27.0f, 30.0f, 33.0f, 54.0f, 57.0f, 60.0f, 1.0f, diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/spatial_concatenate_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/spatial_concatenate_gpu_test.cpp index 27dcab87f8b..286588efefa 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/spatial_concatenate_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/spatial_concatenate_gpu_test.cpp @@ -4,23 +4,19 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/concatenation.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include "cldnn/primitives/concatenation.hpp" using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(spatial_concatenate_f32_gpu, test01) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -38,11 +34,11 @@ TEST(spatial_concatenate_f32_gpu, test01) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_x)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -50,16 +46,16 @@ TEST(spatial_concatenate_f32_gpu, test01) { 
ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0] + input2.get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0] + input2->get_layout().size.spatial[0]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -70,10 +66,10 @@ TEST(spatial_concatenate_f32_gpu, test01) { } TEST(spatial_concatenate_f32_gpu, test02) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -93,11 +89,11 @@ TEST(spatial_concatenate_f32_gpu, test02) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_y)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -105,16 +101,16 @@ TEST(spatial_concatenate_f32_gpu, test02) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1] + input2.get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1] + input2->get_layout().size.spatial[1]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, 
get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -125,10 +121,10 @@ TEST(spatial_concatenate_f32_gpu, test02) { } TEST(spatial_concatenate_f32_gpu, test03) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -150,11 +146,11 @@ TEST(spatial_concatenate_f32_gpu, test03) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_y, padding({ 0, 0, 1, 1 }, 0.0f))); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -162,16 +158,16 @@ TEST(spatial_concatenate_f32_gpu, test03) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1] + input2.get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1] + input2->get_layout().size.spatial[1]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -182,10 +178,10 @@ TEST(spatial_concatenate_f32_gpu, test03) { } TEST(spatial_concatenate_f32_gpu, test04) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 }, padding({ 0,0,0,0 }, { 0,0,1,0 }) }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1,1,2,2 }, padding({ 0,0,0,1 }, 0.0f) }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 }, padding({ 0,0,0,0 }, { 0,0,1,0 }) }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx,{ 1,1,2,2 }, padding({ 0,0,0,1 }, 0.0f) }); set_values(input1, { 1.0f, 2.0f, 0.0f, @@ -205,11 +201,11 @@ TEST(spatial_concatenate_f32_gpu, test04) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, 
concatenation::along_x, padding({ 0,0,2,0 }, { 0,0,0,0 }))); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -217,16 +213,16 @@ TEST(spatial_concatenate_f32_gpu, test04) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0] + input2.get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0] + input2->get_layout().size.spatial[0]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -237,11 +233,11 @@ TEST(spatial_concatenate_f32_gpu, test04) { } TEST(spatial_concatenate_f32_gpu, inputs_3) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); - memory input3 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input3 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -264,12 +260,12 @@ TEST(spatial_concatenate_f32_gpu, inputs_3) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); - tpl.add(input_layout("in3", input3.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); + tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { "in1", "in2", "in3" }, concatenation::along_x)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); net.set_input_data("in3", input3); @@ -278,16 +274,16 @@ TEST(spatial_concatenate_f32_gpu, inputs_3) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0] + 
input2.get_layout().size.spatial[0] + input3.get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0] + input2->get_layout().size.spatial[0] + input3->get_layout().size.spatial[0]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -298,11 +294,11 @@ TEST(spatial_concatenate_f32_gpu, inputs_3) { } TEST(spatial_concatenate_f32_gpu, inputs_3_uneven_axis_b) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 3,1,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); - memory input3 = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 2,1,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 3,1,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 1,1,2,2 } }); + memory::ptr input3 = engine.allocate_memory(layout{ data_types::f32, format::bfyx, { 2,1,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -352,12 +348,12 @@ TEST(spatial_concatenate_f32_gpu, inputs_3_uneven_axis_b) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); - tpl.add(input_layout("in3", input3.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); + tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { "in1", "in2", "in3" }, concatenation::along_b)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); net.set_input_data("in3", input3); @@ -366,16 +362,16 @@ TEST(spatial_concatenate_f32_gpu, inputs_3_uneven_axis_b) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0] + input2.get_layout().size.batch[0] + input3.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0] + input2->get_layout().size.batch[0] + input3->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock 
out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -386,10 +382,10 @@ TEST(spatial_concatenate_f32_gpu, inputs_3_uneven_axis_b) { } TEST(spatial_concatenate_f32_gpu, inputs3d_axis_x) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -413,11 +409,11 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_x) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_x)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -425,17 +421,17 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_x) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0] + input2.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[2], input1.get_layout().size.spatial[2]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0] + input2->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[2], input1->get_layout().size.spatial[2]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -446,10 +442,10 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_x) { } TEST(spatial_concatenate_f32_gpu, inputs3d_axis_y) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -477,11 +473,11 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_y) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", 
input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_y)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -489,17 +485,17 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_y) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1] + input2.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[2], input1.get_layout().size.spatial[2]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1] + input2->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[2], input1->get_layout().size.spatial[2]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -510,10 +506,10 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_y) { } TEST(spatial_concatenate_f32_gpu, inputs3d_axis_z) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -541,11 +537,11 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_z) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_z)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -553,17 +549,17 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_z) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[2], 
input1.get_layout().size.spatial[2] + input2.get_layout().size.spatial[2]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[2], input1->get_layout().size.spatial[2] + input2->get_layout().size.spatial[2]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -574,10 +570,10 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_z) { } TEST(spatial_concatenate_f32_gpu, inputs3d_axis_b) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 2,1,2,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 2,1,2,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); set_values(input1, { 1.0f, 2.0f, @@ -616,11 +612,11 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_b) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { "in1", "in2" }, concatenation::along_b)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); @@ -628,17 +624,17 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_b) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0] + input2.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[2], input1.get_layout().size.spatial[2]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0] + input2->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[2], input1->get_layout().size.spatial[2]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) @@ -649,11 +645,11 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_axis_b) { } TEST(spatial_concatenate_f32_gpu, 
inputs3d_3_uneven_axis_b) { - engine eng; + auto& engine = get_test_engine(); - memory input1 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 3,1,2,2,2 } }); - memory input2 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); - memory input3 = memory::allocate(eng, layout{ data_types::f32, format::bfzyx, { 2,1,2,2,2 } }); + memory::ptr input1 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 3,1,2,2,2 } }); + memory::ptr input2 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 1,1,2,2,2 } }); + memory::ptr input3 = engine.allocate_memory(layout{ data_types::f32, format::bfzyx, { 2,1,2,2,2 } }); set_values(input1, { //b0 @@ -745,12 +741,12 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_3_uneven_axis_b) { }; topology tpl; - tpl.add(input_layout("in1", input1.get_layout())); - tpl.add(input_layout("in2", input2.get_layout())); - tpl.add(input_layout("in3", input3.get_layout())); + tpl.add(input_layout("in1", input1->get_layout())); + tpl.add(input_layout("in2", input2->get_layout())); + tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { "in1", "in2", "in3" }, concatenation::along_b)); - network net(eng, tpl); + network net(engine, tpl); net.set_input_data("in1", input1); net.set_input_data("in2", input2); net.set_input_data("in3", input3); @@ -759,17 +755,17 @@ TEST(spatial_concatenate_f32_gpu, inputs3d_3_uneven_axis_b) { ASSERT_TRUE(outputs.size() == 1 && outputs.count("conc") == 1); auto output_mem = outputs.at("conc").get_memory(); - auto output_layout = output_mem.get_layout(); + auto output_layout = output_mem->get_layout(); - ASSERT_EQ(output_layout.size.batch[0], input1.get_layout().size.batch[0] + input2.get_layout().size.batch[0] + input3.get_layout().size.batch[0]); - ASSERT_EQ(output_layout.size.feature[0], input1.get_layout().size.feature[0]); - ASSERT_EQ(output_layout.size.spatial[0], input1.get_layout().size.spatial[0]); - ASSERT_EQ(output_layout.size.spatial[1], input1.get_layout().size.spatial[1]); - ASSERT_EQ(output_layout.size.spatial[2], input1.get_layout().size.spatial[2]); + ASSERT_EQ(output_layout.size.batch[0], input1->get_layout().size.batch[0] + input2->get_layout().size.batch[0] + input3->get_layout().size.batch[0]); + ASSERT_EQ(output_layout.size.feature[0], input1->get_layout().size.feature[0]); + ASSERT_EQ(output_layout.size.spatial[0], input1->get_layout().size.spatial[0]); + ASSERT_EQ(output_layout.size.spatial[1], input1->get_layout().size.spatial[1]); + ASSERT_EQ(output_layout.size.spatial[2], input1->get_layout().size.spatial[2]); - ASSERT_EQ(output_mem.get_layout().get_linear_size(), expected_output.size()); + ASSERT_EQ(output_mem->get_layout().get_linear_size(), expected_output.size()); { - auto out_ptr = output_mem.pointer(); + cldnn::mem_lock out_ptr(output_mem, get_test_stream()); size_t idx = 0; for (auto const& value : out_ptr) diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp index 0d420abfefa..10cb13acc31 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp @@ -2,23 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/split.hpp" -#include "api/scale.hpp" -#include -#include 
-#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include #include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; template std::vector generate_random_input(size_t b, size_t f, size_t y, size_t x, int min, int max) { @@ -34,7 +29,7 @@ std::vector generate_random_input(size_t b, size_t f, size_t y, size_t x, int } template -void check_feature_map(cldnn::pointer output_ptr, std::vector &input_vec, size_t batch_num, size_t feature_num, size_t y_size, size_t x_size, size_t feature_id, size_t factor) +void check_feature_map(T* output_ptr, std::vector &input_vec, size_t batch_num, size_t feature_num, size_t y_size, size_t x_size, size_t feature_id, size_t factor) { for (size_t b = 0; b < batch_num; ++b) { //B for (size_t y = 0; y < y_size; ++y) { //Y @@ -50,15 +45,15 @@ void check_feature_map(cldnn::pointer output_ptr, std::vector &input_vec, template void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vector split_offsets) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); cldnn::tensor reference_input_size = { batch_num, feature_num, x_size, y_size }; - cldnn::memory input = memory::allocate(engine, { type_to_data_type::value, format::bfyx, reference_input_size }); + cldnn::memory::ptr input = engine.allocate_memory({ type_to_data_type::value, format::bfyx, reference_input_size }); std::vector > input_ids_offsets; topology topology; - topology.add(input_layout("input", input.get_layout())); - + topology.add(input_layout("input", input->get_layout())); + // lambda exoression to create the primitive id for the splits auto create_split_id = [](size_t splitNum) { std::stringstream ss; @@ -68,7 +63,7 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec }; // Create the splits with the split ids for the topology - for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++) + for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++) { input_ids_offsets.push_back({ create_split_id(splitNum), split_offsets[splitNum]}); } @@ -85,7 +80,7 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec // The number of splits should match the expected number of splits EXPECT_EQ(outputs.size(), size_t(split_offsets.size())); - + std::vector expected_sizes; for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++) // Calculate the expected sizes { @@ -110,15 +105,15 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec expected_sizes.push_back(size); } - pointer input_ptr = input.pointer(); + cldnn::mem_lock input_ptr(input, get_test_stream()); for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++) { primitive_id split_id = "split:" + create_split_id(splitNum); - cldnn::memory output = outputs.at(split_id).get_memory(); - auto prim = output.get_layout(); + cldnn::memory::ptr output = outputs.at(split_id).get_memory(); + auto prim = output->get_layout(); EXPECT_EQ(prim.size, expected_sizes[splitNum]); - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); // Output tensor size auto output_batch = prim.size.batch[0]; @@ -131,25 +126,25 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec auto input_feature_offset = split_offsets[splitNum].feature[0]; auto input_y_offset = split_offsets[splitNum].spatial[1]; auto input_x_offset = 
split_offsets[splitNum].spatial[0]; - + // iterator to iterate through input buffer auto input_batch_itr = input_batch_offset; auto input_feature_itr = input_feature_offset; auto input_y_itr = input_y_offset; auto input_x_itr = input_x_offset; - + for (auto b = 0; b < output_batch; ++b) { // B - + // reset the input feature iterator - input_feature_itr = input_feature_offset; + input_feature_itr = input_feature_offset; for (auto f = 0; f < output_feature; f++) { // F - + // reset the input y iterator - input_y_itr = input_y_offset; + input_y_itr = input_y_offset; for (auto y = 0; y < output_y; y++) { // Y - + // reset the input x iterator - input_x_itr = input_x_offset; + input_x_itr = input_x_offset; for (auto x = 0; x < output_x; x++) { // X auto linear_id = input_x_itr + x_size * (input_y_itr + y_size * (input_feature_itr + feature_num * input_batch_itr)); // index in input auto output_linear_id = x + output_x * (y + output_y * (f + output_feature * b)); // index in output @@ -180,7 +175,7 @@ TEST(split_gpu_f32, split_1d_uneven_2_splits) { auto y_size = 3; std::vector split_offsets = { {0, 0, 0, 0}, - {0, 1, 0, 0} + {0, 1, 0, 0} }; split_test(batch_num, feature_num, x_size, y_size, split_offsets); @@ -201,7 +196,7 @@ TEST(split_gpu_i64, split_1d_uneven_2_splits) { auto y_size = 3; std::vector split_offsets = { {0, 0, 0, 0}, - {0, 1, 0, 0} + {0, 1, 0, 0} }; split_test(batch_num, feature_num, x_size, y_size, split_offsets); @@ -209,13 +204,13 @@ TEST(split_gpu_i64, split_1d_uneven_2_splits) { TEST(split_gpu_f32, basic_split_concat_optimization) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 25, 1, 256 } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 25, 1, 256 } }); tests::set_random_values(input); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); std::vector> offsets; std::vector ids; for (int i = 0; i < 25; i++) @@ -238,8 +233,8 @@ TEST(split_gpu_f32, basic_split_concat_optimization) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); - auto input_ptr = input.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); for (int i = 0; i < 25*256; ++i) { @@ -249,13 +244,13 @@ TEST(split_gpu_f32, basic_split_concat_optimization) { TEST(split_gpu_i64, basic_split_concat_optimization) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::i64,format::bfyx,{ 1, 25, 1, 256 } }); + auto input = engine.allocate_memory({ data_types::i64,format::bfyx,{ 1, 25, 1, 256 } }); tests::set_random_values(input); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); std::vector> offsets; std::vector ids; for (int i = 0; i < 25; i++) @@ -278,8 +273,8 @@ TEST(split_gpu_i64, basic_split_concat_optimization) { auto outputs = network.execute(); auto output = outputs.at("output").get_memory(); - auto output_ptr = output.pointer(); - auto input_ptr = input.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock input_ptr(input, get_test_stream()); for (int i = 0; i < 25*256; ++i) { @@ -523,17 +518,17 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_feature_bfyx) { // id: 
"out1", offsets: { 0, 1, 0, 0 } // id: "out2", offsets: { 0, 2, 0, 0 } - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 3; auto x_size = 4; auto y_size = 3; - auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(split("split", "input", { { "out0", { 0, 0, 0, 0 } }, @@ -556,8 +551,8 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_feature_bfyx) { { auto split_id = "split:out" + std::to_string(i); auto output = outputs.at(split_id).get_memory(); - auto output_ptr = output.pointer(); - check_feature_map(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, 1); + cldnn::mem_lock output_ptr(output, get_test_stream()); + check_feature_map(output_ptr.data(), input_vec, batch_num, feature_num, y_size, x_size, i, 1); } } @@ -569,17 +564,17 @@ TEST(split_gpu_i64, basic_in2x3x2x2_split_feature_bfyx) { // id: "out1", offsets: { 0, 1, 0, 0 } // id: "out2", offsets: { 0, 2, 0, 0 } - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 3; auto x_size = 4; auto y_size = 3; - auto input = memory::allocate(engine, { data_types::i64,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto input = engine.allocate_memory({ data_types::i64,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(split("split", "input", { { "out0", { 0, 0, 0, 0 } }, @@ -602,8 +597,8 @@ TEST(split_gpu_i64, basic_in2x3x2x2_split_feature_bfyx) { { auto split_id = "split:out" + std::to_string(i); auto output = outputs.at(split_id).get_memory(); - auto output_ptr = output.pointer(); - check_feature_map(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, 1); + cldnn::mem_lock output_ptr(output, get_test_stream()); + check_feature_map(output_ptr.data(), input_vec, batch_num, feature_num, y_size, x_size, i, 1); } } @@ -616,23 +611,23 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_scale_feature_bfyx) { // id: "out2", offsets: { 0, 2, 0, 0 } // Additional scale layer at the end - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); auto batch_num = 6; auto feature_num = 3; auto x_size = 4; auto y_size = 3; - auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); - auto scale_input0 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto scale_input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto scale_input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ batch_num, feature_num, x_size, y_size } }); + auto scale_input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto scale_input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto scale_input2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); - 
topology.add(input_layout("scale_input0", scale_input0.get_layout())); - topology.add(input_layout("scale_input1", scale_input1.get_layout())); - topology.add(input_layout("scale_input2", scale_input2.get_layout())); + topology.add(input_layout("input", input->get_layout())); + topology.add(input_layout("scale_input0", scale_input0->get_layout())); + topology.add(input_layout("scale_input1", scale_input1->get_layout())); + topology.add(input_layout("scale_input2", scale_input2->get_layout())); topology.add(split("split", "input", { { "out0",{ 0, 0, 0, 0 } }, @@ -649,7 +644,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_scale_feature_bfyx) { set_values(scale_input1, scale_input_vec1); std::vector scale_input_vec2 = { 3.f }; set_values(scale_input2, scale_input_vec2); - + std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); @@ -668,7 +663,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_scale_feature_bfyx) { { auto split_id = "scale" + std::to_string(i); auto output = outputs.at(split_id).get_memory(); - auto output_ptr = output.pointer(); - check_feature_map(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, i + 1); + cldnn::mem_lock output_ptr(output, get_test_stream()); + check_feature_map(output_ptr.data(), input_vec, batch_num, feature_num, y_size, x_size, i, i + 1); } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/streams_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/streams_test.cpp index b679ac71a6b..ed1dfa1110a 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/streams_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/streams_test.cpp @@ -2,50 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; -static engine_configuration streams_config(false, - false, - false, - "", - "", - true, - "", - "", - priority_mode_types::disabled, - throttle_mode_types::disabled, - true, - 2); +TEST(gpu_streams, can_create_networks_for_stream) { + auto& engine = get_test_engine(); -TEST(gpu_streams, DISABLED_can_allocate_memory_for_stream) -{ - engine _engine(streams_config); - - ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}))); - ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 0)); - ASSERT_NO_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 1)); - ASSERT_ANY_THROW(memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 2)); - - auto mem0 = memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 0); - ASSERT_EQ(mem0.get_net_id(), 0); - auto mem1 = memory::allocate(_engine, layout(data_types::f32, format::bfyx, {1, 2, 3, 4}), 1); - ASSERT_EQ(mem1.get_net_id(), 1); -} - -TEST(gpu_streams, can_create_networks_for_stream) -{ - engine _engine(streams_config); - - auto input = memory::allocate(_engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); set_values(input, { 1.0f, -2.0f, -3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, -6.0f, @@ -58,9 +27,9 @@ TEST(gpu_streams, can_create_networks_for_stream) 1.0f, 1.0f, 1.0f, -0.5f, 1.0f }; topology topology( - input_layout("input", 
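
The hunks in this file and the ones that follow apply one mechanical migration: cldnn::memory::allocate(engine, layout) becomes engine.allocate_memory(layout) returning a memory::ptr, member access goes through ->, and memory::pointer<T>() is replaced by a typed cldnn::mem_lock bound to the test stream. A minimal sketch of the new pattern, assuming only the test_utils.h helpers these files already include; the function name and the 1x1x2x2 layout are illustrative, not taken from any one test:

#include "test_utils.h"

using namespace cldnn;
using namespace ::tests;

// illustrative helper, not part of the test suite
static void allocate_and_read_example() {
    auto& engine = get_test_engine();                 // non-const reference to the shared test engine
    memory::ptr buf = engine.allocate_memory(
        { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });   // returns a shared memory::ptr

    set_values(buf, { 0.f, 1.f, 2.f, 3.f });

    // host access: a typed lock against the test stream replaces memory::pointer<float>()
    cldnn::mem_lock<float> ptr(buf, get_test_stream());
    for (size_t i = 0; i < ptr.size(); ++i) {
        (void)ptr[i];   // ptr behaves like a contiguous host-side view of the buffer
    }
}
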
input.get_layout()), + input_layout("input", input->get_layout()), activation("relu", "input", activation_func::relu_negative_slope, activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(_engine, topology, build_options()); + network network(engine, topology, build_options()); network.set_input_data("input", input); auto outputs = network.execute(); @@ -68,10 +37,8 @@ TEST(gpu_streams, can_create_networks_for_stream) EXPECT_EQ(outputs.begin()->first, "relu"); auto output_memory = outputs.at("relu").get_memory(); - auto output_layout = output_memory.get_layout(); - auto output_ptr = output_memory.pointer(); - - EXPECT_EQ(output_memory.get_net_id(), network.get_id()); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); int y_size = output_layout.size.spatial[1]; int x_size = output_layout.size.spatial[0]; @@ -89,9 +56,9 @@ TEST(gpu_streams, can_create_networks_for_stream) } TEST(gpu_streams, check_networks_can_use_the_same_weights) { - engine _engine(streams_config); + auto& engine = get_test_engine(); - auto weights = memory::allocate(_engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); VVF output_vec = { { 20.0f, 27.0f, 38.0f }, @@ -105,12 +72,12 @@ TEST(gpu_streams, check_networks_can_use_the_same_weights) { convolution("conv", "input", { "weights" }, { 1,1,1,2 })); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); - program prog(_engine, topology, build_options()); + program prog(engine, topology, build_options()); network network0(prog, 0); network network1(prog, 1); - auto input0 = memory::allocate(_engine, input0_layout, network0.get_id()); - auto input1 = memory::allocate(_engine, input0_layout, network1.get_id()); + auto input0 = engine.allocate_memory(input0_layout); + auto input1 = engine.allocate_memory(input0_layout); set_values(input0, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(input1, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); @@ -126,12 +93,10 @@ TEST(gpu_streams, check_networks_can_use_the_same_weights) { auto output_memory0 = outputs0.at("conv").get_memory(); auto output_memory1 = outputs1.at("conv").get_memory(); - auto output_layout = output_memory0.get_layout(); - auto output_ptr0 = output_memory0.pointer(); - auto output_ptr1 = output_memory1.pointer(); + auto output_layout = output_memory0->get_layout(); + cldnn::mem_lock output_ptr0(output_memory0, get_test_stream()); + cldnn::mem_lock output_ptr1(output_memory1, get_test_stream()); - EXPECT_EQ(output_memory0.get_net_id(), network0.get_id()); - EXPECT_EQ(output_memory1.get_net_id(), network1.get_id()); auto wmem0 = network0.get_output_memory("weights"); auto wmem1 = network1.get_output_memory("weights"); @@ -153,3 +118,71 @@ TEST(gpu_streams, check_networks_can_use_the_same_weights) { } } } + +TEST(gpu_streams, check_networks_use_unique_mutable_data_per_stream) { + auto& engine = get_test_engine(); + + auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + + VVF output_vec = { + { 20.0f, 27.0f, 38.0f }, + { 17.0f, 19.0f, 19.0f } }; + + layout input0_layout(data_types::f32, format::bfyx, { 1, 1, 5, 4 }); + + topology topology( + input_layout("input", input0_layout), + mutable_data("weights", weights), + 
convolution("conv", "input", { "weights" }, { 1,1,1,2 })); + + set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); + program prog(engine, topology, build_options()); + network network0(prog, 0); + network network1(prog, 1); + + auto input0 = engine.allocate_memory(input0_layout); + auto input1 = engine.allocate_memory(input0_layout); + set_values(input0, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); + set_values(input1, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); + + network0.set_input_data("input", input0); + network1.set_input_data("input", input1); + + auto outputs0 = network0.execute(); + auto outputs1 = network1.execute(); + EXPECT_EQ(outputs0.size(), size_t(1)); + EXPECT_EQ(outputs1.size(), size_t(1)); + EXPECT_EQ(outputs0.begin()->first, "conv"); + EXPECT_EQ(outputs1.begin()->first, "conv"); + + auto output_memory0 = outputs0.at("conv").get_memory(); + auto output_memory1 = outputs1.at("conv").get_memory(); + auto output_layout = output_memory0->get_layout(); + cldnn::mem_lock output_ptr0(output_memory0, get_test_stream()); + cldnn::mem_lock output_ptr1(output_memory1, get_test_stream()); + + auto wmem0 = network0.get_output_memory("weights"); + auto wmem1 = network1.get_output_memory("weights"); + + // check that each stream has unique weights data + ASSERT_NE(wmem0, wmem1); + + // check that initial memory is reused by the primary stream + ASSERT_EQ(wmem0, weights); + + int y_size = output_layout.size.spatial[1]; + int x_size = output_layout.size.spatial[0]; + int f_size = output_layout.size.feature[0]; + int b_size = output_layout.size.batch[0]; + EXPECT_EQ(output_layout.format, format::bfyx); + EXPECT_EQ(y_size, 2); + EXPECT_EQ(x_size, 3); + EXPECT_EQ(f_size, 1); + EXPECT_EQ(b_size, 1); + for (int y = 0; y < y_size; ++y) { + for (int x = 0; x < x_size; ++x) { + EXPECT_EQ(output_vec[y][x], output_ptr0[y * x_size + x]); + EXPECT_EQ(output_vec[y][x], output_ptr1[y * x_size + x]); + } + } +} diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp index 2634f03962a..aa4ec3aa0d3 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp @@ -3,17 +3,15 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include -#include "api/strided_slice.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" -#include + +#include "test_utils.h" + +#include +#include +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { // Input (BFYX): 2x2x2x2 @@ -22,11 +20,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ 
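
The two streams tests above share one compiled program between networks created for different stream ids; constant data primitives end up pointing at the same buffer, while mutable_data is cloned per stream. A condensed sketch of that construction, assuming the same test_utils.h include as above and reusing identifiers from check_networks_can_use_the_same_weights (abbreviated, not a new API guarantee):

auto& engine = get_test_engine();
auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } });
set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });

topology topology(
    input_layout("input", layout(data_types::f32, format::bfyx, { 1, 1, 5, 4 })),
    data("weights", weights),
    convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }));

program prog(engine, topology, build_options());
network network0(prog, 0);   // stream id 0
network network1(prog, 1);   // stream id 1

auto w0 = network0.get_output_memory("weights");
auto w1 = network1.get_output_memory("weights");
// with data(): w0 == w1; with mutable_data(): distinct buffers, w0 aliasing the original allocation
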
data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -43,7 +41,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -63,7 +61,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full) { std::vector answers = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -79,11 +77,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -100,7 +98,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -120,7 +118,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full) { std::vector answers = { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -136,11 +134,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = 
engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -157,7 +155,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -179,7 +177,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_ignore) { 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -195,11 +193,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -216,7 +214,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -238,7 +236,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_ignore) { 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -254,11 +252,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -275,7 +273,7 @@ 
TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -294,7 +292,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_single) { std::vector answers = { 15.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -310,11 +308,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { // Stride (BFYX): 1x1x1x1 // Output (BFYX): 1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -331,7 +329,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -350,7 +348,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_single) { std::vector answers = { 15.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -366,11 +364,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { // Stride (BFYX): 1x1x2x1 // Output (BFYX): 2x2x2x3 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, @@ -391,7 +389,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", 
strides)); @@ -413,7 +411,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x3_stride) { 24.f, 25.f, 26.f, 30.f, 31.f, 32.f, 36.f, 37.f, 38.f, 42.f, 43.f, 44.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -429,11 +427,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) { // Stride (BFYX): 1x1x2x1 // Output (BFYX): 2x2x2x3 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 3, 4 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, @@ -454,7 +452,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -476,7 +474,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x3_stride) { 24.f, 25.f, 26.f, 30.f, 31.f, 32.f, 36.f, 37.f, 38.f, 42.f, 43.f, 44.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -492,11 +490,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) { // Stride (BFYX): 1x1x1x2 // Output (BFYX): 1x2x4x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 4, 4 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 4, 4 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -530,7 +528,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -559,7 +557,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x4_part_stride) { 61.0f, 63.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), 
answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -575,11 +573,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) { // Stride (BFYX): 1x1x1x2 // Output (BFYX): 1x2x4x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 4, 4 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 4, 4 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, @@ -613,7 +611,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -642,7 +640,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x4_part_stride) { 61.0f, 63.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -656,11 +654,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) { // New_axis_mask: 1 // Output (BFYX): 1x2x2x4 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 4 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, @@ -677,7 +675,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -699,7 +697,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x4x1_new_axis_mask) { 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -712,11 +710,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) { // New_axis_mask: 1 // Output (BFYX): 1x2x2x4 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 4 } }); - auto begin 
= memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 4 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, @@ -733,7 +731,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -755,7 +753,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x4x1_new_axis_mask) { 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -768,11 +766,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) { // New_axis_mask: 101 // Output (BFYX): 1x2x1x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f @@ -788,7 +786,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -809,7 +807,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1_new_axis_mask_2) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -822,11 +820,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) { // New_axis_mask: 101 // Output (BFYX): 1x2x1x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, 
format::bfyx, { 2, 2, 1, 1 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f @@ -842,7 +840,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -863,7 +861,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1_new_axis_mask_2) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -875,11 +873,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) { // Input (BFYX): 2x2x1x1 // Output (BFYX): 2x2x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 2, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f @@ -895,7 +893,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -916,7 +914,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x1x1) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -928,11 +926,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) { // Input (BFYX): 2x2x1x1 // Output (BFYX): 2x2x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 2, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f @@ -948,7 +946,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) { }); topology topology; - 
topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -969,7 +967,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x1x1) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -981,11 +979,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 1x2x2x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1001,7 +999,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1022,7 +1020,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -1034,11 +1032,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 1x2x2x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1054,7 +1052,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1075,7 +1073,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1) { 0.0f, 1.0f, 2.0f, 3.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, 
get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -1087,11 +1085,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 2x1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1107,7 +1105,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1128,7 +1126,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2) { 0.0f, 4.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -1140,11 +1138,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 2x1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1160,7 +1158,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1181,7 +1179,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2) { 0.0f, 4.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -1196,11 +1194,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) { // Stride (BFYX): -1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, 
format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -1217,7 +1215,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1237,7 +1235,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x2_full_negative_stride) { std::vector answers = { 12.f, 13.f, 14.f, 15.f, 8.f, 9.f, 10.f, 11.f, 4.f, 5.f, 6.f, 7.f, 0.f, 1.f, 2.f, 3.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -1253,11 +1251,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) { // Stride (BFYX): -1x1x1x1 // Output (BFYX): 2x2x2x2 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 4, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, @@ -1274,7 +1272,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1294,7 +1292,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x2_full_negative_stride) { std::vector answers = { 12.f, 13.f, 14.f, 15.f, 8.f, 9.f, 10.f, 11.f, 4.f, 5.f, 6.f, 7.f, 0.f, 1.f, 2.f, 3.f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); ASSERT_EQ(output_ptr.size(), answers.size()); for (size_t i = 0; i < answers.size(); ++i) @@ -1307,11 +1305,11 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 2x1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - 
auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i32, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1327,7 +1325,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1348,7 +1346,7 @@ TEST(strided_slice_gpu_f32_i32, test_2x2x2x1x1_2_negative_all) { 0.0f, 4.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { @@ -1360,11 +1358,11 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) { // Input (BFZYX): 2x2x2x1x1 // Output (BFZYX): 2x1x1x1x1 - const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); - auto begin = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto end = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); - auto strides = memory::allocate(engine, { data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto& engine = get_test_engine(); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 2, 2, 1, 1, 2 } }); + auto begin = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto end = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); + auto strides = engine.allocate_memory({ data_types::i64, format::bfyx, { 3, 1, 1, 1 } }); set_values(input, { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f @@ -1380,7 +1378,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) { }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("input2", begin)); topology.add(data("input3", end)); topology.add(data("input4", strides)); @@ -1401,7 +1399,7 @@ TEST(strided_slice_gpu_f32_i64, test_2x2x2x1x1_2_negative_all) { 0.0f, 4.0f }; - auto output_ptr = output.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); for (size_t i = 0; i < answers.size(); ++i) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/tensor_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/tensor_test.cpp index 22abf3fbc81..c7563f4e861 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/tensor_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/tensor_test.cpp @@ -2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include "test_utils.h" TEST(tensor_api, order_new_notation) { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp index b734a9e7b4f..b80f4eedd6d 100644 --- 
a/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp @@ -3,22 +3,19 @@ // /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/tile.hpp" -#include -#include -#include -#include "test_utils/test_utils.h" + +#include "test_utils.h" + +#include +#include #include using namespace cldnn; -using namespace tests; +using namespace ::tests; template -void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num_tiles) +void tile_ref(const memory::ptr input, memory::ptr output, tile::tile_axis axis, int num_tiles) { auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair { @@ -33,13 +30,13 @@ void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num } }; - const pointer src = input.pointer(); - pointer dst = output.pointer(); + cldnn::mem_lock src(input, get_test_stream()); + cldnn::mem_lock dst(output, get_test_stream()); const data_t* psrc = src.data(); data_t* pdst = dst.data(); - auto sizes = get_sizes(input.get_layout().size, axis); + auto sizes = get_sizes(input->get_layout().size, axis); int outer_dim = sizes.first; int inner_dim = sizes.second; @@ -58,13 +55,13 @@ void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num } TEST(tile_gpu, basic_in1x2x2x2_axis_b) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(tile("tile", "input", tensor(2, 2, 2, 2))); std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f, @@ -78,22 +75,22 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } TEST(tile_gpu, basic_in1x2x2x2_axis_f) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 2, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 4, 2, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(tile("tile", "input", tensor(1, 4, 2, 2))); std::vector input_vec = { 1.f, 0.f, @@ -110,22 +107,22 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) { auto outputs = network.execute(); auto output = 
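
The tile_ref helper above now takes memory::ptr arguments and locks both buffers against the test stream before walking them; the same shape applies to any reference helper touched by this patch. A trimmed sketch of that helper shape, assuming the test_utils.h include from these files and buffers allocated with the same layout (the element-wise copy stands in for the real tiling loop; the helper name is illustrative):

// illustrative helper name, not part of the test suite
template <typename data_t>
void reference_helper(memory::ptr input, memory::ptr output) {
    cldnn::mem_lock<data_t> src(input, get_test_stream());
    cldnn::mem_lock<data_t> dst(output, get_test_stream());
    const data_t* psrc = src.data();
    data_t* pdst = dst.data();
    // assumes input and output share one layout, so count() matches on both sides
    for (size_t i = 0; i < input->count(); ++i)
        pdst[i] = psrc[i];
}
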
outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } TEST(tile_gpu, basic_in1x2x2x2_axis_y) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 4 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(tile("tile", "input", tensor(1, 2, 2, 4))); std::vector input_vec = { 1.f, 0.f, @@ -142,22 +139,22 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } TEST(tile_gpu, basic_in1x2x2x2_axis_x) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(tile("tile", "input", tensor(1, 2, 4, 2))); std::vector input_vec = { 1.f, 0.f, @@ -174,22 +171,22 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 2 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", 
input->get_layout())); topology.add(tile("tile", "input", tensor(1, 2, 4, 2))); std::vector input_vec = { 1.f, 0.f, 5.f, 1.5f}; @@ -202,22 +199,22 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } TEST(tile_gpu, basic_in1x2x2x2_axis_z) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 2, 2, 2, 2 } }); - auto output_ref = memory::allocate(engine, { data_types::f32, format::bfzyx,{ 1, 2, 2, 2, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 2, 2, 2 } }); + auto output_ref = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 2, 2, 4 } }); topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(tile("tile", "input", tensor(1, 2, 2, 2, 4))); std::vector input_vec = { @@ -239,10 +236,10 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_z) { auto outputs = network.execute(); auto output = outputs.at("tile").get_memory(); - auto output_ptr = output.pointer(); - auto output_ref_ptr = output_ref.pointer(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream()); - for (unsigned int i = 0; i < output_ref.count(); ++i) { + for (unsigned int i = 0; i < output_ref->count(); ++i) { EXPECT_EQ(output_ptr[i], output_ref_ptr[i]) << "Index=" << i; } } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp index 11e34e2f601..42eae68031c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp @@ -2,25 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include + #include -#include "api/memory.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include @@ -55,6 +53,7 @@ protected: public: // return false for invalid output_layout virtual bool AddPrimitive(cldnn::topology& topology, cldnn::primitive_id id, cldnn::layout output_layout, std::deque& input_layouts) = 0; + virtual ~topology_layer_type() = default; }; static std::vector> layer_types; static cldnn::topology* CreateTopology(cldnn::layout output_layout, const std::vector generator_vec) @@ -112,7 +111,7 @@ protected: static void AddRandomMemory(cldnn::topology& topology, cldnn::primitive_id id, cldnn::layout layout) { //todo: allocate mem, randomize values by type, add to topology - auto mem_primitive = cldnn::memory::allocate(topology_test::engine, layout); + auto mem_primitive = topology_test::engine.allocate_memory(layout); switch (layout.data_type) { case cldnn::data_types::f32: @@ -368,8 +367,8 @@ public: 
EXPECT_NE(topology, nullptr); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); - cldnn::engine temp_engine;// using temp_engine since reusing the same one does not free all resources (network build becomes slower and slower) - cldnn::network network(temp_engine, *topology, options); + auto& engine = tests::get_test_engine(); + cldnn::network network(engine, *topology, options); auto outputs = network.execute(); EXPECT_NE(outputs.find(topology_generator::output_layer_id), outputs.end()); @@ -443,11 +442,11 @@ protected: cldnn::layout output_layout; std::vector generator; - static const cldnn::engine& engine; + static cldnn::engine& engine; static std::vector all_output_layouts;//just for tear-down }; -const cldnn::engine& topology_test::engine = tests::get_test_engine(); +cldnn::engine& topology_test::engine = tests::get_test_engine(); std::vector topology_test::all_output_layouts = {}; std::vector> topology_test::topology_generator::layer_types = { diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp index bb613fccd13..94689c2ce4b 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp @@ -4,18 +4,14 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include -#include "api/memory.hpp" -#include -#include "api/concatenation.hpp" -#include -#include -#include -#include -#include "test_utils/test_utils.h" +#include "test_utils.h" + +#include +#include "cldnn/primitives/concatenation.hpp" +#include using namespace cldnn; -using namespace tests; +using namespace ::tests; /* This set of tests has been designed to check the correctness of trim_to_outputs optimization pass @@ -29,14 +25,14 @@ using namespace tests; ---> conv2 (to be eliminated) */ TEST(trim_to_outputs, one_node_to_eliminate_case1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(cldnn::build_option::outputs({ "conv1" })); build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders - auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); - auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); - auto bias = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.1f }); set_values(weights, { 2.1f }); @@ -45,7 +41,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case1) { std::vector out_data = { 3.91f }; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights", weights)); topology.add(data("bias", bias)); topology.add(cldnn::convolution("conv1", { "input" }, { "weights" }, { "bias" })); @@ -61,7 +57,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case1) { for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); for 
(size_t cntr = 0; cntr < out_data.size(); cntr++) { EXPECT_NEAR(output_ptr[cntr], out_data[cntr], 1e-4); @@ -78,16 +74,16 @@ Network structure: input -> conv1 (output) ---> conv2 (to be eliminated along with its weights and bias) */ TEST(trim_to_outputs, one_node_to_eliminate_case2) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(cldnn::build_option::outputs({ "conv1" })); build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto bias1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto bias2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto bias1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto bias2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.1f }); set_values(weights1, { 2.1f }); @@ -98,7 +94,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case2) { std::vector out_data = { 3.91f }; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); topology.add(data("bias1", bias1)); topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }, { "bias1" })); @@ -116,7 +112,7 @@ TEST(trim_to_outputs, one_node_to_eliminate_case2) { for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); for (size_t cntr = 0; cntr < out_data.size(); cntr++) { @@ -135,16 +131,16 @@ Network structure: input ---> conv1 --- ---> conv4 (output) Convolutions conv2, conv3 should be optimized out along with weights23 shered by conv2 and conv3. 
*/ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { - const auto& engine = get_test_engine(); + auto& engine = get_test_engine(); build_options build_opt; build_opt.set_option(cldnn::build_option::outputs({ "conv4" })); build_opt.set_option(build_option::optimize_data(false)); // to avoid adding reorders - auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); - auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto weights23 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto weights4 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - auto bias = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto weights23 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto weights4 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto bias = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); set_values(input, { 1.1f }); set_values(weights1, { 2.1f }); @@ -155,7 +151,7 @@ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { std::vector out_data = { 9.42f }; topology topology; - topology.add(input_layout("input", input.get_layout())); + topology.add(input_layout("input", input->get_layout())); topology.add(data("weights1", weights1)); topology.add(data("bias", bias)); topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }, { "bias" })); @@ -175,7 +171,7 @@ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { for (auto& it : outputs) { - auto output_ptr = it.second.get_memory().pointer(); + cldnn::mem_lock output_ptr(it.second.get_memory(), get_test_stream()); for (size_t cntr = 0; cntr < out_data.size(); cntr++) { @@ -184,4 +180,3 @@ TEST(trim_to_outputs, two_nodes_to_eliminate_case1) { EXPECT_EQ(it.first, "conv4"); } } - diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp b/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp deleted file mode 100644 index d8b2c73cc11..00000000000 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp +++ /dev/null @@ -1,386 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "instrumentation.h" - -#include -#include -#include -#include -#include - -namespace instrumentation { - // initalize dumping directory for whole run - const std::string logger::dump_dir = DUMP_DIRECTORY; - - static float convert_half_to_float(cldnn::half_t val) - { - return static_cast(val); - } - - float convert_element(float f) - { - return f; - } - - float convert_element(cldnn::half_t h) - { - return convert_half_to_float(h); - } - - template - void dump_byxf(const cldnn::memory& mem, bool single_batch, cldnn::tensor::value_type batch_id, bool single_feature, cldnn::tensor::value_type feature_id, std::vector> & streams) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - unsigned int input_it = 0; - for (cldnn::tensor::value_type b = 0; b < mem_arg.size.batch[0]; b++) - { - for (cldnn::tensor::value_type y = 0; y < mem_arg.size.spatial[1]; y++) - { - for (cldnn::tensor::value_type x = 0; x < mem_arg.size.spatial[0]; x++) - { - for (cldnn::tensor::value_type f = 0; f < mem_arg.size.feature[0]; f++) 
- { - if ((!single_batch || b == batch_id) && (!single_feature || f == feature_id)) - { - streams[b][f] << convert_element(mem_ptr[input_it]) << " "; - if (x == mem_arg.size.spatial[0] - 1) - { - streams[b][f] << std::endl; - } - } - input_it++; - } - } - } - } - } - - template - void dump_bfyx(const cldnn::memory& mem, bool single_batch, cldnn::tensor::value_type batch_id, bool single_feature, cldnn::tensor::value_type feature_id, std::vector> & streams) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - unsigned int input_it = 0; - for (cldnn::tensor::value_type b = 0; b < mem_arg.size.batch[0]; b++) - { - for (cldnn::tensor::value_type f = 0; f < mem_arg.size.feature[0]; f++) - { - for (cldnn::tensor::value_type y = 0; y < mem_arg.size.spatial[1]; y++) - { - for (cldnn::tensor::value_type x = 0; x < mem_arg.size.spatial[0]; x++) - { - if ((!single_batch || b == batch_id) && (!single_feature || f == feature_id)) - { - streams[b][f] << convert_element(mem_ptr[input_it]) << " "; - if (x == mem_arg.size.spatial[0] - 1) - { - streams[b][f] << std::endl; - } - } - input_it++; - } - } - } - } - } - - template - void dump_yxfb(const cldnn::memory& mem, bool single_batch, cldnn::tensor::value_type batch_id, bool single_feature, cldnn::tensor::value_type feature_id, std::vector> & streams) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - unsigned int input_it = 0; - for (cldnn::tensor::value_type y = 0; y < mem_arg.size.spatial[1]; y++) - { - for (cldnn::tensor::value_type x = 0; x < mem_arg.size.spatial[0]; x++) - { - for (cldnn::tensor::value_type f = 0; f < mem_arg.size.feature[0]; f++) - { - for (cldnn::tensor::value_type b = 0; b < mem_arg.size.batch[0]; b++) - { - if ((!single_batch || b == batch_id) && (!single_feature || f == feature_id)) - { - streams[b][f] << convert_element(mem_ptr[input_it]) << " "; - if (x == mem_arg.size.spatial[0] - 1) - { - streams[b][f] << std::endl; - } - } - input_it++; - } - } - } - } - } - - template - void dump_xb(const cldnn::memory& mem, bool single_batch, cldnn::tensor::value_type batch_id, std::vector> & streams) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - unsigned int input_it = 0; - for (cldnn::tensor::value_type x = 0; x < mem_arg.size.spatial[0]; x++) - { - for (cldnn::tensor::value_type b = 0; b < mem_arg.size.batch[0]; b++) - { - if (!single_batch || b == batch_id) - { - streams[b][0] << convert_element(mem_ptr[input_it]) << std::endl; - } - input_it++; - } - } - } - - template - void dump_bx(const cldnn::memory& mem, bool single_batch, cldnn::tensor::value_type batch_id, std::vector> & streams) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - unsigned int input_it = 0; - for (cldnn::tensor::value_type b = 0; b < mem_arg.size.batch[0]; b++) - { - for (cldnn::tensor::value_type x = 0; x < mem_arg.size.spatial[0]; x++) - { - if (!single_batch || b == batch_id) - { - streams[b][0] << convert_element(mem_ptr[input_it]) << std::endl; - } - input_it++; - } - } - } - - template - void dump_yxio(const cldnn::memory& mem, std::stringstream & stream) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - auto i_size = mem_arg.size.batch[0]; - auto o_size = mem_arg.size.feature[0]; - auto x_size = mem_arg.size.spatial[0]; - auto y_size = mem_arg.size.spatial[1]; - unsigned int input_it = 0; - for (cldnn::tensor::value_type o = 0; o < o_size; o++) - { - for (cldnn::tensor::value_type i = 0; i < i_size; i++) - { - for 
(cldnn::tensor::value_type x = 0; x < x_size; x++) - { - for (cldnn::tensor::value_type y = 0; y < y_size; y++) - { - stream<< convert_element(mem_ptr[input_it]) << " "; - input_it++; - } - stream<< std::endl; - } - } - } - } - - template - void dump_oiyx(const cldnn::memory& mem, std::stringstream & stream) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - auto i_size = mem_arg.size.batch[0]; - auto o_size = mem_arg.size.feature[0]; - auto x_size = mem_arg.size.spatial[0]; - auto y_size = mem_arg.size.spatial[1]; - unsigned int input_it = 0; - for (cldnn::tensor::value_type x = 0; x < x_size; x++) - { - for (cldnn::tensor::value_type y = 0; y < y_size; y++) - { - for (cldnn::tensor::value_type i = 0; i < i_size; i++) - { - for (cldnn::tensor::value_type o = 0; o < o_size; o++) - { - stream << convert_element(mem_ptr[input_it]) << " "; - input_it++; - } - stream << std::endl; - } - } - } - } - - template - void dump_os_iyx_osv16(const cldnn::memory& mem, std::stringstream & stream) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - auto i_size = mem_arg.size.batch[0]; - auto o_size = mem_arg.size.feature[0]; - auto x_size = mem_arg.size.spatial[0]; - auto y_size = mem_arg.size.spatial[1]; - auto weights_size = i_size * o_size * x_size * y_size; //count() also counts feature[1] - int slice_value = 16; - cldnn::tensor::value_type it = 0; - while (it < weights_size) - { - stream << convert_element(mem_ptr[it]) << " "; - it++; - if (it % slice_value == 0) //separate every bsv with a new line - stream << std::endl; - }; - } - - template - void dump_bs_xs_xsv8_bsv8(const cldnn::memory& mem, std::stringstream & stream) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - auto i_size = mem_arg.size.batch[0]; //batch = input feature map - auto x_size = mem_arg.size.spatial[0]; // spatial_x = output feature map - auto weights_size = mem_arg.size.count(); - int xsv = 8, bsv = 8; - unsigned int input_it = 0, input_i_it= 0 , input_o_it = 0; - for (cldnn::tensor::value_type it = 0; it < weights_size; it++) - { - stream << convert_element(mem_ptr[input_it]) << " "; - input_i_it++; - if (input_i_it % bsv == 0) //separete every input slice with a new line - { - stream << std::endl; - input_o_it++; - input_i_it = 0; - } - input_it = input_o_it*bsv + input_i_it; - - if (input_it % (xsv*bsv) == 0) // seperate every block (8x8) with a new line - stream << std::endl; - } - } - - template - void dump_bs_x_bsv16(const cldnn::memory& mem, std::stringstream & stream) - { - auto mem_arg = mem.get_layout(); - auto mem_ptr = mem.pointer(); - - auto i_size = mem_arg.size.batch[0]; //batch = input feature map - auto x_size = mem_arg.size.spatial[0]; // spatial_x = output feature map - auto weights_size = mem_arg.size.count(); - int bsv = 16; - cldnn::tensor::value_type it = 0; - while (it < weights_size) - { - stream << convert_element(mem_ptr[it]) << " "; - it++; - if (it % bsv == 0) //separate every bsv with a new line - stream << std::endl; - } - } - - template - void dump(const cldnn::memory& mem, std::stringstream& stream) - { - auto mem_ptr = mem.pointer(); - - auto&& pitches = mem.get_layout().get_pitches(); - auto&& size = mem.get_layout().size; - for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) - { - stream << "============= BATCH " << b << " ============\n\n"; - for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) - { - stream << "feature " << f << ":\n"; - for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; 
++y) - { - for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) - { - unsigned int input_it = b*pitches.batch[0] + f*pitches.feature[0] + y*pitches.spatial[1] + x*pitches.spatial[0]; - stream << convert_element(mem_ptr[input_it]) << " "; - input_it++; - } - stream << '\n'; - } - stream << std::endl; - } - } - } - - template - void dump(const cldnn::memory& mem, std::vector>& dump_strings) - { - auto mem_ptr = mem.pointer(); - std::stringstream stream; - - auto&& pitches = mem.get_layout().get_pitches(); - auto&& size = mem.get_layout().size; - for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) - { - for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) - { - for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) - { - for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) - { - unsigned int input_it = b*pitches.batch[0] + f*pitches.feature[0] + y*pitches.spatial[1] + x*pitches.spatial[0]; - stream << convert_element(mem_ptr[input_it]) << " "; - input_it++; - } - stream << std::endl; - dump_strings[b][f] = stream.str(); - } - } - } - } - - void logger::log_memory_to_file(const cldnn::memory& mem, std::string prefix, bool single_batch, cldnn::tensor::value_type batch_id, cldnn::tensor::value_type feature_id) - { - auto batch = mem.get_layout().size.batch[0]; - auto feature = mem.get_layout().size.feature[0]; - auto eng_type = "gpu" ; - std::vector> dump_strings(batch); - for(cldnn::tensor::value_type b = 0; b < batch; b++) - { - dump_strings[b].resize(feature); - } - - if (mem.get_layout().data_type == cldnn::data_types::f32) - dump(mem, dump_strings); - else - dump(mem, dump_strings); - - for (cldnn::tensor::value_type b = 0; b < batch; b++) - for (cldnn::tensor::value_type f = 0; f < feature; f++) - { - if (!single_batch || (b == batch_id && f == feature_id)) - { - std::string filename((dump_dir + "/" + prefix + "_" + eng_type + "_b" + std::to_string(b) + "_f" + std::to_string(f) + ".txt")); - std::ofstream file_stream(filename); - file_stream << dump_strings[b][f]; - file_stream.close(); - } - } - } - - void logger::log_weights_to_file(const cldnn::memory& mem, std::string prefix) - { - std::stringstream stream; - - if (mem.get_layout().data_type == cldnn::data_types::f32) - dump(mem, stream); - else - dump(mem, stream); - - std::string filename((dump_dir + "/" + prefix + ".txt")); - std::ofstream file_stream(filename); - file_stream << stream.str(); - file_stream.close(); - } -} diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.h b/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.h deleted file mode 100644 index 8bb10535632..00000000000 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include -#include -#include -#include "api/memory.hpp" - -#define DUMP_DIRECTORY "./" - -namespace instrumentation { - - template - std::string to_string(const std::chrono::duration val) { - namespace ch = std::chrono; - const ch::microseconds us(1); - const ch::milliseconds ms(1); - const ch::seconds s(1); - const std::chrono::duration abs_val(std::abs(val.count())); - - std::ostringstream os; - os << std::setprecision(3) << std::fixed; - if (abs_val > s) os << ch::duration_cast>(val).count() << " s"; - else if (abs_val > ms) os << ch::duration_cast>(val).count() << " ms"; - else if (abs_val > us) os << ch::duration_cast>(val).count() << " us"; - 
else os << ch::duration_cast(val).count() << " ns"; - return os.str(); - } - - struct logger - { - static void log_memory_to_file(const cldnn::memory&, std::string prefix = "", bool single_batch = false, cldnn::tensor::value_type batch_id = 0, cldnn::tensor::value_type feature_id = 0); - static void log_weights_to_file(const cldnn::memory&, std::string prefix = ""); - private: - static const std::string dump_dir; - }; -} diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/network_test.h b/inference-engine/thirdparty/clDNN/tests/test_utils/network_test.h index e83e3b50335..efc9d7b6cbf 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/network_test.h +++ b/inference-engine/thirdparty/clDNN/tests/test_utils/network_test.h @@ -7,14 +7,15 @@ #include #include "test_utils/test_utils.h" -#include "api/data.hpp" -#include "api/engine.hpp" -#include "api/fully_connected.hpp" -#include "api/input_layout.hpp" -#include "api/layout.hpp" -#include "api/memory.hpp" -#include "api/primitive.hpp" -#include "api/tensor.hpp" +#include +#include +#include +#include + +#include +#include +#include +#include #include #include @@ -57,7 +58,7 @@ struct typed_comparator { // ===================================================================================================================== // Reference tensor struct reference_tensor { - virtual void compare(cldnn::memory actual) = 0; + virtual void compare(cldnn::memory::ptr actual) = 0; }; template @@ -68,22 +69,23 @@ struct reference_tensor_typed : reference_tensor { using vector_type = VF; reference_tensor_typed(vector_type data) : reference(std::move(data)) {} - void compare(cldnn::memory actual) override { - auto ptr = actual.pointer(); + void compare(cldnn::memory::ptr actual) override { + cldnn::mem_lock ptr(actual, get_test_stream()); + for (size_t bi = 0; bi < reference.size(); ++bi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(0), cldnn::spatial(0, 0, 0, 0)); - size_t offset = actual.get_layout().get_linear_offset(coords); + size_t offset = actual->get_layout().get_linear_offset(coords); auto& ref = reference[bi]; auto& val = ptr[offset]; TYPED_EXPECT_EQ(ref, val) << " at bi=" << bi; } } - void fill_memory(cldnn::memory mem) { - auto ptr = mem.pointer(); + void fill_memory(cldnn::memory::ptr mem) { + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < reference.size(); ++bi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(0), cldnn::spatial(0, 0, 0, 0)); - size_t offset = mem.get_layout().get_linear_offset(coords); + size_t offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = reference[bi]; } } @@ -100,12 +102,12 @@ struct reference_tensor_typed : reference_tensor { using vector_type = VVF; reference_tensor_typed(vector_type data) : reference(std::move(data)) {} - void compare(cldnn::memory actual) override { - auto ptr = actual.pointer(); + void compare(cldnn::memory::ptr actual) override { + cldnn::mem_lock ptr(actual, get_test_stream()); for (size_t bi = 0; bi < reference.size(); ++bi) { for (size_t fi = 0; fi < reference[0].size(); ++fi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(fi), cldnn::spatial(0, 0, 0, 0)); - size_t offset = actual.get_layout().get_linear_offset(coords); + size_t offset = actual->get_layout().get_linear_offset(coords); auto& ref = reference[bi][fi]; auto& val = ptr[offset]; TYPED_EXPECT_EQ(ref, val) << "at bi=" << bi << " fi=" << fi; @@ -113,12 +115,12 @@ struct reference_tensor_typed : reference_tensor { } } - void 
fill_memory(cldnn::memory mem) { - auto ptr = mem.pointer(); + void fill_memory(cldnn::memory::ptr mem) { + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < reference.size(); ++bi) { for (size_t fi = 0; fi < reference[0].size(); ++fi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(fi), cldnn::spatial(0, 0, 0, 0)); - size_t offset = mem.get_layout().get_linear_offset(coords); + size_t offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = reference[bi][fi]; } } @@ -135,14 +137,14 @@ template struct reference_tensor_typed : reference_tensor { using vector_type = VVVVF; reference_tensor_typed(vector_type data) : reference(std::move(data)) {} - void compare(cldnn::memory actual) override { - auto ptr = actual.pointer(); + void compare(cldnn::memory::ptr actual) override { + cldnn::mem_lock ptr(actual, get_test_stream()); for (size_t bi = 0; bi < reference.size(); ++bi) { for (size_t fi = 0; fi < reference[0].size(); ++fi) { for (size_t yi = 0; yi < reference[0][0].size(); ++yi) { for (size_t xi = 0; xi < reference[0][0][0].size(); ++xi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(fi), cldnn::spatial(xi, yi, 0, 0)); - size_t offset = actual.get_layout().get_linear_offset(coords); + size_t offset = actual->get_layout().get_linear_offset(coords); auto& ref = reference[bi][fi][yi][xi]; auto& val = ptr[offset]; TYPED_EXPECT_EQ(ref, val) << "at bi=" << bi << " fi=" << fi << " yi=" << yi << " xi=" << xi; @@ -152,14 +154,14 @@ struct reference_tensor_typed : reference_tensor { } } - void fill_memory(cldnn::memory mem) { - auto ptr = mem.pointer(); + void fill_memory(cldnn::memory::ptr mem) { + cldnn::mem_lock ptr(mem, get_test_stream()); for (size_t bi = 0; bi < reference.size(); ++bi) { for (size_t fi = 0; fi < reference[0].size(); ++fi) { for (size_t yi = 0; yi < reference[0][0].size(); ++yi) { for (size_t xi = 0; xi < reference[0][0][0].size(); ++xi) { auto coords = cldnn::tensor(cldnn::batch(bi), cldnn::feature(fi), cldnn::spatial(xi, yi, 0, 0)); - size_t offset = mem.get_layout().get_linear_offset(coords); + size_t offset = mem->get_layout().get_linear_offset(coords); ptr[offset] = reference[bi][fi][yi][xi]; } } @@ -257,6 +259,7 @@ struct reference_node_interface { virtual reference_tensor& get_reference() = 0; virtual cldnn::primitive_id get_id() = 0; + virtual ~reference_node_interface() = default; }; template @@ -275,7 +278,7 @@ struct reference_node : reference_node_interface { class network_test { public: - explicit network_test(cldnn::engine eng) : eng(eng) {} + explicit network_test(cldnn::engine& eng) : eng(eng) {} template typename reference_node::ptr add_input_layout(cldnn::primitive_id id, @@ -285,7 +288,7 @@ public: auto shape = output.get_shape(); auto lt = cldnn::layout(cldnn::type_to_data_type::value, fmt, shape); topo.add(cldnn::input_layout(id, lt)); - auto mem = cldnn::memory::allocate(eng, lt); + auto mem = eng.allocate_memory(lt); output.fill_memory(mem); inputs.emplace(id, mem); return add_node(id, std::move(output), {}); @@ -298,7 +301,7 @@ public: auto output = reference_tensor_typed(std::move(data)); auto shape = output.get_shape(); auto lt = cldnn::layout(cldnn::type_to_data_type::value, fmt, shape); - auto mem = cldnn::memory::allocate(eng, lt); + auto mem = eng.allocate_memory(lt); output.fill_memory(mem); topo.add(cldnn::data(id, mem)); return add_node(id, std::move(output), {}); @@ -384,10 +387,10 @@ protected: return node; } - cldnn::engine eng; + cldnn::engine& eng; cldnn::topology topo; std::map 
forced_impls; - std::map inputs; + std::map inputs; std::set outputs; }; diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp index 86f0cfd7805..bfa964350dd 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp @@ -4,415 +4,366 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// -#include "api/memory.hpp" -#include -#include -#include -#include -#include -#include #include "test_utils.h" #include "float16.h" -#include "instrumentation.h" #include using namespace cldnn; -namespace tests -{ - const std::string graph_dump_dir = DUMP_DIRECTORY; +namespace tests { - generic_test::generic_test() : generic_params(std::get<0>(GetParam())), layer_params(std::get<1>(GetParam())), max_ulps_diff_allowed(4), random_values(true), dump_graphs(false), dump_memory(false) - { - } - void generic_test::run_single_test() - { - assert((generic_params->data_type == data_types::f32) || (generic_params->data_type == data_types::f16)); - if (dump_graphs) - { - generic_params->network_build_options.set_option(cldnn::build_option::graph_dumps_dir(DUMP_DIRECTORY)); - } - topology topology; - topology.add_primitive(layer_params); +generic_test::generic_test() + : generic_params(std::get<0>(GetParam())) + , layer_params(std::get<1>(GetParam())) + , max_ulps_diff_allowed(4) + , random_values(true) { } - std::vector input_mems; - std::vector input_layouts_names = {}; +void generic_test::run_single_test() { + assert((generic_params->data_type == data_types::f32) || (generic_params->data_type == data_types::f16)); + topology topology; + topology.add_primitive(layer_params); - size_t multipler = 0; - for (size_t i = 0 ; i < generic_params->input_layouts.size() ; i++) - { - input_mems.push_back( memory::allocate(engine, generic_params->input_layouts[i]) ); + std::vector input_mems; + std::vector input_layouts_names = {}; - if (random_values) - { - if (generic_params->data_type == data_types::f32) - { - tests::set_random_values(input_mems[i], true, 7, 10); + size_t multipler = 0; + for (size_t i = 0 ; i < generic_params->input_layouts.size() ; i++) { + input_mems.push_back( engine.allocate_memory(generic_params->input_layouts[i]) ); + + if (random_values) { + if (generic_params->data_type == data_types::f32) { + tests::set_random_values(input_mems[i], true, 7, 10); + } else { + tests::set_random_values(input_mems[i], true, 5, 10); + } + } else { + size_t size = generic_params->input_layouts[i].size.batch[0] * generic_params->input_layouts[i].size.feature[0]; + + if (generic_params->data_type == data_types::f32) { + std::vector values; + for (size_t j = 1; j <= size; j++) { + values.push_back(static_cast(multipler + j)); } - else - { - tests::set_random_values(input_mems[i], true, 5, 10); + tests::set_values_per_batch_and_feature(input_mems[i], values); + multipler = values.size(); + } else { + std::vector values; + for (size_t j = 1; j <= size; j++) { + values.push_back(FLOAT16(static_cast(multipler + j))); } - } - else - { - size_t size = generic_params->input_layouts[i].size.batch[0] * generic_params->input_layouts[i].size.feature[0]; - - if (generic_params->data_type == data_types::f32) - { - std::vector values; - for (size_t j = 1; j <= size; j++) - { - values.push_back(static_cast(multipler + j)); - } - tests::set_values_per_batch_and_feature(input_mems[i], values); - multipler = 
values.size(); - } - else - { - std::vector values; - for (size_t j = 1; j <= size; j++) - { - values.push_back(FLOAT16(static_cast(multipler + j))); - } - tests::set_values_per_batch_and_feature(input_mems[i], values); - multipler = values.size(); - } - } - std::string input_name = "input" + std::to_string(i); - if ( (i == 0) && generic_params->network_build_options.get()->enabled() ) - { - // Add reorder after the first input in case of optimize data flag since it might change the input layout. - input_name = "input0_init"; - } - - // First input is provided to the network as input_layout. - // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. - if ( (i == 0) || !generic_params->network_build_options.get()->enabled()) - { - topology.add(input_layout(input_name, input_mems[i].get_layout())); - input_layouts_names.push_back(input_name); - } - else - { - topology.add(data(input_name, input_mems[i])); - } - - if (!is_format_supported(generic_params->fmt)) - { - ASSERT_THROW(network bad(engine, topology), std::exception); - return; + tests::set_values_per_batch_and_feature(input_mems[i], values); + multipler = values.size(); } } - - if (generic_params->network_build_options.get()->enabled()) - { + std::string input_name = "input" + std::to_string(i); + if ((i == 0) && generic_params->network_build_options.get()->enabled()) { // Add reorder after the first input in case of optimize data flag since it might change the input layout. - topology.add(reorder("input0", "input0_init", input_mems[0].get_layout())); + input_name = "input0_init"; } - if (layer_params->input[0] == "reorder0") - { - // Add reorder layer with output padding as input to the tested layer. - topology.add(reorder("reorder0", "input0", input_mems[0].get_layout().with_padding(padding{ { 0, 0, 1, 3 },{ 0, 0, 5, 2 } }))); + // First input is provided to the network as input_layout. + // Other inputs are provided as input_layout if optimize data flag is off. Otherwise they are provided as data. 
+ if ((i == 0) || !generic_params->network_build_options.get()->enabled()) { + topology.add(input_layout(input_name, input_mems[i]->get_layout())); + input_layouts_names.push_back(input_name); + } else { + topology.add(data(input_name, input_mems[i])); } - prepare_input_for_test(input_mems); - - network network(engine, topology, generic_params->network_build_options); - - for (size_t i = 0 ; i < input_layouts_names.size() ; i++) - { - network.set_input_data(input_layouts_names[i], input_mems[i]); - } - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - - auto output = outputs.begin()->second.get_memory(); - - auto output_ref = generate_reference(input_mems); - - - if (dump_memory) - { - std::string prefix = test_info.name(); - for (size_t i = 0; i < generic_params->input_layouts.size(); i++) - { - ::instrumentation::logger::log_memory_to_file(input_mems[i], prefix + "input" + std::to_string(i)); - } - for (size_t i = 0; i < outputs.size(); i++) - { - ::instrumentation::logger::log_memory_to_file(output, prefix + "output" + std::to_string(i)); - } - } - - if (output.get_layout().data_type == data_types::f32) - { - compare_buffers(output, output_ref); - } - else - { - compare_buffers(output, output_ref); + if (!is_format_supported(generic_params->fmt)) { + ASSERT_THROW(network bad(engine, topology), std::exception); + return; } } - template - void generic_test::compare_buffers(const memory& out, const memory& ref) - { - auto out_layout = out.get_layout(); - auto ref_layout = ref.get_layout(); - - EXPECT_EQ(out_layout.size, ref_layout.size); - EXPECT_EQ(out_layout.data_type, ref_layout.data_type); - EXPECT_EQ(get_expected_output_tensor(), out_layout.size); - EXPECT_EQ(out_layout.get_linear_size(), ref_layout.get_linear_size()); - EXPECT_EQ(out_layout.data_padding, ref_layout.data_padding); - - auto output_size = out_layout.size; - - int batch_size = output_size.batch[0]; - int feature_size = output_size.feature[0]; - int y_size = output_size.spatial[1]; - int x_size = output_size.spatial[0]; - - auto res_data = out.pointer(); - auto ref_data = ref.pointer(); - - const auto out_desc = get_linear_memory_desc(out_layout); - const auto ref_desc = get_linear_memory_desc(ref_layout); - - for (int b = 0; b < batch_size; b++) - { - for (int f = 0; f < feature_size; f++) - { - for (int y = 0; y < y_size; y++) - { - for (int x = 0; x < x_size; x++) - { - size_t res_index = get_linear_index(out_layout, b, f, y, x, out_desc); - size_t ref_index = get_linear_index(ref_layout, b, f, y, x, ref_desc); - - EXPECT_TRUE(floating_point_equal(res_data[res_index], ref_data[ref_index], max_ulps_diff_allowed)) - << "Expected " << (float)res_data[res_index] << " to be almost equal (within " << max_ulps_diff_allowed << " ULP's) to " << (float)ref_data[ref_index] - << " (ref index = " << ref_index << ", B " << b << ", F "<< f << ", Y " << y << ", X " << x << ")!"; - - if (HasFailure()) - { - return; - } - } - } - } - } + if (generic_params->network_build_options.get()->enabled()) { + // Add reorder after the first input in case of optimize data flag since it might change the input layout. 
+ topology.add(reorder("input0", "input0_init", input_mems[0]->get_layout())); } - static size_t calc_offfset(const layout & layout, const pitches& p) - { - auto lower_padding = layout.data_padding.lower_size(); - if (layout.format == format::bfzyx) { - return - p.b * lower_padding.batch[0] + - p.f * lower_padding.feature[0] + - p.z * lower_padding.spatial[2] + - p.y * lower_padding.spatial[1] + - p.x * lower_padding.spatial[0]; - } - else { - return - p.b * lower_padding.batch[0] + - p.f * lower_padding.feature[0] + - p.y * lower_padding.spatial[1] + - p.x * lower_padding.spatial[0]; - } + if (layer_params->input[0] == "reorder0") { + // Add reorder layer with output padding as input to the tested layer. + topology.add(reorder("reorder0", "input0", input_mems[0]->get_layout().with_padding(padding{ { 0, 0, 1, 3 },{ 0, 0, 5, 2 } }))); } - memory_desc generic_test::get_linear_memory_desc(const layout & layout) - { - pitches p; + prepare_input_for_test(input_mems); - switch (layout.format) - { - case format::bfyx: - { - p.x = 1; - p.y = layout.get_buffer_size().sizes(format::bfyx)[3] * p.x; - p.f = layout.get_buffer_size().sizes(format::bfyx)[2] * p.y; - p.b = layout.get_buffer_size().sizes(format::bfyx)[1] * p.f; - break; - } - case format::yxfb: - { - p.b = 1; - p.f = layout.get_buffer_size().sizes(format::yxfb)[3] * p.b; - p.x = layout.get_buffer_size().sizes(format::yxfb)[2] * p.f; - p.y = layout.get_buffer_size().sizes(format::yxfb)[1] * p.x; - break; - } - case format::fyxb: - { - p.b = 1; - p.x = layout.get_buffer_size().sizes(format::fyxb)[3] * p.b; - p.y = layout.get_buffer_size().sizes(format::fyxb)[2] * p.x; - p.f = layout.get_buffer_size().sizes(format::fyxb)[1] * p.y; - break; - } - case format::byxf: - { - p.f = 1; - p.x = layout.get_buffer_size().sizes(format::byxf)[3] * p.f; - p.y = layout.get_buffer_size().sizes(format::byxf)[2] * p.x; - p.b = layout.get_buffer_size().sizes(format::byxf)[1] * p.y; - break; - } - case format::bfzyx: - { - p.x = 1; - p.y = layout.get_buffer_size().sizes(format::bfzyx)[4] * p.x; - p.z = layout.get_buffer_size().sizes(format::bfzyx)[3] * p.y; - p.f = layout.get_buffer_size().sizes(format::bfzyx)[2] * p.z; - p.b = layout.get_buffer_size().sizes(format::bfzyx)[1] * p.f; - break; - } - default: - { - throw std::runtime_error("Format not supported yet."); - } - } + network network(engine, topology, generic_params->network_build_options); - return{ p, calc_offfset(layout, p) }; + for (size_t i = 0 ; i < input_layouts_names.size() ; i++) { + network.set_input_data(input_layouts_names[i], input_mems[i]); } - size_t generic_test::get_linear_index(const layout&, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc) - { - return - desc.offset + - b*desc.pitch.b + - f*desc.pitch.f + - y*desc.pitch.y + - x*desc.pitch.x; + auto outputs = network.execute(); + EXPECT_EQ(outputs.size(), size_t(1)); + + auto output = outputs.begin()->second.get_memory(); + + auto output_ref = generate_reference(input_mems); + + if (output->get_layout().data_type == data_types::f32) { + compare_buffers(output, output_ref); + } else { + compare_buffers(output, output_ref); } - - size_t generic_test::get_linear_index(const layout&, size_t b, size_t f, size_t z, size_t y, size_t x, const memory_desc& desc) - { - return - desc.offset + - b*desc.pitch.b + - f*desc.pitch.f + - z*desc.pitch.z + - y*desc.pitch.y + - x*desc.pitch.x; - } - - size_t generic_test::get_linear_index_with_broadcast(const layout& in_layout, size_t b, size_t f, size_t y, size_t x, const memory_desc& 
desc) - { - return - desc.offset + - (b % in_layout.size.batch[0]) * desc.pitch.b + - (f % in_layout.size.feature[0]) * desc.pitch.f + - (y % in_layout.size.spatial[1]) * desc.pitch.y + - (x % in_layout.size.spatial[0]) * desc.pitch.x; - } - - //Default implementation. Should be overridden in derived class otherwise. - cldnn::tensor generic_test::get_expected_output_tensor() - { - return generic_params->input_layouts[0].size; - } - - std::vector> generic_test::generate_generic_test_params(std::vector>& all_generic_params) - { - // , { format::yx,{ 531,777 } } , { format::yx,{ 4096,1980 } } , - //{ format::bfyx,{ 1,1,1,1 } } , { format::bfyx,{ 1,1,2,2 } } , { format::yx,{ 3,3 } } , { format::yx,{ 4,4 } } , { format::bfyx,{ 1,1,5,5 } } , { format::yx,{ 6,6 } } , { format::yx,{ 7,7 } } , - //{ format::yx,{ 8,8 } } , { format::yx,{ 9,9 } } , { format::yx,{ 10,10 } } , { format::yx,{ 11,11 } } , { format::yx,{ 12,12 } } , { format::yx,{ 13,13 } } , - //{ format::yx,{ 14,14 } } , { format::yx,{ 15,15 } } , { format::yx,{ 16,16 } } }; - - auto data_types = test_data_types(); - - for (cldnn::data_types data_type : data_types) - { - for (cldnn::format fmt : test_input_formats) - { - for (int batch_size : test_batch_sizes) - { - for (int feature_size : test_feature_sizes) - { - for (tensor input_size : test_input_sizes) - { - all_generic_params.emplace_back(new test_params(data_type, fmt, batch_size, feature_size, input_size)); - } - } - } - } - } - - return all_generic_params; - } - - const cldnn::engine & get_test_engine() - { - static const cldnn::engine engine; - return engine; - } - - const std::string test_dump::name() const - { - std::string temp = name_str; - std::replace(temp.begin(), temp.end(), '/', '_'); - return temp; - } - - const std::string test_dump::test_case_name() const - { - size_t pos = test_case_name_str.find("/"); - if (pos > test_case_name_str.length()) - { - pos = 0; - } - std::string temp = test_case_name_str.substr(pos); - return temp; - } - - std::string test_params::print_tensor(cldnn::tensor t) - { - std::stringstream str; - for (size_t i = 0; i < t.sizes(format::bfyx).size(); i++) - { - str << t.sizes(format::bfyx)[i] << " "; - } - str << "]"; - return str.str(); - } - - std::string test_params::print() - { - std::stringstream str; - str << "Data type: " << data_type_traits::name(data_type) << std::endl; - - for (int j = 0 ; j < (int)input_layouts.size(); j++) - { - const cldnn::tensor& t = input_layouts[j].size; - - str << "Input " << j << ": " << print_tensor(t) << std::endl; - } - return str.str(); - } - - std::vector generic_test::test_data_types() - { - std::vector result; - result.push_back(cldnn::data_types::f32); - - if(get_test_engine().get_info().supports_fp16) - { - result.push_back(cldnn::data_types::f16); - } - return result; - } - - std::vector generic_test::test_input_formats = { cldnn::format::bfyx , cldnn::format::yxfb, cldnn::format::fyxb, cldnn::format::byxf }; - std::vector generic_test::test_batch_sizes = { 1, 2 };// 4, 8, 16}; - std::vector generic_test::test_feature_sizes = { 1, 2 };// , 3, 15}; - std::vector generic_test::test_input_sizes = { { 1, 1, 100, 100 } ,{ 1, 1, 277, 277 } ,{ 1, 1, 400, 600 } }; - } + +template +void generic_test::compare_buffers(const memory::ptr out, const memory::ptr ref) { + auto out_layout = out->get_layout(); + auto ref_layout = ref->get_layout(); + + EXPECT_EQ(out_layout.size, ref_layout.size); + EXPECT_EQ(out_layout.data_type, ref_layout.data_type); + EXPECT_EQ(get_expected_output_tensor(), out_layout.size); + 
EXPECT_EQ(out_layout.get_linear_size(), ref_layout.get_linear_size()); + EXPECT_EQ(out_layout.data_padding, ref_layout.data_padding); + + auto output_size = out_layout.size; + + int batch_size = output_size.batch[0]; + int feature_size = output_size.feature[0]; + int y_size = output_size.spatial[1]; + int x_size = output_size.spatial[0]; + + mem_lock res_data(out, get_test_stream()); + mem_lock ref_data(ref, get_test_stream()); + + const auto out_desc = get_linear_memory_desc(out_layout); + const auto ref_desc = get_linear_memory_desc(ref_layout); + + for (int b = 0; b < batch_size; b++) { + for (int f = 0; f < feature_size; f++) { + for (int y = 0; y < y_size; y++) { + for (int x = 0; x < x_size; x++) { + size_t res_index = get_linear_index(out_layout, b, f, y, x, out_desc); + size_t ref_index = get_linear_index(ref_layout, b, f, y, x, ref_desc); + + EXPECT_TRUE(floating_point_equal(res_data[res_index], ref_data[ref_index], max_ulps_diff_allowed)) + << "Expected " << (float)res_data[res_index] << " to be almost equal (within " + << max_ulps_diff_allowed << " ULP's) to " << (float)ref_data[ref_index] + << " (ref index = " << ref_index << ", B " << b << ", F "<< f << ", Y " << y << ", X " << x << ")!"; + + if (HasFailure()) { + return; + } + } + } + } + } +} + +static size_t calc_offfset(const layout & layout, const pitches& p) { + auto lower_padding = layout.data_padding.lower_size(); + if (layout.format == format::bfzyx) { + return + p.b * lower_padding.batch[0] + + p.f * lower_padding.feature[0] + + p.z * lower_padding.spatial[2] + + p.y * lower_padding.spatial[1] + + p.x * lower_padding.spatial[0]; + } else { + return + p.b * lower_padding.batch[0] + + p.f * lower_padding.feature[0] + + p.y * lower_padding.spatial[1] + + p.x * lower_padding.spatial[0]; + } +} + +memory_desc generic_test::get_linear_memory_desc(const layout & layout) { + pitches p; + + switch (layout.format) { + case format::bfyx: { + p.x = 1; + p.y = layout.get_buffer_size().sizes(format::bfyx)[3] * p.x; + p.f = layout.get_buffer_size().sizes(format::bfyx)[2] * p.y; + p.b = layout.get_buffer_size().sizes(format::bfyx)[1] * p.f; + break; + } + case format::yxfb: { + p.b = 1; + p.f = layout.get_buffer_size().sizes(format::yxfb)[3] * p.b; + p.x = layout.get_buffer_size().sizes(format::yxfb)[2] * p.f; + p.y = layout.get_buffer_size().sizes(format::yxfb)[1] * p.x; + break; + } + case format::fyxb: { + p.b = 1; + p.x = layout.get_buffer_size().sizes(format::fyxb)[3] * p.b; + p.y = layout.get_buffer_size().sizes(format::fyxb)[2] * p.x; + p.f = layout.get_buffer_size().sizes(format::fyxb)[1] * p.y; + break; + } + case format::byxf: { + p.f = 1; + p.x = layout.get_buffer_size().sizes(format::byxf)[3] * p.f; + p.y = layout.get_buffer_size().sizes(format::byxf)[2] * p.x; + p.b = layout.get_buffer_size().sizes(format::byxf)[1] * p.y; + break; + } + case format::bfzyx: { + p.x = 1; + p.y = layout.get_buffer_size().sizes(format::bfzyx)[4] * p.x; + p.z = layout.get_buffer_size().sizes(format::bfzyx)[3] * p.y; + p.f = layout.get_buffer_size().sizes(format::bfzyx)[2] * p.z; + p.b = layout.get_buffer_size().sizes(format::bfzyx)[1] * p.f; + break; + } + default: { + throw std::runtime_error("Format not supported yet."); + } + } + + return {p, calc_offfset(layout, p)}; +} + +size_t generic_test::get_linear_index(const layout&, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc) +{ + return + desc.offset + + b*desc.pitch.b + + f*desc.pitch.f + + y*desc.pitch.y + + x*desc.pitch.x; +} + +size_t 
generic_test::get_linear_index(const layout&, size_t b, size_t f, size_t z, size_t y, size_t x, const memory_desc& desc) +{ + return + desc.offset + + b*desc.pitch.b + + f*desc.pitch.f + + z*desc.pitch.z + + y*desc.pitch.y + + x*desc.pitch.x; +} + +size_t generic_test::get_linear_index_with_broadcast(const layout& in_layout, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc) +{ + return + desc.offset + + (b % in_layout.size.batch[0]) * desc.pitch.b + + (f % in_layout.size.feature[0]) * desc.pitch.f + + (y % in_layout.size.spatial[1]) * desc.pitch.y + + (x % in_layout.size.spatial[0]) * desc.pitch.x; +} + +//Default implementation. Should be overridden in derived class otherwise. +cldnn::tensor generic_test::get_expected_output_tensor() +{ + return generic_params->input_layouts[0].size; +} + +std::vector> generic_test::generate_generic_test_params(std::vector>& all_generic_params) +{ + // , { format::yx,{ 531,777 } } , { format::yx,{ 4096,1980 } } , + //{ format::bfyx,{ 1,1,1,1 } } , { format::bfyx,{ 1,1,2,2 } } , { format::yx,{ 3,3 } } , { format::yx,{ 4,4 } } , { format::bfyx,{ 1,1,5,5 } } , { format::yx,{ 6,6 } } , { format::yx,{ 7,7 } } , + //{ format::yx,{ 8,8 } } , { format::yx,{ 9,9 } } , { format::yx,{ 10,10 } } , { format::yx,{ 11,11 } } , { format::yx,{ 12,12 } } , { format::yx,{ 13,13 } } , + //{ format::yx,{ 14,14 } } , { format::yx,{ 15,15 } } , { format::yx,{ 16,16 } } }; + + auto data_types = test_data_types(); + + for (cldnn::data_types data_type : data_types) + { + for (cldnn::format fmt : test_input_formats) + { + for (int batch_size : test_batch_sizes) + { + for (int feature_size : test_feature_sizes) + { + for (tensor input_size : test_input_sizes) + { + all_generic_params.emplace_back(new test_params(data_type, fmt, batch_size, feature_size, input_size)); + } + } + } + } + } + + return all_generic_params; +} + +cldnn::engine_configuration get_test_engine_config() { + const bool enable_profiling = false; + const cldnn::queue_types queue_type = cldnn::queue_types::out_of_order; + std::string sources_dumps_dir = ""; + priority_mode_types priority_mode = priority_mode_types::disabled; + throttle_mode_types throttle_mode = throttle_mode_types::disabled; + bool use_memory_pool = true; + bool use_unified_shared_memory = true; + return engine_configuration(enable_profiling, queue_type, sources_dumps_dir, priority_mode, throttle_mode, use_memory_pool, use_unified_shared_memory); +} + +std::shared_ptr create_test_engine() { + return cldnn::engine::create(engine_types::ocl, runtime_types::ocl, get_test_engine_config()); +} + +cldnn::engine& get_test_engine() { + static std::shared_ptr test_engine = nullptr; + if (!test_engine) { + test_engine = create_test_engine(); + } + return *test_engine; +} + +cldnn::stream& get_test_stream() { + static std::shared_ptr test_stream = nullptr; + if (!test_stream) + test_stream = get_test_engine().create_stream(); + return *test_stream; +} + +const std::string test_dump::name() const { + std::string temp = name_str; + std::replace(temp.begin(), temp.end(), '/', '_'); + return temp; +} + +const std::string test_dump::test_case_name() const { + size_t pos = test_case_name_str.find("/"); + if (pos > test_case_name_str.length()) { + pos = 0; + } + std::string temp = test_case_name_str.substr(pos); + return temp; +} + +std::string test_params::print_tensor(cldnn::tensor t) { + std::stringstream str; + for (size_t i = 0; i < t.sizes(format::bfyx).size(); i++) { + str << t.sizes(format::bfyx)[i] << " "; + } + str << "]"; + return 
str.str(); +} + +std::string test_params::print() { + std::stringstream str; + str << "Data type: " << data_type_traits::name(data_type) << std::endl; + + for (int j = 0 ; j < (int)input_layouts.size(); j++) { + const cldnn::tensor& t = input_layouts[j].size; + + str << "Input " << j << ": " << print_tensor(t) << std::endl; + } + return str.str(); +} + +std::vector generic_test::test_data_types() { + std::vector result; + result.push_back(cldnn::data_types::f32); + + if (get_test_engine().get_device_info().supports_fp16) { + result.push_back(cldnn::data_types::f16); + } + return result; +} + +std::vector generic_test::test_input_formats = { cldnn::format::bfyx , cldnn::format::yxfb, cldnn::format::fyxb, cldnn::format::byxf }; +std::vector generic_test::test_batch_sizes = { 1, 2 };// 4, 8, 16}; +std::vector generic_test::test_feature_sizes = { 1, 2 };// , 3, 15}; +std::vector generic_test::test_input_sizes = { { 1, 1, 100, 100 } ,{ 1, 1, 277, 277 } ,{ 1, 1, 400, 600 } }; + +} // namespace tests diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h index 436df5abb1e..bfe6cfc1798 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h +++ b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h @@ -6,35 +6,49 @@ #pragma once -#include "api/memory.hpp" -#include "api/tensor.hpp" -#include "api/program.hpp" -#include "api/network.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "float16.h" +#include "random_gen.h" +#include "uniform_quantized_real_distribution.hpp" +#include + #include #include #include #include #include -#include -#include -#include "float16.h" -#include "random_gen.h" -#include "api/concatenation.hpp" -#include "api/lrn.hpp" -#include "api/roi_pooling.hpp" -#include "api/scale.hpp" -#include "api/softmax.hpp" -#include "api/reorder.hpp" -#include "api/normalize.hpp" -#include "api/convolution.hpp" -#include "api/activation.hpp" -#include "api/pooling.hpp" - #include #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) namespace tests { + +std::shared_ptr create_test_engine(); +cldnn::engine& get_test_engine(); +cldnn::stream& get_test_stream(); + #define USE_RANDOM_SEED 0 #if USE_RANDOM_SEED std::random_device rnd_device; @@ -228,12 +242,12 @@ VVVVVVF generate_random_6d(size_t a, size_t b, size_t c, size_t d, size_t e, return v; } -template void set_value(const cldnn::pointer& ptr, uint32_t index, T value) { ptr[index] = value; } -template T get_value(const cldnn::pointer& ptr, uint32_t index) { return ptr[index]; } +template void set_value(T* ptr, uint32_t index, T value) { ptr[index] = value; } +template T get_value(T* ptr, uint32_t index) { return ptr[index]; } template -void set_values(const cldnn::memory& mem, std::initializer_list args ){ - auto ptr = mem.pointer(); +void set_values(cldnn::memory::ptr mem, std::initializer_list args) { + cldnn::mem_lock ptr(mem, get_test_stream()); auto it = ptr.begin(); for(auto x : args) @@ -241,8 +255,8 @@ void set_values(const cldnn::memory& mem, std::initializer_list args ){ } template -void set_values(const cldnn::memory& mem, std::vector args) { - auto ptr = mem.pointer(); +void set_values(cldnn::memory::ptr mem, std::vector args) { + cldnn::mem_lock ptr(mem, get_test_stream()); auto it = ptr.begin(); for (auto x : args) @@ -250,19 
+264,14 @@ void set_values(const cldnn::memory& mem, std::vector args) { } template -void set_values_per_batch_and_feature(const cldnn::memory& mem, std::vector args) -{ - auto mem_ptr = mem.pointer(); - auto&& pitches = mem.get_layout().get_pitches(); - auto&& size = mem.get_layout().size; - for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) - { - for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) - { - for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) - { - for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) - { +void set_values_per_batch_and_feature(cldnn::memory::ptr mem, std::vector args) { + cldnn::mem_lock mem_ptr(mem, get_test_stream()); + auto&& pitches = mem->get_layout().get_pitches(); + auto&& size = mem->get_layout().size; + for (cldnn::tensor::value_type b = 0; b < size.batch[0]; ++b) { + for (cldnn::tensor::value_type f = 0; f < size.feature[0]; ++f) { + for (cldnn::tensor::value_type y = 0; y < size.spatial[1]; ++y) { + for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x) { unsigned int input_it = b*pitches.batch[0] + f*pitches.feature[0] + y*pitches.spatial[1] + x*pitches.spatial[0]; mem_ptr[input_it] = args[b*size.feature[0] + f]; } @@ -274,42 +283,37 @@ void set_values_per_batch_and_feature(const cldnn::memory& mem, std::vector a template::value || std::is_same::value>::type* = nullptr> -void set_random_values(const cldnn::memory& mem, bool sign = false, unsigned significand_bit = 8, unsigned scale = 1) +void set_random_values(cldnn::memory::ptr mem, bool sign = false, unsigned significand_bit = 8, unsigned scale = 1) { - auto ptr = mem.pointer(); + cldnn::mem_lock ptr(mem, get_test_stream()); std::mt19937 gen; - for (auto it = ptr.begin(); it != ptr.end(); ++it) - { + for (auto it = ptr.begin(); it != ptr.end(); ++it) { *it = rnd_generators::gen_number(gen, significand_bit, sign, false, scale); } } template::value>::type* = nullptr> -void set_random_values(const cldnn::memory& mem) +void set_random_values(cldnn::memory::ptr mem) { - auto ptr = mem.pointer(); + cldnn::mem_lock ptr(mem, get_test_stream()); std::mt19937 gen; static std::uniform_int_distribution uid(std::numeric_limits::min(), std::numeric_limits::max()); - for (auto it = ptr.begin(); it != ptr.end(); ++it) - { + for (auto it = ptr.begin(); it != ptr.end(); ++it) { *it = uid(gen); } } // Tries to construct a network, checking if an expected error appears -inline void check_exception_massage(const cldnn::engine& engine, cldnn::topology& topology, std::string msg_to_find) -{ +inline void check_exception_massage(cldnn::engine& engine, cldnn::topology& topology, std::string msg_to_find) { try { cldnn::network(engine, topology); - } - catch (std::exception & exc) { + } catch (std::exception & exc) { std::string msg(exc.what()); if (msg.find(msg_to_find) != std::string::npos) { throw; - } - else { + } else { printf("%s\n", exc.what()); } } @@ -372,20 +376,15 @@ inline bool floating_point_equal(float x, float y, int max_ulps_diff = 4) { } } -class test_params -{ +class test_params { public: - test_params() : - fmt(cldnn::format::bfyx) - { - } + test_params() : fmt(cldnn::format::bfyx) { } test_params(cldnn::data_types dt, cldnn::format input_format, int32_t batch_size, int32_t feature_size, cldnn::tensor input_size, cldnn::build_options const& options = cldnn::build_options()) : data_type(dt), fmt(input_format), - network_build_options(options) - { + network_build_options(options) { cldnn::tensor t = cldnn::tensor(batch_size, feature_size, 
input_size.spatial[0], input_size.spatial[1] ); input_layouts.push_back( cldnn::layout(dt, fmt, t) ); } @@ -402,38 +401,32 @@ public: static std::string print_tensor(cldnn::tensor tensor); }; -struct pitches -{ +struct pitches { size_t b, f, y, x, z; }; -struct memory_desc -{ +struct memory_desc { pitches pitch; size_t offset; }; -const cldnn::engine & get_test_engine(); - -struct test_dump -{ +struct test_dump { const std::string name() const; const std::string test_case_name() const; + private: const std::string test_case_name_str = ::testing::UnitTest::GetInstance()->current_test_info()->test_case_name(); const std::string name_str = ::testing::UnitTest::GetInstance()->current_test_info()->name(); }; -class generic_test : public ::testing::TestWithParam, std::shared_ptr>> -{ - +class generic_test : public ::testing::TestWithParam, std::shared_ptr>> { public: generic_test(); void run_single_test(); template - void compare_buffers(const cldnn::memory& out, const cldnn::memory& ref); + void compare_buffers(const cldnn::memory::ptr out, const cldnn::memory::ptr ref); static size_t get_linear_index(const cldnn::layout & layout, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc); static size_t get_linear_index(const cldnn::layout & layout, size_t b, size_t f, size_t z, size_t y, size_t x, const memory_desc& desc); @@ -456,7 +449,7 @@ public: }; protected: - const cldnn::engine& engine = get_test_engine(); + cldnn::engine& engine = get_test_engine(); std::shared_ptr generic_params; test_dump test_info; std::shared_ptr layer_params; @@ -464,10 +457,10 @@ protected: bool random_values; // if set memory buffers will be filled with random values bool dump_graphs; // if set tests will dump graphs to file bool dump_memory; // if set memory buffers will be dumped to file - virtual cldnn::memory generate_reference(const std::vector& inputs) = 0; + virtual cldnn::memory::ptr generate_reference(const std::vector& inputs) = 0; // Allows the test to override the random input data that the framework generates - virtual void prepare_input_for_test(std::vector& /*inputs*/) { } + virtual void prepare_input_for_test(std::vector& /*inputs*/) { } static std::vector test_data_types(); static std::vector test_input_formats; @@ -480,8 +473,7 @@ protected: // When a test assertion such as EXPECT_EQ fails, Google-Test prints the argument values to help with debugging. // It does this using a user - extensible value printer. // This function will be used to print the test params in case of an error. -inline void PrintTupleTo(const std::tuple, std::shared_ptr>& t, ::std::ostream* os) -{ +inline void PrintTupleTo(const std::tuple, std::shared_ptr>& t, ::std::ostream* os) { std::stringstream str; auto test_param = std::get<0>(t); @@ -495,13 +487,10 @@ inline void PrintTupleTo(const std::tuple, std::sha //TODO: do layers not have param dumping? we could consider adding it - if (primitive->type == cldnn::concatenation::type_id()) - { + if (primitive->type == cldnn::concatenation::type_id()) { auto dc = std::static_pointer_cast(primitive); (void)dc; - } - else if(primitive->type == cldnn::lrn::type_id()) - { + } else if(primitive->type == cldnn::lrn::type_id()) { auto lrn = std::static_pointer_cast(primitive); std::string norm_region = (lrn->norm_region == cldnn::lrn_norm_region_across_channel) ? 
"across channel" : "within channel"; str << "Norm region: " << norm_region @@ -509,9 +498,7 @@ inline void PrintTupleTo(const std::tuple, std::sha << " Alpha: " << lrn->alpha << " Beta: " << lrn->beta << " K: " << lrn->k; - } - else if(primitive->type == cldnn::roi_pooling::type_id()) - { + } else if(primitive->type == cldnn::roi_pooling::type_id()) { auto p = std::static_pointer_cast(primitive); str << "Pooling mode: " << (p->mode == cldnn::pooling_mode::max ? "MAX" : "AVG") << " Pooled width: " << p->pooled_width @@ -520,51 +507,35 @@ inline void PrintTupleTo(const std::tuple, std::sha << " Spatial bins x: " << p->spatial_bins_x << " Spatial bins y: " << p->spatial_bins_y << " Output dim: " << p->output_dim; - } - else if(primitive->type == cldnn::scale::type_id()) - { + } else if(primitive->type == cldnn::scale::type_id()) { auto s = std::static_pointer_cast(primitive); (void)s; - } - else if(primitive->type == cldnn::softmax::type_id()) - { + } else if(primitive->type == cldnn::softmax::type_id()) { auto sm = std::static_pointer_cast(primitive); (void)sm; - } - else if (primitive->type == cldnn::reorder::type_id()) - { + } else if (primitive->type == cldnn::reorder::type_id()) { auto reorder = std::static_pointer_cast(primitive); str << "Output data type: " << cldnn::data_type_traits::name(*reorder->output_data_type) << " Mean: " << reorder->mean << "Subtract per feature: " << "TODO" /*std::vector subtract_per_feature*/; - } - else if (primitive->type == cldnn::normalize::type_id()) - { + } else if (primitive->type == cldnn::normalize::type_id()) { auto normalize = std::static_pointer_cast(primitive); std::string norm_region = normalize->across_spatial ? "across_spatial" : "within_spatial"; str << "Norm region: " << norm_region << " Epsilon: " << normalize->epsilon << " Scale input id: " << normalize->scale_input; - } - else if (primitive->type == cldnn::convolution::type_id()) - { + } else if (primitive->type == cldnn::convolution::type_id()) { auto convolution = std::static_pointer_cast(primitive); str << "Stride x: " << convolution->stride.spatial[0] << " Stride y: " << convolution->stride.spatial[1] << " Dilation x: " << convolution->dilation.spatial[0] << " Dilation y: " << convolution->dilation.spatial[1] << " Input offset x: " << convolution->input_offset.spatial[0] << " Input offset y: " << convolution->input_offset.spatial[1]; - } - else if (primitive->type == cldnn::activation::type_id()) - { + } else if (primitive->type == cldnn::activation::type_id()) { auto activation = std::static_pointer_cast(primitive); str << "Negative slope: " << activation->additional_params.a << " Negative slope input id: " << activation->additional_params_input; - } - else if (primitive->type == cldnn::pooling::type_id()) - { + } else if (primitive->type == cldnn::pooling::type_id()) { auto pooling = std::static_pointer_cast(primitive); std::string pooling_mode = (pooling->mode == cldnn::pooling_mode::max) ? 
"max" : "average"; str << "Pooling mode: " << pooling_mode << " Input offset x: " << pooling->input_offset.spatial[0] << " Input offset y: " << pooling->input_offset.spatial[1] << " Stride x: " << pooling->stride.spatial[0] << " Stride y: " << pooling->stride.spatial[1] << " Size x: " << pooling->size.spatial[0] << " Size y: " << pooling->size.spatial[1]; - } - else - { + } else { throw std::runtime_error("Not implemented yet for this primitive."); } @@ -577,130 +548,130 @@ T div_up(const T a, const U b) { return (a + b - 1) / b; } -inline void print_bin_blob(cldnn::memory& mem, std::string name) -{ - auto&& size = mem.get_layout().size; +// inline void print_bin_blob(cldnn::memory& mem, std::string name) +// { +// auto&& size = mem.get_layout().size; - std::cerr << name; - std::cerr << " shape: "; - std::cerr << size.batch[0] << " "; - std::cerr << size.feature[0] << " "; - std::cerr << size.spatial[1] << " "; - std::cerr << size.spatial[0] << " "; - std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; +// std::cerr << name; +// std::cerr << " shape: "; +// std::cerr << size.batch[0] << " "; +// std::cerr << size.feature[0] << " "; +// std::cerr << size.spatial[1] << " "; +// std::cerr << size.spatial[0] << " "; +// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - auto mem_ptr = mem.pointer(); +// auto mem_ptr = mem.pointer(); - bool packed_ic = mem.get_layout().format == cldnn::format::b_fs_yx_32fp ? 1 : 0; - int B = size.batch[0]; - int C = size.feature[0]; - int H = size.spatial[1]; - int W = size.spatial[0]; +// bool packed_ic = mem.get_layout().format == cldnn::format::b_fs_yx_32fp ? 1 : 0; +// int B = size.batch[0]; +// int C = size.feature[0]; +// int H = size.spatial[1]; +// int W = size.spatial[0]; - for (cldnn::tensor::value_type b = 0; b < B; ++b) - { - for (cldnn::tensor::value_type f = 0; f < C; ++f) - { - for (cldnn::tensor::value_type y = 0; y < H; ++y) - { - for (cldnn::tensor::value_type x = 0; x < W; ++x) - { - if (!packed_ic) - { - size_t input_it = b * C*H*W + f * W*H + y * W + x; - size_t elem = input_it / 32; - size_t bit = input_it % 32; - std::cerr << ((mem_ptr[elem] & (1 << bit)) >> bit) << " "; - } - else - { - size_t input_it = b * (C / 32)*W*H + (f / 32)*W*H + y * W + x; - size_t bit = f % 32; - std::cerr << ((mem_ptr[input_it] & (1 << bit)) >> bit) << " "; - } - } - std::cerr << std::endl; - } - std::cerr << std::endl; - } - std::cerr << "==============" << std::endl; - } -} +// for (cldnn::tensor::value_type b = 0; b < B; ++b) +// { +// for (cldnn::tensor::value_type f = 0; f < C; ++f) +// { +// for (cldnn::tensor::value_type y = 0; y < H; ++y) +// { +// for (cldnn::tensor::value_type x = 0; x < W; ++x) +// { +// if (!packed_ic) +// { +// size_t input_it = b * C*H*W + f * W*H + y * W + x; +// size_t elem = input_it / 32; +// size_t bit = input_it % 32; +// std::cerr << ((mem_ptr[elem] & (1 << bit)) >> bit) << " "; +// } +// else +// { +// size_t input_it = b * (C / 32)*W*H + (f / 32)*W*H + y * W + x; +// size_t bit = f % 32; +// std::cerr << ((mem_ptr[input_it] & (1 << bit)) >> bit) << " "; +// } +// } +// std::cerr << std::endl; +// } +// std::cerr << std::endl; +// } +// std::cerr << "==============" << std::endl; +// } +// } -inline void print_bin_blob_packed(cldnn::memory& mem, std::string name) -{ - auto&& size = mem.get_layout().size; +// inline void print_bin_blob_packed(cldnn::memory& mem, std::string name) +// { +// auto&& size = 
mem.get_layout().size; - std::cerr << name; - std::cerr << " shape: "; - std::cerr << size.batch[0] << " "; - std::cerr << size.feature[0] << " "; - std::cerr << size.spatial[1] << " "; - std::cerr << size.spatial[0] << " "; - std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; +// std::cerr << name; +// std::cerr << " shape: "; +// std::cerr << size.batch[0] << " "; +// std::cerr << size.feature[0] << " "; +// std::cerr << size.spatial[1] << " "; +// std::cerr << size.spatial[0] << " "; +// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - auto mem_ptr = mem.pointer(); +// auto mem_ptr = mem.pointer(); - int B = size.batch[0]; - int C = size.feature[0]; - int H = size.spatial[1]; - int W = size.spatial[0]; +// int B = size.batch[0]; +// int C = size.feature[0]; +// int H = size.spatial[1]; +// int W = size.spatial[0]; - for (cldnn::tensor::value_type b = 0; b < B; ++b) - { - for (cldnn::tensor::value_type f = 0; f < div_up(C, 32); ++f) - { - for (cldnn::tensor::value_type y = 0; y < H; ++y) - { - for (cldnn::tensor::value_type x = 0; x < W; ++x) - { - size_t input_it = b * div_up(C, 32)*W*H + f * W*H + y * W + x; - std::cerr << mem_ptr[input_it] << " "; - } - std::cerr << std::endl; - } - std::cerr << std::endl; - } - std::cerr << "==============" << std::endl; - } -} +// for (cldnn::tensor::value_type b = 0; b < B; ++b) +// { +// for (cldnn::tensor::value_type f = 0; f < div_up(C, 32); ++f) +// { +// for (cldnn::tensor::value_type y = 0; y < H; ++y) +// { +// for (cldnn::tensor::value_type x = 0; x < W; ++x) +// { +// size_t input_it = b * div_up(C, 32)*W*H + f * W*H + y * W + x; +// std::cerr << mem_ptr[input_it] << " "; +// } +// std::cerr << std::endl; +// } +// std::cerr << std::endl; +// } +// std::cerr << "==============" << std::endl; +// } +// } -inline void print_blob(cldnn::memory& mem, std::string name) -{ - auto&& size = mem.get_layout().size; +// inline void print_blob(cldnn::memory& mem, std::string name) +// { +// auto&& size = mem.get_layout().size; - std::cerr << name; - std::cerr << " shape: "; - std::cerr << size.batch[0] << " "; - std::cerr << size.feature[0] << " "; - std::cerr << size.spatial[1] << " "; - std::cerr << size.spatial[0] << " "; - std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; +// std::cerr << name; +// std::cerr << " shape: "; +// std::cerr << size.batch[0] << " "; +// std::cerr << size.feature[0] << " "; +// std::cerr << size.spatial[1] << " "; +// std::cerr << size.spatial[0] << " "; +// std::cerr << "(" << size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0] << ")" << std::endl; - auto mem_ptr = mem.pointer(); +// auto mem_ptr = mem.pointer(); - int B = size.batch[0]; - int C = size.feature[0]; - int H = size.spatial[1]; - int W = size.spatial[0]; +// int B = size.batch[0]; +// int C = size.feature[0]; +// int H = size.spatial[1]; +// int W = size.spatial[0]; + +// for (cldnn::tensor::value_type b = 0; b < B; ++b) +// { +// for (cldnn::tensor::value_type f = 0; f < C; ++f) +// { +// for (cldnn::tensor::value_type y = 0; y < H; ++y) +// { +// for (cldnn::tensor::value_type x = 0; x < W; ++x) +// { +// size_t input_it = b * C*W*H + f * W*H + y * W + x; +// std::cerr << std::setw(4) << mem_ptr[input_it] << " "; +// } +// std::cerr << std::endl; +// } +// std::cerr << std::endl; +// } +// std::cerr << "==============" << std::endl; +// } +// } - for 
(cldnn::tensor::value_type b = 0; b < B; ++b) - { - for (cldnn::tensor::value_type f = 0; f < C; ++f) - { - for (cldnn::tensor::value_type y = 0; y < H; ++y) - { - for (cldnn::tensor::value_type x = 0; x < W; ++x) - { - size_t input_it = b * C*W*H + f * W*H + y * W + x; - std::cerr << std::setw(4) << mem_ptr[input_it] << " "; - } - std::cerr << std::endl; - } - std::cerr << std::endl; - } - std::cerr << "==============" << std::endl; - } -} } // namespace tests - diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/uniform_quantized_real_distribution.hpp b/inference-engine/thirdparty/clDNN/tests/test_utils/uniform_quantized_real_distribution.hpp index ea817fae29e..464e15db83f 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_utils/uniform_quantized_real_distribution.hpp +++ b/inference-engine/thirdparty/clDNN/tests/test_utils/uniform_quantized_real_distribution.hpp @@ -20,7 +20,7 @@ #include #include -namespace cldnn { namespace tests { namespace distributions { +namespace tests { namespace distributions { /// @cond PRIVATE namespace detail @@ -812,7 +812,7 @@ public: return os; } - + /// @brief Deserialize from stream. /// @@ -1537,4 +1537,5 @@ private: underlying_dist_type _base_distribution; ///< Base distribution used to generate input numbers. }; -}}} // namespace cldnn { namespace tests { namespace distributions +} // namespace distributions +} // namespace tests diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt b/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt deleted file mode 100644 index 663f50000cc..00000000000 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -# ========================================= Name / Output settings ===================================== - -set(CLDNN_BUILD__PROJ "clDNN_tests_core_internal") -set(CLDNN_BUILD__PROJ_LABEL "${CLDNN_BUILD__PROJ}") -set(CLDNN_BUILD__PROJ_OUTPUT_NAME "${CLDNN_BUILD__PROJ}${CLDNN__OUT_CPU_SUFFIX}") - -# =========================================== Compiler options ========================================= -intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN "" - SET - StandardCxx11 - RttiEnabled - ) - -if (NOT MSVC) - intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN "" - SET_RAW - "-Wno-error=conversion-null" - "-Wno-error=type-limits" - ) -endif () - -find_package(OpenMP) -if (OPENMP_FOUND) - add_definitions(-DOPENMP_FOUND) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() - -# ================================== Compiler preprocessor definitions ================================= - -# ========================================= Source/Header files ======================================== - -set(__CLDNN_Label__main "") -file(GLOB __CLDNN_Sources__main - "${CMAKE_CURRENT_SOURCE_DIR}/*.h" - "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" - ) - -set(__CLDNN_Directory__test_cases "${CMAKE_CURRENT_SOURCE_DIR}/test_cases") -set(__CLDNN_Label__test_cases "test cases") -file(GLOB __CLDNN_Sources__test_cases - "${__CLDNN_Directory__test_cases}/*.h" - "${__CLDNN_Directory__test_cases}/*.hpp" - "${__CLDNN_Directory__test_cases}/*.cpp" - ) - -set(__CLDNN_Directory__test_utils "${CMAKE_CURRENT_SOURCE_DIR}/../tests/test_utils") -set(__CLDNN_Label__test_utils "test utils") 
-file(GLOB __CLDNN_Sources__test_utils - "${__CLDNN_Directory__test_utils}/*.h" - "${__CLDNN_Directory__test_utils}/*.hpp" - "${__CLDNN_Directory__test_utils}/*.cpp" - ) - -set(__CLDNN_Directory__gtest "${CLDNN__GTEST_DIR}") -set(__CLDNN_Label__gtest "google test framework") -file(GLOB __CLDNN_Sources__gtest - "${__CLDNN_Directory__gtest}/*.cc" - ) - -if(UNIX) - set_source_files_properties(${__CLDNN_Sources__gtest} PROPERTIES - COMPILE_FLAGS "-Wno-undef") -endif() - -set(__CLDNN_AllSources - ${__CLDNN_Sources__main} - ${__CLDNN_Sources__test_cases} - ${__CLDNN_Sources__test_utils} - ${__CLDNN_Sources__gtest} - ) -# Helping with some generators. -set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE) - - -# =============================================== Filters ============================================== - -source_group("${__CLDNN_Label__main}" FILES ${__CLDNN_Sources__main}) -source_group("${__CLDNN_Label__test_cases}" FILES ${__CLDNN_Sources__test_cases}) -source_group("${__CLDNN_Label__test_utils}" FILES ${__CLDNN_Sources__test_utils}) - -# ===================================== Include/Link directories ======================================= - -include_directories( - "${CLDNN__MAIN_DIR}" - "${CLDNN__MAIN_DIR}/src" - "${CLDNN__MAIN_DIR}/src/include" - "${CLDNN__KERNEL_SELECTOR_DIR}/core" - "${CLDNN__KERNEL_SELECTOR_DIR}/core/common" - "${CLDNN__KERNEL_SELECTOR_DIR}/common" - "${__CLDNN_Directory__test_utils}" - "${CMAKE_CURRENT_SOURCE_DIR}" - ) - -include_directories(SYSTEM - "${CLDNN__GTEST_DIR}") - -# =================================== Link targets and dependencies ==================================== - -# Tests executable. -add_executable("${CLDNN_BUILD__PROJ}" - ${__CLDNN_AllSources} - ) - -if(COMMAND set_ie_threading_interface_for) - set_ie_threading_interface_for("${CLDNN_BUILD__PROJ}") -endif() - -set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}") -set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME "${CLDNN_BUILD__PROJ_OUTPUT_NAME}") - -# Set library dependencies -target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE "${CLDNN_BUILD__PROJ__clDNN}") - -if(WIN32) - target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE setupapi) -elseif((NOT ANDROID) AND (UNIX)) - target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE pthread) -endif() -target_link_libraries("${CLDNN_BUILD__PROJ}" PRIVATE ${CLDNN__SYSTEM_LINK_LIBRARIES}) - -# =================================== Custom pre- and post-steps ======================================= - -if(CLDNN__RUN_TESTS) - add_custom_command(TARGET "${CLDNN_BUILD__PROJ}" POST_BUILD - WORKING_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}" - COMMAND "${CLDNN_BUILD__PROJ}" - COMMENT "Executing tests..." 
- ) -endif() - -# ====================================================================================================== diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp b/inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp deleted file mode 100644 index 7e583e9b068..00000000000 --- a/inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "gtest/gtest.h" - -int main(int argc, char* argv[]) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} \ No newline at end of file From b6d7af12ae84653c622b62a46756e5ddd030b523 Mon Sep 17 00:00:00 2001 From: Anna Khakimova Date: Wed, 16 Jun 2021 10:07:46 +0300 Subject: [PATCH 29/43] PP: Color conversion kernels refactoring (#5985) * [PP] POC of vectorised kernels refactoring - base/framework changes * [PP] POC of vectorised kernels refactoring - ChanToPlane implementation * [PP] POC of vectorised kernels refactoring - ChanToPlane, switched order of dispatching to first choose ISA type * [PP] POC of vectorised kernels refactoring - ChanToPlane, moved choose ISA stage to kernel package generation * Preparing * NV12ToRGB kernel refactoring * * I420ToRGB kernel refactoring Co-authored-by: Anton Potapov --- .../ie_preprocess_gapi_kernels_neon.cpp | 31 +- .../ie_preprocess_gapi_kernels_neon.hpp | 31 +- .../ie_preprocess_gapi_kernels_avx2.cpp | 31 +- .../ie_preprocess_gapi_kernels_avx2.hpp | 42 +- .../ie_preprocess_gapi_kernels_avx512.cpp | 31 +- .../ie_preprocess_gapi_kernels_avx512.hpp | 39 +- .../ie_preprocess_gapi_kernels_sse42.cpp | 30 +- .../ie_preprocess_gapi_kernels_sse42.hpp | 30 +- .../ie_preprocess_gapi_kernels.cpp | 585 ++++++++++-------- .../ie_preprocess_gapi_kernels_impl.hpp | 31 + .../ie_preprocess_gapi_kernels_simd_impl.hpp | 136 ++-- 11 files changed, 517 insertions(+), 500 deletions(-) diff --git a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp index 779db927c32..8290fda25ed 100644 --- a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp +++ b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.cpp @@ -90,21 +90,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width) { - calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width); -} - -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width) { - calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width); -} - void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap, int xmaxdf, const short xindex[], const Q0_16 xalpha[], @@ -119,14 +104,6 @@ void calcRowArea_32F(float dst[], const float *src[], const Size& inSz, calcRowArea_impl(dst, src, inSz, outSz, yalpha, ymap, xmaxdf, xindex, xalpha, vbuf); } -void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { - copyRow_8U_impl(in, out, length); -} - -void copyRow_32F(const float in[], float out[], int length) { - copyRow_32F_impl(in, out, length); -} - // Resize (bi-linear, 32F) void calcRowLinear_32F(float* dst[], const float* src0[], @@ -708,6 +685,14 @@ void 
calcRowLinear_8UC1(uint8_t* dst[], } } } // namespace neon + +template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, int chan, int chs, uint8_t* out, const int length); +template void chanToPlaneRowImpl(neon_tag, const float* in, int chan, int chs, float * out, const int length); + +template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.hpp b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.hpp index f3b21f73a95..4e0b82a6259 100644 --- a/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.hpp +++ b/inference-engine/src/preprocessing/arm_neon/ie_preprocess_gapi_kernels_neon.hpp @@ -167,26 +167,31 @@ void splitRow_32FC4(const float in[], float out3[], int length); -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width); - void calculate_i420_to_rgb(const uchar **srcY, const uchar *srcU, const uchar *srcV, uchar **dstRGBx, int width); -void copyRow_8U(const uint8_t in[], - uint8_t out[], - int length); - -void copyRow_32F(const float in[], - float out[], - int length); - } // namespace neon + +template +void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length); + +extern template void chanToPlaneRowImpl(neon_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +extern template void chanToPlaneRowImpl(neon_tag, const float* in, const int chan, const int chs, float * out, const int length); + +template +void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +extern template void nv12ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +template +void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); + +extern template void i420ToRgbRowImpl(neon_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp index 22d86e10538..aefd2e2dfc3 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.cpp @@ -107,21 +107,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width) { - calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width); -} - -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width) { - calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width); -} - void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, const Size& outSz, 
Q0_16 yalpha, const MapperUnit8U &ymap, int xmaxdf, const short xindex[], const Q0_16 xalpha[], @@ -555,13 +540,6 @@ void calcRowLinear_8U(C4, std::array, 4> &dst, calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } -void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { - copyRow_8U_impl(in, out, length); -} -void copyRow_32F(const float in[], float out[], int length) { - copyRow_32F_impl(in, out, length); -} - void calcRowLinear_32F(float *dst[], const float *src0[], const float *src1[], @@ -575,6 +553,15 @@ void calcRowLinear_32F(float *dst[], } } // namespace avx + +template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float* out, const int length); + +template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* uv_row, + uint8_t** out_rows, const int buf_width); + +template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.hpp b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.hpp index 7226a51ddca..512121b0f3d 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.hpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx2/ie_preprocess_gapi_kernels_avx2.hpp @@ -181,27 +181,29 @@ void splitRow_32FC4(const float in[], float out2[], float out3[], int length); - -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width); - -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width); - -void copyRow_8U(const uint8_t in[], - uint8_t out[], - int length); - -void copyRow_32F(const float in[], - float out[], - int length); - } // namespace avx + + +template +void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length); + +extern template void chanToPlaneRowImpl(avx2_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +extern template void chanToPlaneRowImpl(avx2_tag, const float* in, const int chan, const int chs, float * out, const int length); + +template +void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, + uint8_t** out_rows, const int buf_width); + +extern template void nv12ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, + const uint8_t* uv_row, uint8_t** out_rows, + const int buf_width); + +template +void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); + +extern template void i420ToRgbRowImpl(avx2_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp index 05925d45c19..78b74f532d6 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp +++ 
b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.cpp @@ -101,21 +101,6 @@ void splitRow_32FC4(const float in[], float out0[], float out1[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width) { - calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width); -} - -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width) { - calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width); -} - void calcRowArea_8U(uchar dst[], const uchar *src[], const Size& inSz, const Size& outSz, Q0_16 yalpha, const MapperUnit8U &ymap, int xmaxdf, const short xindex[], const Q0_16 xalpha[], @@ -636,14 +621,6 @@ void calcRowLinear_8U(C4, std::array, 4> &dst, calcRowLinear_8UC_Impl(dst, src0, src1, alpha, clone, mapsx, beta, tmp, inSz, outSz, lpi); } -void copyRow_8U(const uint8_t in[], uint8_t out[], int length) { - copyRow_8U_impl(in, out, length); -} - -void copyRow_32F(const float in[], float out[], int length) { - copyRow_32F_impl(in, out, length); -} - void calcRowLinear_32F(float *dst[], const float *src0[], const float *src1[], @@ -657,6 +634,14 @@ void calcRowLinear_32F(float *dst[], } } // namespace avx512 + +template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length); + +template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.hpp b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.hpp index 2323d9345d4..8d2778781a3 100644 --- a/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.hpp +++ b/inference-engine/src/preprocessing/cpu_x86_avx512/ie_preprocess_gapi_kernels_avx512.hpp @@ -180,27 +180,26 @@ void splitRow_32FC4(const float in[], float out2[], float out3[], int length); - -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width); - -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width); - -void copyRow_8U(const uint8_t in[], - uint8_t out[], - int length); - -void copyRow_32F(const float in[], - float out[], - int length); - } // namespace avx512 + + +template +void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length); + +extern template void chanToPlaneRowImpl(avx512_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +extern template void chanToPlaneRowImpl(avx512_tag, const float* in, const int chan, const int chs, float* out, const int length); + +template +void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +extern template void nv12ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); + +template +void 
i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); + +extern template void i420ToRgbRowImpl(avx512_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp index 86fb156931e..4823e335371 100644 --- a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp +++ b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp @@ -1365,33 +1365,13 @@ void splitRow_32FC4(const float in[], splitRow_32FC4_Impl(in, out0, out1, out2, out3, length); } -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width) { - calculate_nv12_to_rgb_impl(srcY, srcUV, dstRGBx, width); -} +template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan, const int chs, uint8_t* out, const int length); +template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan, const int chs, float* out, const int length); -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width) { - calculate_i420_to_rgb_impl(srcY, srcU, srcV, dstRGBx, width); -} - -void copyRow_8U(const uint8_t in[], - uint8_t out[], - int length) { - copyRow_8U_impl(in, out, length); -} - -void copyRow_32F(const float in[], - float out[], - int length) { - copyRow_32F_impl(in, out, length); -} +template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); +template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp index 4e1eb471cd5..8726013357e 100644 --- a/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp +++ b/inference-engine/src/preprocessing/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp @@ -180,25 +180,25 @@ void splitRow_32FC4(const float in[], float out3[], int length); -void calculate_nv12_to_rgb(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width); +template +void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, + T* out, const int length); -void calculate_i420_to_rgb(const uchar **srcY, - const uchar *srcU, - const uchar *srcV, - uchar **dstRGBx, - int width); +extern template void chanToPlaneRowImpl(sse42_tag, const uint8_t* in, const int chan, + const int chs, uint8_t* out, const int length); +extern template void chanToPlaneRowImpl(sse42_tag, const float* in, const int chan, + const int chs, float* out, const int length); +template +void nv12ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); -void copyRow_8U(const uint8_t in[], - uint8_t out[], - int length); +extern template void nv12ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* uv_row, uint8_t** out_rows, const int buf_width); 
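+// Note: nv12ToRgbRowImpl/chanToPlaneRowImpl are explicitly instantiated in
+// ie_preprocess_gapi_kernels_sse42.cpp (see the .cpp diff above); the extern template
+// declarations here only expose those instantiations to other translation units.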
-void copyRow_32F(const float in[], - float out[], - int length); +template +void i420ToRgbRowImpl(isa_tag_t, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); +extern template void i420ToRgbRowImpl(sse42_tag, const uint8_t** y_rows, const uint8_t* u_row, + const uint8_t* v_row, uint8_t** out_rows, const int buf_width); } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp index 0dd374bd3ff..4cf4fffd74b 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.cpp @@ -468,15 +468,86 @@ struct type_to_type {}; template struct type_dispatch_impl; +//FIXME: add test for type_dispatch template class typelist, typename... type> struct type_dispatch_impl> { template static result_t dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_value_t&& type_to_value, default_t default_value) { result_t res = default_value; - std::initializer_list ({(type_id == type_to_id(type_to_type{}) ? (res = type_to_value(type_to_type{})), 0 : 0)...}); + bool matched = false; + std::initializer_list ({ + !matched && (type_id == type_to_id(type_to_type{})) ? + (matched = true, res = type_to_value(type_to_type{})), 0 + : 0 + ... + }); return res; } + + template + static result_t dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value) { + result_t res = default_value; + + bool matched = false; + std::initializer_list ({ + !matched && pred(type_to_type{}) ? + (matched = true, res = type_to_value(type_to_type{})), 0 + : 0 + ... + }); + return res; + } +}; + +template +struct concat; + +template +using concat_t = typename concat::type; + +template class left_list, typename ... left_types, template class right_list, typename ... right_types> +struct concat, right_list>{ + using type = left_list; +}; + +template< class T, class U > +using is_same_t = typename std::is_same::type; + +template struct if_c_impl; + +template struct if_c_impl { + using type = T; +}; + +template struct if_c_impl { + using type = E; +}; + +template +using if_c = typename if_c_impl::type; + +template +using if_ = typename if_c_impl::type; + +template +struct remove; + +template +using remove_t = typename remove::type; + + +template class list, typename head_t, typename ... types, typename t> +struct remove, t> { + using type = concat_t< + if_, list<>, list>, + remove_t, t> + >; +}; + +template class list, typename t> +struct remove, t> { + using type = list<>; }; } // namespace @@ -490,6 +561,13 @@ result_t type_dispatch(type_id_t type_id, type_to_id_t&& type_to_id, type_to_val std::forward(default_value)); } +template ()(type_to_type> {}))> +result_t type_dispatch(pred_t&& pred, type_to_value_t&& type_to_value, default_t default_value = {}) { + return type_dispatch_impl::template dispatch(std::forward(pred), + std::forward(type_to_value), + std::forward(default_value)); +} namespace { struct cv_type_id { @@ -668,81 +746,47 @@ GAPI_FLUID_KERNEL(FSplit4, Split4, false) { }; //---------------------------------------------------------------------- - -template -static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, int length) { -// AVX512 implementation of wide universal intrinsics is slower than AVX2. -// It is turned off until the cause isn't found out. 
-#if 0 - #ifdef HAVE_AVX512 - if (with_cpu_x86_avx512f()) { - if (std::is_same::value && chs == 1) { - avx512::copyRow_8U(in, out, length); - return; - } - - if (std::is_same::value && chs == 1) { - avx512::copyRow_32F(reinterpret_cast(in), - reinterpret_cast(out), - length); - return; - } - } - #endif // HAVE_AVX512 +using isas_set = typelist< +#ifdef HAVE_AVX512 + avx512_tag, #endif +#ifdef HAVE_AVX2 + avx2_tag, +#endif +#ifdef HAVE_SSE + sse42_tag, +#endif +#ifdef HAVE_NEON + neon_tag, +#endif + //scalar "ISA" have to be the last one in the list, + //as the search for supported ISA is performed until first match + scalar_tag>; +#ifdef HAVE_AVX512 +bool is_present(avx512_tag) { return with_cpu_x86_avx512f(); } +#endif // HAVE_AVX512 - #ifdef HAVE_AVX2 - if (with_cpu_x86_avx2()) { - if (std::is_same::value && chs == 1) { - avx::copyRow_8U(in, out, length); - return; - } +#ifdef HAVE_AVX2 +bool is_present(avx2_tag) { return with_cpu_x86_avx2(); } +#endif // HAVE_AVX2 - if (std::is_same::value && chs == 1) { - avx::copyRow_32F(reinterpret_cast(in), - reinterpret_cast(out), - length); - return; - } +#ifdef HAVE_SSE +bool is_present(sse42_tag) { return with_cpu_x86_sse42(); } +#endif // HAVE_SSE + +#ifdef HAVE_NEON +bool is_present(neon_tag) { return true; } +#endif // HAVE_NEON + +//scalar version of kernels is always available +bool is_present(scalar_tag) { return true; } + +struct is_isa_present { + template< typename isa_tag_t> + bool operator()(type_to_type) { + return is_present(isa_tag_t{}); } - #endif // HAVE_AVX2 - #ifdef HAVE_SSE - if (with_cpu_x86_sse42()) { - if (std::is_same::value && chs == 1) { - copyRow_8U(in, out, length); - return; - } - - if (std::is_same::value && chs == 1) { - copyRow_32F(reinterpret_cast(in), - reinterpret_cast(out), - length); - return; - } - } - #endif // HAVE_SSE - - #ifdef HAVE_NEON - if (std::is_same::value && chs == 1) { - neon::copyRow_8U(in, out, length); - return; - } - - if (std::is_same::value && chs == 1) { - neon::copyRow_32F(reinterpret_cast(in), - reinterpret_cast(out), - length); - return; - } - #endif // HAVE_NEON - - const auto inT = reinterpret_cast(in); - auto outT = reinterpret_cast< T*>(out); - - for (int x = 0; x < length; x++) { - outT[x] = inT[x*chs + chan]; - } -} +}; // GAPI_OCV_KERNEL(OCVChanToPlane, ChanToPlane) { // static void run(const cv::Mat &in, int chan, cv::Mat &out) { @@ -774,15 +818,225 @@ static void chanToPlaneRow(const uint8_t* in, int chan, int chs, uint8_t* out, i // } // }; +namespace { + +using chan_to_plane_supported_types = typelist; + +template +void chanToPlaneRowImpl(scalar_tag, const T* in, int chan, int chs, T* out, int length) { + for (int x = 0; x < length; x++) { + out[x] = in[x*chs + chan]; + } +} + +template +struct typed_chan_to_plane_row { + using p_f = void (*)(const uint8_t* in, int chan, int chs, uint8_t* out, int length); + + template + p_f operator()(type_to_type ) { + return [](const uint8_t* in, int chan, int chs, uint8_t* out, int length){ + const auto inT = reinterpret_cast(in); + auto outT = reinterpret_cast< type*>(out); + + chanToPlaneRowImpl(isa_tag_t{}, inT, chan, chs, outT, length); + }; + } +}; +} //namespace + +namespace { + +using nv12_to_rgb_supported_types = typelist; + +void nv12ToRgbRowImpl(scalar_tag, const uint8_t** y_rows, const uint8_t* uv_row, + uint8_t** out_rows, const int buf_width) { + for (int i = 0; i < buf_width; i += 2) { + uint8_t u = uv_row[i]; + uint8_t v = uv_row[i + 1]; + int ruv, guv, buv; + uvToRGBuv(u, v, ruv, guv, buv); + + for (int y = 0; y < 2; y++) 
{ + for (int x = 0; x < 2; x++) { + uint8_t vy = y_rows[y][i + x]; + uint8_t r, g, b; + yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); + + out_rows[y][3 * (i + x)] = r; + out_rows[y][3 * (i + x) + 1] = g; + out_rows[y][3 * (i + x) + 2] = b; + } + } + } +} + +template +struct typed_nv12_to_rgb_row { + using p_f = void (*)(const uint8_t** y_rows, const uint8_t* uv_row, + uint8_t** out_rows, const int buf_width); + + template + p_f operator()(type_to_type) { + return [](const uint8_t** y_rows, const uint8_t* uv_row, + uint8_t** out_rows, const int buf_width) { + const auto inT1 = reinterpret_cast(y_rows); + const auto inT2 = reinterpret_cast(uv_row); + auto outT = reinterpret_cast(out_rows); + + nv12ToRgbRowImpl(isa_tag_t{}, inT1, inT2, outT, buf_width); + }; + } +}; +} // namespace + +namespace { + +using i420_to_rgb_supported_types = typelist; + +static void i420ToRgbRowImpl(scalar_tag, const uint8_t** y_rows, + const uint8_t* u_row, + const uint8_t* v_row, + uint8_t** out_rows, + const int buf_width) { + for (int i = 0; i < buf_width; i += 2) { + uchar u = u_row[i / 2]; + uchar v = v_row[i / 2]; + int ruv, guv, buv; + uvToRGBuv(u, v, ruv, guv, buv); + + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + uchar vy = y_rows[y][i + x]; + uchar r, g, b; + yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); + + out_rows[y][3 * (i + x)] = r; + out_rows[y][3 * (i + x) + 1] = g; + out_rows[y][3 * (i + x) + 2] = b; + } + } + } +} + +template +struct typed_i420_to_rgb_row { + using p_f = void (*)(const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row, + uint8_t** out_rows, const int buf_width); + + template + p_f operator()(type_to_type) { + return [](const uint8_t** y_rows, const uint8_t* u_row, const uint8_t* v_row, + uint8_t** out_rows, const int buf_width) { + const auto inT1 = reinterpret_cast(y_rows); + const auto inT2 = reinterpret_cast(u_row); + const auto inT3 = reinterpret_cast(v_row); + auto outT = reinterpret_cast(out_rows); + + i420ToRgbRowImpl(isa_tag_t{}, inT1, inT2, inT3, outT, buf_width); + }; + } +}; +} // namespace + +template +struct choose_impl { GAPI_FLUID_KERNEL(FChanToPlane, ChanToPlane, false) { static const int Window = 1; static void run(const cv::gapi::fluid::View& in, int chan, cv::gapi::fluid::Buffer& out) { - const auto rowFunc = (in.meta().depth == CV_8U) ? 
&chanToPlaneRow : &chanToPlaneRow; + GAPI_DbgAssert(is_cv_type_in_list(out.meta().depth)); + + const auto rowFunc = type_dispatch(out.meta().depth, cv_type_id{}, typed_chan_to_plane_row{}, nullptr); + + GAPI_DbgAssert(rowFunc); + rowFunc(in.InLineB(0), chan, in.meta().chan, out.OutLineB(), in.length()); } }; +GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) { + static const int Window = 1; + static const int LPI = 2; + static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB; + + static void run(const cv::gapi::fluid::View & in_y, + const cv::gapi::fluid::View & in_uv, + cv::gapi::fluid::Buffer & out) { + GAPI_DbgAssert(is_cv_type_in_list(out.meta().depth)); + + const uchar* uv_row = in_uv.InLineB(0); + const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) }; + uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) }; + + int buf_width = out.length(); + + const auto rowFunc = type_dispatch(out.meta().depth, cv_type_id{}, typed_nv12_to_rgb_row{}, nullptr); + + GAPI_DbgAssert(rowFunc); + + rowFunc(y_rows, uv_row, out_rows, buf_width); + } +}; + +GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) { + static const int Window = 1; + static const int LPI = 2; + static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB; + + static void run(const cv::gapi::fluid::View & in_y, + const cv::gapi::fluid::View & in_u, + const cv::gapi::fluid::View & in_v, + cv::gapi::fluid::Buffer & out) { + GAPI_DbgAssert(is_cv_type_in_list(out.meta().depth)); + + const uchar* u_row = in_u.InLineB(0); + const uchar* v_row = in_v.InLineB(0); + const uchar* y_rows[2] = { in_y.InLineB(0), in_y.InLineB(1) }; + uchar* out_rows[2] = { out.OutLineB(0), out.OutLineB(1) }; + + int buf_width = out.length(); + GAPI_DbgAssert(in_u.length() == in_v.length()); + + const auto rowFunc = type_dispatch(out.meta().depth, cv_type_id{}, typed_i420_to_rgb_row{}, nullptr); + + GAPI_DbgAssert(rowFunc); + + rowFunc(y_rows, u_row, v_row, out_rows, buf_width); + } +}; +}; + +namespace { +struct ColorConversionISA { + cv::gapi::GKernelPackage& pckg; + + ColorConversionISA(cv::gapi::GKernelPackage& _pckg) : pckg(_pckg) {} + + template + bool operator()(type_to_type) { + pckg.include::FI420toRGB>(); + pckg.include::FNV12toRGB>(); + pckg.include::FChanToPlane>(); + //at the moment type_dispatch requires something to be returned by the lambda + return true; + } +}; +} //namespace + +cv::gapi::GKernelPackage FColorConversionChooseISA() { + // At the moment AVX512 implementation of wide universal intrinsics is slower than AVX2. + // So, disable it for now. 
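+    // remove_t drops avx512_tag from isas_set, so the is_isa_present search below only
+    // considers the remaining ISAs (AVX2, SSE4.2, NEON when available) plus the scalar fallback.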
+ using isas = remove_t; + + cv::gapi::GKernelPackage pckg; + ColorConversionISA ctpISA{pckg}; + + type_dispatch(is_isa_present{}, ctpISA, false); + + return pckg; +} + //---------------------------------------------------------------------- G_TYPED_KERNEL(ScalePlane8u, , "com.intel.ie.scale_plane_8u") { @@ -2234,180 +2488,6 @@ GAPI_FLUID_KERNEL(FScalePlaneArea8u, ScalePlaneArea8u, true) { } }; -static const int ITUR_BT_601_CY = 1220542; -static const int ITUR_BT_601_CUB = 2116026; -static const int ITUR_BT_601_CUG = -409993; -static const int ITUR_BT_601_CVG = -852492; -static const int ITUR_BT_601_CVR = 1673527; -static const int ITUR_BT_601_SHIFT = 20; - -static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) { - int uu, vv; - uu = static_cast(u) - 128; - vv = static_cast(v) - 128; - - ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv; - guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu; - buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu; -} - -static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv, - uchar& r, uchar& g, uchar& b) { - int yy = static_cast(vy); - int y = std::max(0, yy - 16) * ITUR_BT_601_CY; - r = saturate_cast((y + ruv) >> ITUR_BT_601_SHIFT); - g = saturate_cast((y + guv) >> ITUR_BT_601_SHIFT); - b = saturate_cast((y + buv) >> ITUR_BT_601_SHIFT); -} - -static void calculate_nv12_to_rgb_fallback(const uchar **y_rows, - const uchar *uv_row, - uchar **out_rows, - int buf_width) { - for (int i = 0; i < buf_width; i += 2) { - uchar u = uv_row[i]; - uchar v = uv_row[i + 1]; - int ruv, guv, buv; - uvToRGBuv(u, v, ruv, guv, buv); - - for (int y = 0; y < 2; y++) { - for (int x = 0; x < 2; x++) { - uchar vy = y_rows[y][i + x]; - uchar r, g, b; - yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); - - out_rows[y][3*(i + x)] = r; - out_rows[y][3*(i + x) + 1] = g; - out_rows[y][3*(i + x) + 2] = b; - } - } - } -} - -static void calculate_i420_to_rgb_fallback(const uchar **y_rows, - const uchar *u_row, - const uchar *v_row, - uchar **out_rows, - int buf_width) { - for (int i = 0; i < buf_width; i += 2) { - uchar u = u_row[i / 2]; - uchar v = v_row[i / 2]; - int ruv, guv, buv; - uvToRGBuv(u, v, ruv, guv, buv); - - for (int y = 0; y < 2; y++) { - for (int x = 0; x < 2; x++) { - uchar vy = y_rows[y][i + x]; - uchar r, g, b; - yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); - - out_rows[y][3*(i + x)] = r; - out_rows[y][3*(i + x) + 1] = g; - out_rows[y][3*(i + x) + 2] = b; - } - } - } -} - -GAPI_FLUID_KERNEL(FNV12toRGB, NV12toRGB, false) { - static const int Window = 1; - static const int LPI = 2; - static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB; - - static void run(const cv::gapi::fluid::View &in_y, - const cv::gapi::fluid::View &in_uv, - cv::gapi::fluid::Buffer &out) { - const uchar* uv_row = in_uv.InLineB(0); - const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)}; - uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)}; - - int buf_width = out.length(); - -// AVX512 implementation of wide universal intrinsics is slower than AVX2. -// It is turned off until the cause isn't found out. 
- #if 0 - #ifdef HAVE_AVX512 - if (with_cpu_x86_avx512_core()) { - #define CV_AVX_512DQ 1 - avx512::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width); - return; - } - #endif // HAVE_AVX512 - #endif - - #ifdef HAVE_AVX2 - if (with_cpu_x86_avx2()) { - avx::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width); - return; - } - #endif // HAVE_AVX2 - #ifdef HAVE_SSE - if (with_cpu_x86_sse42()) { - calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width); - return; - } - #endif // HAVE_SSE - - #ifdef HAVE_NEON - neon::calculate_nv12_to_rgb(y_rows, uv_row, out_rows, buf_width); - return; - #endif // HAVE_NEON - - calculate_nv12_to_rgb_fallback(y_rows, uv_row, out_rows, buf_width); - } -}; - -GAPI_FLUID_KERNEL(FI420toRGB, I420toRGB, false) { - static const int Window = 1; - static const int LPI = 2; - static const auto Kind = cv::GFluidKernel::Kind::YUV420toRGB; - - static void run(const cv::gapi::fluid::View &in_y, - const cv::gapi::fluid::View &in_u, - const cv::gapi::fluid::View &in_v, - cv::gapi::fluid::Buffer &out) { - const uchar* u_row = in_u.InLineB(0); - const uchar* v_row = in_v.InLineB(0); - const uchar* y_rows[2] = {in_y. InLineB(0), in_y. InLineB(1)}; - uchar* out_rows[2] = {out.OutLineB(0), out.OutLineB(1)}; - - int buf_width = out.length(); - GAPI_DbgAssert(in_u.length() == in_v.length()); - - // AVX512 implementation of wide universal intrinsics is slower than AVX2. - // It is turned off until the cause isn't found out. - #if 0 - #ifdef HAVE_AVX512 - if (with_cpu_x86_avx512_core()) { - #define CV_AVX_512DQ 1 - avx512::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width); - return; - } - #endif // HAVE_AVX512 - #endif - - #ifdef HAVE_AVX2 - if (with_cpu_x86_avx2()) { - avx::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width); - return; - } - #endif // HAVE_AVX2 - #ifdef HAVE_SSE - if (with_cpu_x86_sse42()) { - calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width); - return; - } - #endif // HAVE_SSE - - #ifdef HAVE_NEON - neon::calculate_i420_to_rgb(y_rows, u_row, v_row, out_rows, buf_width); - return; - #endif // HAVE_NEON - - calculate_i420_to_rgb_fallback(y_rows, u_row, v_row, out_rows, buf_width); - } -}; - namespace { template @@ -2520,9 +2600,10 @@ GAPI_FLUID_KERNEL(FDivC, GDivC, false) { using namespace kernels; cv::gapi::GKernelPackage preprocKernels() { - return cv::gapi::kernels - < FChanToPlane - , FScalePlanes + return combine( + FColorConversionChooseISA(), + cv::gapi::kernels + (); + >()); } } // namespace gapi diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp index 452a29b2e2d..3acf5a56b26 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_impl.hpp @@ -34,6 +34,12 @@ namespace InferenceEngine { namespace gapi { namespace kernels { +struct avx512_tag {}; +struct avx2_tag {}; +struct sse42_tag {}; +struct neon_tag {}; +struct scalar_tag {}; + template static inline DST saturate_cast(SRC x); template<> inline short saturate_cast(int x) { return (std::min)(SHRT_MAX, (std::max)(SHRT_MIN, x)); } template<> inline short saturate_cast(float x) { return saturate_cast(static_cast(std::rint(x))); } @@ -116,6 +122,31 @@ static inline Q8_8 mulaw(Q0_16 a, Q8_8 w) { return static_cast((a * w) >> static inline float mulas(float a, float s) { return a * s; } static inline float mulaw(float a, float w) { return a * w; } +static 
const int ITUR_BT_601_CY = 1220542; +static const int ITUR_BT_601_CUB = 2116026; +static const int ITUR_BT_601_CUG = -409993; +static const int ITUR_BT_601_CVG = -852492; +static const int ITUR_BT_601_CVR = 1673527; +static const int ITUR_BT_601_SHIFT = 20; + +static inline void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) { + int uu, vv; + uu = static_cast(u) - 128; + vv = static_cast(v) - 128; + + ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv; + guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu; + buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu; +} + +static inline void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, const int buv, + uchar& r, uchar& g, uchar& b) { + int yy = static_cast(vy); + int y = std::max(0, yy - 16) * ITUR_BT_601_CY; + r = saturate_cast((y + ruv) >> ITUR_BT_601_SHIFT); + g = saturate_cast((y + guv) >> ITUR_BT_601_SHIFT); + b = saturate_cast((y + buv) >> ITUR_BT_601_SHIFT); +} } // namespace kernels } // namespace gapi } // namespace InferenceEngine diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_simd_impl.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_simd_impl.hpp index a59111b86b6..e559b83b87e 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_simd_impl.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels_simd_impl.hpp @@ -374,23 +374,6 @@ CV_ALWAYS_INLINE void splitRow_32FC4_Impl(const float in[], float out0[], float //------------------------------------------------------------------------------ -static const int ITUR_BT_601_CY = 1220542; -static const int ITUR_BT_601_CUB = 2116026; -static const int ITUR_BT_601_CUG = -409993; -static const int ITUR_BT_601_CVG = -852492; -static const int ITUR_BT_601_CVR = 1673527; -static const int ITUR_BT_601_SHIFT = 20; - -CV_ALWAYS_INLINE void uvToRGBuv(const uchar u, const uchar v, int& ruv, int& guv, int& buv) { - int uu, vv; - uu = static_cast(u) - 128; - vv = static_cast(v) - 128; - - ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * vv; - guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * vv + ITUR_BT_601_CUG * uu; - buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * uu; -} - CV_ALWAYS_INLINE void uvToRGBuv(const v_uint8& u, const v_uint8& v, v_int32 (&ruv)[4], v_int32 (&guv)[4], v_int32 (&buv)[4]) { @@ -418,15 +401,6 @@ CV_ALWAYS_INLINE void uvToRGBuv(const v_uint8& u, const v_uint8& v, } } -CV_ALWAYS_INLINE void yRGBuvToRGB(const uchar vy, const int ruv, const int guv, - const int buv, uchar& r, uchar& g, uchar& b) { - int yy = static_cast(vy); - int y = std::max(0, yy - 16) * ITUR_BT_601_CY; - r = saturate_cast((y + ruv) >> ITUR_BT_601_SHIFT); - g = saturate_cast((y + guv) >> ITUR_BT_601_SHIFT); - b = saturate_cast((y + buv) >> ITUR_BT_601_SHIFT); -} - CV_ALWAYS_INLINE void yRGBuvToRGB(const v_uint8& vy, const v_int32 (&ruv)[4], const v_int32 (&guv)[4], @@ -464,16 +438,15 @@ CV_ALWAYS_INLINE void yRGBuvToRGB(const v_uint8& vy, bb = v_pack_u(b0, b1); } -CV_ALWAYS_INLINE void calculate_nv12_to_rgb_impl(const uchar **srcY, - const uchar *srcUV, - uchar **dstRGBx, - int width) { +template +CV_ALWAYS_INLINE void nv12ToRgbRowImpl(isa_tag_t, const uchar** srcY, const uchar* srcUV, + uchar** dstRGBx, const int width) { int i = 0; #if MANUAL_SIMD constexpr int nlanes = v_uint8::nlanes; - for ( ; i <= width - 2*nlanes; i += 2*nlanes) { + for (; i <= width - 2 * nlanes; i += 2 * nlanes) { v_uint8 u, v; 
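+        // split the interleaved NV12 UV row into separate U and V vectors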
v_load_deinterleave(srcUV + i, u, v); @@ -510,9 +483,7 @@ CV_ALWAYS_INLINE void calculate_nv12_to_rgb_impl(const uchar **srcY, v_store_interleave(dstRGBx[1] + i * 3, b1_0, g1_0, r1_0); v_store_interleave(dstRGBx[1] + i * 3 + 3 * nlanes, b1_1, g1_1, r1_1); } - - vx_cleanup(); - + //vx_cleanup(); #endif for (; i < width; i += 2) { @@ -527,25 +498,25 @@ CV_ALWAYS_INLINE void calculate_nv12_to_rgb_impl(const uchar **srcY, uchar r, g, b; yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); - dstRGBx[y][3*(i + x)] = r; - dstRGBx[y][3*(i + x) + 1] = g; - dstRGBx[y][3*(i + x) + 2] = b; + dstRGBx[y][3 * (i + x)] = r; + dstRGBx[y][3 * (i + x) + 1] = g; + dstRGBx[y][3 * (i + x) + 2] = b; } } } } -CV_ALWAYS_INLINE void calculate_i420_to_rgb_impl(const uchar **srcY, const uchar *srcU, - const uchar *srcV, uchar **dstRGBx, - int width) { +template +CV_ALWAYS_INLINE void i420ToRgbRowImpl(isa_tag_t, const uint8_t** srcY, const uint8_t* srcU, + const uint8_t* srcV, uint8_t** dstRGBx, const int width) { int i = 0; #if MANUAL_SIMD constexpr int nlanes = v_uint8::nlanes; - for ( ; i <= width - 2*nlanes; i += 2*nlanes) { - v_uint8 u = vx_load(srcU + i/2); - v_uint8 v = vx_load(srcV + i/2); + for (; i <= width - 2 * nlanes; i += 2 * nlanes) { + v_uint8 u = vx_load(srcU + i / 2); + v_uint8 v = vx_load(srcV + i / 2); v_uint8 vy[4]; v_load_deinterleave(srcY[0] + i, vy[0], vy[1]); @@ -580,14 +551,11 @@ CV_ALWAYS_INLINE void calculate_i420_to_rgb_impl(const uchar **srcY, const uch v_store_interleave(dstRGBx[1] + i * 3, b1_0, g1_0, r1_0); v_store_interleave(dstRGBx[1] + i * 3 + 3 * nlanes, b1_1, g1_1, r1_1); } - - vx_cleanup(); - - #endif - + //vx_cleanup(); +#endif for (; i < width; i += 2) { - uchar u = srcU[i/2]; - uchar v = srcV[i/2]; + uchar u = srcU[i / 2]; + uchar v = srcV[i / 2]; int ruv, guv, buv; uvToRGBuv(u, v, ruv, guv, buv); @@ -597,9 +565,9 @@ CV_ALWAYS_INLINE void calculate_i420_to_rgb_impl(const uchar **srcY, const uch uchar r, g, b; yRGBuvToRGB(vy, ruv, guv, buv, r, g, b); - dstRGBx[y][3*(i + x)] = r; - dstRGBx[y][3*(i + x) + 1] = g; - dstRGBx[y][3*(i + x) + 2] = b; + dstRGBx[y][3 * (i + x)] = r; + dstRGBx[y][3 * (i + x) + 1] = g; + dstRGBx[y][3 * (i + x) + 2] = b; } } } @@ -737,48 +705,24 @@ CV_ALWAYS_INLINE void calcRowArea_impl(T dst[], const T *src[], const Size& inSz //------------------------------------------------------------------------------ -#if MANUAL_SIMD template -CV_ALWAYS_INLINE void copyRow_impl(const T in[], T out[], int l) { - VecT r; - r = vx_load(&in[l]); - vx_store(&out[l], r); -} -#endif - -CV_ALWAYS_INLINE void copyRow_8U_impl(const uint8_t in[], uint8_t out[], int length) { +CV_ALWAYS_INLINE void copyRow_Impl(const T in[], T out[], int length) { int l = 0; #if MANUAL_SIMD - constexpr int nlanes = v_uint8::nlanes; + const int nlanes = VecT::nlanes; + + auto copy_row = [](const T in[], T out[], int l) { + VecT r = vx_load(&in[l]); + vx_store(&out[l], r); + }; for (; l <= length - nlanes; l += nlanes) { - copyRow_impl(in, out, l); + copy_row(in, out, l); } if (l < length && length >= nlanes) { - copyRow_impl(in, out, length - nlanes); - l = length; - } -#endif - - for (; l < length; l++) { - out[l] = in[l]; - } -} - -CV_ALWAYS_INLINE void copyRow_32F_impl(const float in[], float out[], int length) { - int l = 0; - -#if MANUAL_SIMD - constexpr int nlanes = v_float32::nlanes; - - for (; l <= length - nlanes; l += nlanes) { - copyRow_impl(in, out, l); - } - - if (l < length && length >= nlanes) { - copyRow_impl(in, out, length - nlanes); + copy_row(in, out, length - nlanes); l = length; } 
#endif @@ -916,6 +860,26 @@ CV_ALWAYS_INLINE void calcRowLinear_32FC1(float *dst[], } } +template +struct vector_type_of; + +template +using vector_type_of_t = typename vector_type_of::type; + +template struct vector_type_of { using type = v_uint8; }; +template struct vector_type_of { using type = v_float32;}; + +template +CV_ALWAYS_INLINE void chanToPlaneRowImpl(isa_tag_t, const T* in, const int chan, const int chs, T* out, const int length) { + if (chs == 1) { + copyRow_Impl, T>(in, out, length); + return; + } + + for (int x = 0; x < length; x++) { + out[x] = in[x*chs + chan]; + } +} } // namespace kernels } // namespace gapi } // namespace InferenceEngine From dbe1c1ab49b3e59ec3c41ca715a9e7f01714bf77 Mon Sep 17 00:00:00 2001 From: Chenhu Wang Date: Wed, 16 Jun 2021 15:32:32 +0800 Subject: [PATCH 30/43] [CPU] Change rounding type in load/store emitters (#6134) --- .../src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp index 57689d6302d..83bc04c530d 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp @@ -106,7 +106,6 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 ®_src, int offset_byte, In break; case Precision::I32: if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) { - h->uni_vroundps(Vmm(out_vec_idx), Vmm(out_vec_idx), 3); // rounding to zero h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx)); } break; @@ -552,7 +551,6 @@ template switch (src_prc) { case Precision::FP32: if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) { - h->uni_vroundps(Vmm(in_vec_idx), Vmm(in_vec_idx), 3); // rounding to zero h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx)); } break; From 72cb75ffc7df4bf4fc59791b81d9627df34e4e83 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Wed, 16 Jun 2021 11:14:29 +0300 Subject: [PATCH 31/43] [LPT] Reshape folding extending: cherry-pick to master (#6151) * [LPT] Reshape folding extending * [LPT] tests addition * typo quick fix --- .../include/low_precision/network_helper.hpp | 6 ++ .../group_convolution_transformation.cpp | 75 +++++++++++++++++++ .../group_convolution_transformation.cpp | 28 +++++++ .../group_convolution_transformation.cpp | 28 +++++++ .../group_convolution_transformation.hpp | 5 ++ .../group_convolution_transformation.cpp | 16 ++++ .../group_convolution_function.hpp | 3 + .../src/group_convolution_function.cpp | 14 +++- 8 files changed, 174 insertions(+), 1 deletion(-) diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 8cf52a13fe2..bf45cdeae94 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -270,6 +270,12 @@ template std::shared_ptr fold_reshape(Args&&... 
args) { std::shared_ptr node = std::make_shared(std::forward(args)...); if (node->get_output_size() == 1) { + // issue #57985: remove fold_reshape & reuse nGraph implementation + const auto values = as_type_ptr(node->input_value(1).get_node_shared_ptr())->template cast_vector(); + if (std::any_of(values.begin(), values.end(), [](const int64_t value) { return (value == 0) || (value == -1); })) { + return fold(std::forward(args)...); + } + OutputVector folded; if (is_type(node->input_value(0).get_node_shared_ptr()) && is_type(node->input_value(1).get_node_shared_ptr())) { diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp index d90999bb8cc..22844037c98 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/group_convolution_transformation.cpp @@ -53,6 +53,7 @@ public: ngraph::Shape inputShape; ngraph::Shape outputShape; size_t group; + int groupCalculationDimention; Actual actual; Expected expected; }; @@ -67,6 +68,7 @@ public: testValues.inputShape, testValues.outputShape, testValues.group, + testValues.groupCalculationDimention, testValues.actual.dequantization, testValues.actual.weights, testValues.actual.fakeQuantizeOnWeights, @@ -84,6 +86,7 @@ public: testValues.inputShape, testValues.outputShape, testValues.group, + testValues.groupCalculationDimention, testValues.expected.dequantizationBefore, testValues.expected.weights, testValues.expected.fakeQuantizeOnWeights, @@ -101,6 +104,7 @@ public: testValues.inputShape << "_" << testValues.outputShape << "_" << testValues.group << "_" << + testValues.groupCalculationDimention << "_" << testValues.actual.precisionBeforeDequantization << "_" << testValues.actual.dequantization << "_" << "_weights_" << testValues.actual.weights->get_element_type() << "_" << "{ " << @@ -123,6 +127,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -142,12 +147,68 @@ const std::vector testValues = { {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 24, 1, 1 }}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) } }, + + // group convolution, tensor quantization, with zero point + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + { 1, 6, 224, 224 }, + { 1, 24, 218, 218 }, + 3ul, + 0, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{ 2.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 24, 1, 1 }}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, + + // group convolution, tensor quantization, with zero point + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + { 1, 6, 224, 224 }, + { 1, 24, 218, 218 }, + 3ul, + 1, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, { 128.f }, { 0.02f }}, + op::Constant::create(ngraph::element::f32, 
ngraph::Shape{}, std::vector{ 2.f }), + { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } }, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, { { 128.f }, ngraph::element::f32, { 1, 6, 1, 1 }, false }, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{ -125.f }), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{ 0.0002f }, ngraph::element::f32, { 1, 24, 1, 1 }}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, + // group convolution, tensor quantization, with zero point { LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -173,6 +234,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::f32, @@ -198,6 +260,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -241,6 +304,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -274,6 +338,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::f32, @@ -299,6 +364,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -324,6 +390,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -349,6 +416,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::f32, @@ -374,6 +442,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 6ul, + -1, // ActualValues { ngraph::element::u8, @@ -414,6 +483,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 6ul, + -1, // ActualValues { ngraph::element::u8, @@ -447,6 +517,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 6ul, + -1, // ActualValues { ngraph::element::f32, @@ -472,6 +543,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 6ul, + -1, // ActualValues { ngraph::element::u8, @@ -497,6 +569,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 6, 218, 218 }, 6ul, + -1, // ActualValues { ngraph::element::f32, @@ -523,6 +596,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, @@ -567,6 +641,7 @@ const std::vector testValues = { { 1, 6, 224, 224 }, { 1, 24, 218, 218 }, 3ul, + -1, // ActualValues { ngraph::element::u8, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp index 36e81ef438d..739897ee273 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp @@ -26,14 +26,40 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 24, 18, 18 }, 3ul, + -1, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, { 255ul, 
ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" + }, + // group convolution, tensor quantization + { + ngraph::Shape{ 1, 6, 24, 24 }, + ngraph::Shape{ 1, 24, 18, 18 }, + 3ul, + 0, + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" + }, + // group convolution, tensor quantization + { + ngraph::Shape{ 1, 6, 24, 24 }, + ngraph::Shape{ 1, 24, 18, 18 }, + 3ul, + 1, + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" }, // group convolution, per-channel quantization { ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 24, 18, 18 }, 3ul, + -1, { 256ul, ngraph::Shape { 6, 1, 1, 1 }, @@ -49,6 +75,7 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 6, 18, 18 }, 6ul, + -1, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, }, @@ -57,6 +84,7 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 6, 18, 18 }, 6ul, + -1, { 256ul, ngraph::Shape { 6, 1, 1, 1 }, diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp index 8c346ba667e..624d933188b 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp @@ -26,14 +26,40 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 24, 18, 18 }, 3ul, + -1, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" + }, + // group convolution, tensor quantization + { + ngraph::Shape{ 1, 6, 24, 24 }, + ngraph::Shape{ 1, 24, 18, 18 }, + 3ul, + 0, + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" + }, + // group convolution, tensor quantization + { + ngraph::Shape{ 1, 6, 24, 24 }, + ngraph::Shape{ 1, 24, 18, 18 }, + 3ul, + 1, + { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, + { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, + "Convolution", + "U8" }, // group convolution, per-channel quantization { ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 24, 18, 18 }, 3ul, + -1, { 256ul, ngraph::Shape { 6, 1, 1, 1 }, @@ -49,6 +75,7 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 6, 18, 18 }, 6ul, + -1, { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } }, { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -127.f }, { 127.f } }, }, @@ -57,6 +84,7 @@ const std::vector pa ngraph::Shape{ 1, 6, 24, 24 }, ngraph::Shape{ 1, 6, 18, 18 }, 6ul, + -1, { 256ul, ngraph::Shape { 6, 1, 1, 1 }, diff --git 
a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/group_convolution_transformation.hpp b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/group_convolution_transformation.hpp index 1910df5e46d..b7d92417182 100644 --- a/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/group_convolution_transformation.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/low_precision_transformations/group_convolution_transformation.hpp @@ -18,8 +18,11 @@ public: ngraph::Shape inputShape; ngraph::Shape outputShape; size_t group; + int groupCalculationDimention; ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData; ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights; + std::string layerName; + std::string expectedKernelType; }; typedef std::tuple< @@ -38,6 +41,8 @@ public: protected: void SetUp() override; + void Run() override; + private: void validate(); }; diff --git a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/group_convolution_transformation.cpp b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/group_convolution_transformation.cpp index f56c9743def..c9baa329329 100644 --- a/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/group_convolution_transformation.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/low_precision_transformations/group_convolution_transformation.cpp @@ -33,6 +33,7 @@ std::string GroupConvolutionTransformation::getTestCaseName(testing::TestParamIn param.inputShape << "_" << param.outputShape << "_" << param.group << "_" << + param.groupCalculationDimention << "_" << param.fakeQuantizeOnData << "_" << param.fakeQuantizeOnWeights; return result.str(); @@ -51,12 +52,27 @@ void GroupConvolutionTransformation::SetUp() { param.inputShape, param.outputShape, param.group, + param.groupCalculationDimention, param.fakeQuantizeOnData, param.fakeQuantizeOnWeights); validate(); } +void GroupConvolutionTransformation::Run() { + LayerTestsCommon::Run(); + + const auto param = std::get<3>(GetParam()); + if (!param.layerName.empty()) { + const auto actualPrecision = getRuntimePrecisionByType(param.layerName); + auto expectedPrecision = param.expectedKernelType; + if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ngraph::element::f16) { + expectedPrecision = "FP16"; + } + EXPECT_EQ(actualPrecision, expectedPrecision); + } +} + void GroupConvolutionTransformation::validate() { ngraph::element::Type netPrecision; ngraph::pass::low_precision::LayerTransformation::Params params; diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp index 2273de2602c..1b94400f7bc 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp @@ -22,6 +22,7 @@ public: const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int groupCalculationDimention, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, const ngraph::builder::subgraph::FakeQuantizeOnWeights 
fakeQuantizeOnWeights); @@ -31,6 +32,7 @@ public: const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int groupCalculationDimention, const FakeQuantizeOnData& fakeQuantizeOnData, const FakeQuantizeOnWeights& fakeQuantizeOnWeights); @@ -39,6 +41,7 @@ public: const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int calculatedDimention, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp index 4d3b6153f08..205d57747ec 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp @@ -28,6 +28,7 @@ std::shared_ptr createWeightsOriginal( const size_t inputChannelsCount, const size_t outputChannelsCount, const size_t groupCount, + const int calculatedDimention, const size_t kernelSize, const std::vector& weightsValues, const FakeQuantizeOnWeights& fakeQuantizeOnWeights, @@ -70,7 +71,12 @@ std::shared_ptr createWeightsOriginal( ngraph::opset1::Constant::create( element::i64, Shape{ 5 }, - std::vector({ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize })), + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize), + static_cast(kernelSize) }), true); } @@ -82,6 +88,7 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int groupCalculationDimention, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) { @@ -103,6 +110,7 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( inputChannelsCount, outputChannelsCount, groupCount, + groupCalculationDimention, kernelSize, weightsConst->cast_vector(), fakeQuantizeOnWeights, @@ -126,6 +134,7 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int groupCalculationDimention, const FakeQuantizeOnData& fakeQuantizeOnData, const FakeQuantizeOnWeights& fakeQuantizeOnWeights) { const auto input = std::make_shared(precision, ngraph::Shape(inputShape)); @@ -156,6 +165,7 @@ std::shared_ptr GroupConvolutionFunction::getOriginal( inputChannelsCount, outputChannelsCount, groupCount, + groupCalculationDimention, kernelSize, weightsValues, fakeQuantizeOnWeights, @@ -178,6 +188,7 @@ std::shared_ptr GroupConvolutionFunction::get( const ngraph::Shape& inputShape, const ngraph::Shape& outputShape, const size_t groupCount, + const int calculatedDimention, const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore, std::shared_ptr weightsConst, const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights, @@ -216,6 +227,7 @@ std::shared_ptr GroupConvolutionFunction::get( inputChannelsCount, outputChannelsCount, groupCount, + 
calculatedDimention, kernelSize, weightsConst->cast_vector(), fakeQuantizeOnWeights, From b0c508d4ff8698d80dd0187465cdaba93234e59d Mon Sep 17 00:00:00 2001 From: Alexander Shchepetov Date: Wed, 16 Jun 2021 12:50:16 +0300 Subject: [PATCH 32/43] Add layer tests (#5789) Co-authored-by: alexander.shchepetov --- tests/layer_tests/.gitignore | 9 + tests/layer_tests/README.md | 32 + tests/layer_tests/common/constants.py | 25 + tests/layer_tests/common/layer_test_class.py | 155 ++++ tests/layer_tests/common/layer_utils.py | 58 ++ .../common/onnx_layer_test_class.py | 63 ++ .../layer_tests/common/tf_layer_test_class.py | 61 ++ .../layer_tests/common/utils/common_utils.py | 84 +++ .../common/utils/multiprocessing_utils.py | 99 +++ tests/layer_tests/common/utils/parsers.py | 28 + tests/layer_tests/common/utils/tf_utils.py | 109 +++ tests/layer_tests/conftest.py | 77 ++ tests/layer_tests/onnx_tests/conftest.py | 12 + tests/layer_tests/onnx_tests/test_abs.py | 178 +++++ .../onnx_tests/test_add_sub_mul_div.py | 337 +++++++++ tests/layer_tests/onnx_tests/test_and.py | 268 +++++++ tests/layer_tests/onnx_tests/test_argmax.py | 153 ++++ tests/layer_tests/onnx_tests/test_bn.py | 133 ++++ tests/layer_tests/onnx_tests/test_ceil.py | 194 +++++ tests/layer_tests/onnx_tests/test_clip.py | 169 +++++ tests/layer_tests/onnx_tests/test_concat.py | 225 ++++++ tests/layer_tests/onnx_tests/test_conv.py | 413 +++++++++++ .../onnx_tests/test_conv_transpose.py | 215 ++++++ tests/layer_tests/onnx_tests/test_cumsum.py | 267 +++++++ .../onnx_tests/test_dequantize_linear.py | 205 ++++++ tests/layer_tests/onnx_tests/test_dropout.py | 169 +++++ tests/layer_tests/onnx_tests/test_elu.py | 185 +++++ .../onnx_tests/test_embedding_bag.py | 131 ++++ tests/layer_tests/onnx_tests/test_flatten.py | 302 ++++++++ tests/layer_tests/onnx_tests/test_floor.py | 179 +++++ tests/layer_tests/onnx_tests/test_gather.py | 261 +++++++ tests/layer_tests/onnx_tests/test_gemm.py | 308 ++++++++ .../onnx_tests/test_hard_sigmoid.py | 235 ++++++ tests/layer_tests/onnx_tests/test_identity.py | 183 +++++ .../onnx_tests/test_image_scaler.py | 161 +++++ .../onnx_tests/test_instance_normalization.py | 111 +++ .../layer_tests/onnx_tests/test_leaky_relu.py | 204 ++++++ tests/layer_tests/onnx_tests/test_log.py | 193 +++++ .../layer_tests/onnx_tests/test_logsoftmax.py | 240 +++++++ tests/layer_tests/onnx_tests/test_lrn.py | 133 ++++ tests/layer_tests/onnx_tests/test_lstm.py | 160 +++++ tests/layer_tests/onnx_tests/test_matmul.py | 190 +++++ .../test_mean_variance_normalization.py | 73 ++ tests/layer_tests/onnx_tests/test_neg.py | 91 +++ tests/layer_tests/onnx_tests/test_non_zero.py | 194 +++++ tests/layer_tests/onnx_tests/test_not.py | 190 +++++ tests/layer_tests/onnx_tests/test_or.py | 277 +++++++ tests/layer_tests/onnx_tests/test_pad.py | 212 ++++++ tests/layer_tests/onnx_tests/test_pooling.py | 432 +++++++++++ tests/layer_tests/onnx_tests/test_prelu.py | 161 +++++ .../layer_tests/onnx_tests/test_reciprocal.py | 171 +++++ tests/layer_tests/onnx_tests/test_reduce.py | 179 +++++ .../layer_tests/onnx_tests/test_reduce_lp.py | 247 +++++++ tests/layer_tests/onnx_tests/test_relu.py | 183 +++++ tests/layer_tests/onnx_tests/test_reshape.py | 283 ++++++++ tests/layer_tests/onnx_tests/test_resize.py | 676 ++++++++++++++++++ .../layer_tests/onnx_tests/test_roi_align.py | 109 +++ tests/layer_tests/onnx_tests/test_scale.py | 144 ++++ tests/layer_tests/onnx_tests/test_scatter.py | 125 ++++ tests/layer_tests/onnx_tests/test_sigmoid.py | 196 +++++ 
tests/layer_tests/onnx_tests/test_sign.py | 176 +++++ tests/layer_tests/onnx_tests/test_slice.py | 399 +++++++++++ tests/layer_tests/onnx_tests/test_softmax.py | 167 +++++ tests/layer_tests/onnx_tests/test_softplus.py | 182 +++++ .../onnx_tests/test_split_concat.py | 307 ++++++++ tests/layer_tests/onnx_tests/test_sqrt.py | 187 +++++ tests/layer_tests/onnx_tests/test_squeeze.py | 210 ++++++ tests/layer_tests/onnx_tests/test_sum.py | 322 +++++++++ tests/layer_tests/onnx_tests/test_topk.py | 167 +++++ .../layer_tests/onnx_tests/test_transpose.py | 178 +++++ .../onnx_tests/test_trigonometry.py | 298 ++++++++ .../layer_tests/onnx_tests/test_unsqueeze.py | 210 ++++++ tests/layer_tests/onnx_tests/test_upsample.py | 188 +++++ tests/layer_tests/onnx_tests/test_where.py | 97 +++ tests/layer_tests/onnx_tests/test_xor.py | 268 +++++++ tests/layer_tests/requirements.txt | 2 + .../layer_tests/tensorflow_tests/conftest.py | 12 + .../tensorflow_tests/permutation_utils.py | 22 + .../tensorflow_tests/test_tf_Add.py | 281 ++++++++ .../tensorflow_tests/test_tf_BatchToSpace.py | 80 +++ .../tensorflow_tests/test_tf_BiasAdd.py | 194 +++++ .../tensorflow_tests/test_tf_Bucketize.py | 83 +++ .../tensorflow_tests/test_tf_Ceil.py | 78 ++ .../tensorflow_tests/test_tf_Concat.py | 114 +++ .../tensorflow_tests/test_tf_ELU.py | 88 +++ .../tensorflow_tests/test_tf_Eltwise.py | 79 ++ .../tensorflow_tests/test_tf_FakeQuantize.py | 125 ++++ .../tensorflow_tests/test_tf_Floor.py | 78 ++ .../tensorflow_tests/test_tf_Identity.py | 85 +++ .../tensorflow_tests/test_tf_Log.py | 79 ++ .../tensorflow_tests/test_tf_LogSoftmax.py | 138 ++++ .../tensorflow_tests/test_tf_Mul.py | 279 ++++++++ .../tensorflow_tests/test_tf_NormalizeL2.py | 207 ++++++ .../tensorflow_tests/test_tf_OneHot.py | 159 ++++ .../tensorflow_tests/test_tf_Pooling.py | 230 ++++++ .../tensorflow_tests/test_tf_ReLU.py | 85 +++ .../tensorflow_tests/test_tf_ReLU6.py | 85 +++ .../test_tf_Resample_pattern_new.py | 81 +++ .../tensorflow_tests/test_tf_Rsqrt.py | 73 ++ .../tensorflow_tests/test_tf_Select.py | 107 +++ .../tensorflow_tests/test_tf_Softplus.py | 90 +++ .../tensorflow_tests/test_tf_SpaceToBatch.py | 80 +++ .../tensorflow_tests/test_tf_Squeeze.py | 124 ++++ .../tensorflow_tests/test_tf_Sub.py | 294 ++++++++ .../tensorflow_tests/test_tf_Swish.py | 88 +++ .../tensorflow_tests/test_tf_TopK.py | 143 ++++ 106 files changed, 17876 insertions(+) create mode 100644 tests/layer_tests/.gitignore create mode 100644 tests/layer_tests/README.md create mode 100644 tests/layer_tests/common/constants.py create mode 100644 tests/layer_tests/common/layer_test_class.py create mode 100644 tests/layer_tests/common/layer_utils.py create mode 100644 tests/layer_tests/common/onnx_layer_test_class.py create mode 100644 tests/layer_tests/common/tf_layer_test_class.py create mode 100644 tests/layer_tests/common/utils/common_utils.py create mode 100644 tests/layer_tests/common/utils/multiprocessing_utils.py create mode 100644 tests/layer_tests/common/utils/parsers.py create mode 100644 tests/layer_tests/common/utils/tf_utils.py create mode 100644 tests/layer_tests/conftest.py create mode 100644 tests/layer_tests/onnx_tests/conftest.py create mode 100644 tests/layer_tests/onnx_tests/test_abs.py create mode 100644 tests/layer_tests/onnx_tests/test_add_sub_mul_div.py create mode 100644 tests/layer_tests/onnx_tests/test_and.py create mode 100644 tests/layer_tests/onnx_tests/test_argmax.py create mode 100644 tests/layer_tests/onnx_tests/test_bn.py create mode 100644 
tests/layer_tests/onnx_tests/test_ceil.py create mode 100644 tests/layer_tests/onnx_tests/test_clip.py create mode 100644 tests/layer_tests/onnx_tests/test_concat.py create mode 100644 tests/layer_tests/onnx_tests/test_conv.py create mode 100644 tests/layer_tests/onnx_tests/test_conv_transpose.py create mode 100644 tests/layer_tests/onnx_tests/test_cumsum.py create mode 100644 tests/layer_tests/onnx_tests/test_dequantize_linear.py create mode 100644 tests/layer_tests/onnx_tests/test_dropout.py create mode 100644 tests/layer_tests/onnx_tests/test_elu.py create mode 100644 tests/layer_tests/onnx_tests/test_embedding_bag.py create mode 100644 tests/layer_tests/onnx_tests/test_flatten.py create mode 100644 tests/layer_tests/onnx_tests/test_floor.py create mode 100644 tests/layer_tests/onnx_tests/test_gather.py create mode 100644 tests/layer_tests/onnx_tests/test_gemm.py create mode 100644 tests/layer_tests/onnx_tests/test_hard_sigmoid.py create mode 100644 tests/layer_tests/onnx_tests/test_identity.py create mode 100644 tests/layer_tests/onnx_tests/test_image_scaler.py create mode 100644 tests/layer_tests/onnx_tests/test_instance_normalization.py create mode 100644 tests/layer_tests/onnx_tests/test_leaky_relu.py create mode 100644 tests/layer_tests/onnx_tests/test_log.py create mode 100644 tests/layer_tests/onnx_tests/test_logsoftmax.py create mode 100644 tests/layer_tests/onnx_tests/test_lrn.py create mode 100644 tests/layer_tests/onnx_tests/test_lstm.py create mode 100644 tests/layer_tests/onnx_tests/test_matmul.py create mode 100644 tests/layer_tests/onnx_tests/test_mean_variance_normalization.py create mode 100644 tests/layer_tests/onnx_tests/test_neg.py create mode 100644 tests/layer_tests/onnx_tests/test_non_zero.py create mode 100644 tests/layer_tests/onnx_tests/test_not.py create mode 100644 tests/layer_tests/onnx_tests/test_or.py create mode 100644 tests/layer_tests/onnx_tests/test_pad.py create mode 100644 tests/layer_tests/onnx_tests/test_pooling.py create mode 100644 tests/layer_tests/onnx_tests/test_prelu.py create mode 100644 tests/layer_tests/onnx_tests/test_reciprocal.py create mode 100644 tests/layer_tests/onnx_tests/test_reduce.py create mode 100644 tests/layer_tests/onnx_tests/test_reduce_lp.py create mode 100644 tests/layer_tests/onnx_tests/test_relu.py create mode 100644 tests/layer_tests/onnx_tests/test_reshape.py create mode 100644 tests/layer_tests/onnx_tests/test_resize.py create mode 100644 tests/layer_tests/onnx_tests/test_roi_align.py create mode 100644 tests/layer_tests/onnx_tests/test_scale.py create mode 100644 tests/layer_tests/onnx_tests/test_scatter.py create mode 100644 tests/layer_tests/onnx_tests/test_sigmoid.py create mode 100644 tests/layer_tests/onnx_tests/test_sign.py create mode 100644 tests/layer_tests/onnx_tests/test_slice.py create mode 100644 tests/layer_tests/onnx_tests/test_softmax.py create mode 100644 tests/layer_tests/onnx_tests/test_softplus.py create mode 100644 tests/layer_tests/onnx_tests/test_split_concat.py create mode 100644 tests/layer_tests/onnx_tests/test_sqrt.py create mode 100644 tests/layer_tests/onnx_tests/test_squeeze.py create mode 100644 tests/layer_tests/onnx_tests/test_sum.py create mode 100644 tests/layer_tests/onnx_tests/test_topk.py create mode 100644 tests/layer_tests/onnx_tests/test_transpose.py create mode 100644 tests/layer_tests/onnx_tests/test_trigonometry.py create mode 100644 tests/layer_tests/onnx_tests/test_unsqueeze.py create mode 100644 tests/layer_tests/onnx_tests/test_upsample.py create mode 100644 
tests/layer_tests/onnx_tests/test_where.py create mode 100644 tests/layer_tests/onnx_tests/test_xor.py create mode 100644 tests/layer_tests/requirements.txt create mode 100644 tests/layer_tests/tensorflow_tests/conftest.py create mode 100644 tests/layer_tests/tensorflow_tests/permutation_utils.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Add.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_BatchToSpace.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Ceil.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Concat.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_ELU.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Eltwise.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_FakeQuantize.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Floor.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Identity.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Log.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Mul.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_OneHot.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Pooling.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_ReLU.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Rsqrt.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Select.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Softplus.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Squeeze.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Sub.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_Swish.py create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_TopK.py diff --git a/tests/layer_tests/.gitignore b/tests/layer_tests/.gitignore new file mode 100644 index 00000000000..f7e78b45447 --- /dev/null +++ b/tests/layer_tests/.gitignore @@ -0,0 +1,9 @@ +#IDE FILES +.pytest_cache + +#TESTS TEMP DIRECTORY +ir/ +out/ +tf_models/ +onnx_models/ + diff --git a/tests/layer_tests/README.md b/tests/layer_tests/README.md new file mode 100644 index 00000000000..5787a19bbd2 --- /dev/null +++ b/tests/layer_tests/README.md @@ -0,0 +1,32 @@ +# Layer tests + +This folder layer tests framework code and test files. + +## Getting Started + +#### Pre-requisites + +* OpenVINO should be configured as usual. + +#### Setup + +* Install requirements: + ```bash + pip3 install -r requirements.txt + ``` +* Set up environment variables for layer tests: + ```bash + export MO_ROOT=PATH_TO_MO + ``` + ```bash + export PYTHONPATH="path_to_openvino"/tests/layer_tests/:$PYTHONPATH + ``` + ```bash + export IE_APP_PATH="path_to_IE" + ``` +* Add IE dependencies in LD_LIBRARY_PATH. 
+ +## Run tests +```bash +py.test +``` diff --git a/tests/layer_tests/common/constants.py b/tests/layer_tests/common/constants.py new file mode 100644 index 00000000000..f1e97d43817 --- /dev/null +++ b/tests/layer_tests/common/constants.py @@ -0,0 +1,25 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + + +if 'MO_ROOT' in os.environ: + mo_bin = os.environ['MO_ROOT'] + if not os.path.exists(mo_bin): + raise EnvironmentError( + "Environment variable MO_ROOT points to non existing path {}".format(mo_bin)) +else: + raise EnvironmentError("MO_ROOT variable is not set") + +if os.environ.get('OUTPUT_DIR') is not None: + out_path = os.environ['OUTPUT_DIR'] +else: + script_path = os.path.dirname(os.path.realpath(__file__)) + out_path = os.path.join(script_path, 'out') + if not os.path.exists(out_path): + os.makedirs(out_path) + +# supported_devices : CPU, GPU, MYRIAD, FPGA +test_device = os.environ.get('TEST_DEVICE', 'CPU;GPU').split(';') +test_precision = os.environ.get('TEST_PRECISION', 'FP32;FP16').split(';') diff --git a/tests/layer_tests/common/layer_test_class.py b/tests/layer_tests/common/layer_test_class.py new file mode 100644 index 00000000000..28d08d23090 --- /dev/null +++ b/tests/layer_tests/common/layer_test_class.py @@ -0,0 +1,155 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import itertools +import os +import warnings +from pathlib import Path + +import numpy as np +from common.constants import test_device, test_precision +from common.layer_utils import IEInfer +from mo.utils.ir_engine.ir_engine import IREngine + +from common.utils.common_utils import generate_ir +from common.utils.parsers import mapping_parser + + +class CommonLayerTest: + input_model_key = "input_model" + + def produce_model_path(self, framework_model, save_path): + pass + + def get_framework_results(self, inputs_dict, model_path): + pass + + def _test(self, framework_model, ref_net, ie_device, precision, ir_version, temp_dir, + enabled_transforms='', disabled_transforms='', **kwargs): + """ + :param enabled_transforms/disabled_transforms: string with idxs of transforms that should be enabled/disabled. 
+ Example: "transform_1,transform_2" + """ + model_path = self.produce_model_path(framework_model=framework_model, save_path=temp_dir) + + # TODO Pass environment variables via subprocess environment + os.environ['MO_ENABLED_TRANSFORMS'] = enabled_transforms + os.environ['MO_DISABLED_TRANSFORMS'] = disabled_transforms + + mo_params = {self.input_model_key: model_path, + "output_dir": temp_dir, + "data_type": precision, "model_name": 'model' + } + + if 'input_shapes' in kwargs and len(kwargs['input_shapes']): + input_shapes_str = [] + for ishape in kwargs['input_shapes']: + input_shapes_str.append('[' + ','.join([str(i) for i in ishape]) + ']') + mo_params.update(dict(input_shape=','.join(input_shapes_str))) + + if 'input_names' in kwargs and len(kwargs['input_names']): + mo_params.update(dict(input=','.join(kwargs['input_names']))) + + exit_code, stderr = generate_ir(**mo_params) + + del os.environ['MO_ENABLED_TRANSFORMS'] + del os.environ['MO_DISABLED_TRANSFORMS'] + assert not exit_code, ("IR generation failed with {} exit code: {}".format(exit_code, stderr)) + + path_to_xml = Path(temp_dir, 'model.xml') + path_to_bin = Path(temp_dir, 'model.bin') + + ir = IREngine(path_to_xml, path_to_bin, precision=precision) + if ref_net is not None: + (flag, resp) = ir.compare(ref_net) + assert flag, '\n'.join(resp) + + # Prepare feed dict + if 'kwargs_to_prepare_input' in kwargs and kwargs['kwargs_to_prepare_input']: + inputs_dict = self._prepare_input(ir.get_inputs(), kwargs['kwargs_to_prepare_input']) + else: + inputs_dict = self._prepare_input(ir.get_inputs()) + + # IE infer: + ie_engine = IEInfer(model=path_to_xml, + weights=path_to_bin, + device=ie_device) + infer_res = ie_engine.infer(input_data=inputs_dict) + + if hasattr(self, 'skip_framework') and self.skip_framework: + warnings.warn('Framework is skipped') + return + + # Framework infer: + fw_res = self.get_framework_results(inputs_dict=inputs_dict, model_path=model_path) + + if len(fw_res) == len(infer_res) == 1: + # match output layers directly + mapping_dict = {next(iter(fw_res)): next(iter(infer_res))} + else: + # Load mapping file + mapping_dict = mapping_parser(path_to_xml.with_suffix('.mapping')) + + if 'custom_eps' in kwargs and kwargs['custom_eps'] is not None: + custom_eps = kwargs['custom_eps'] + else: + custom_eps = 1e-4 + + # Compare Ie results with Framework results + fw_eps = custom_eps if precision == 'FP32' else 5e-2 + assert self.compare_ie_results_with_framework(infer_res=infer_res, framework_res=fw_res, + mapping_dict=mapping_dict, framework_eps=fw_eps), \ + "Comparing with Framework failed: ie_res={}; framework_res={}.".format(infer_res, fw_res) + + # Feed dict for each input is filled with random number. 
+ # It is possible to redefine this function and generate your own input + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(-255, 255, inputs_dict[input]).astype(np.float32) + return inputs_dict + + def compare_ie_results_with_framework(self, infer_res, framework_res, mapping_dict, framework_eps): + is_ok = True + from common.utils.common_utils import allclose + for framework_out_name in framework_res: + if framework_out_name not in mapping_dict: + raise RuntimeError("Output {} not found in mapping file!".format(framework_out_name)) + + ie_out_name = mapping_dict[framework_out_name] + + if not allclose(infer_res[ie_out_name], framework_res[framework_out_name], atol=framework_eps, + rtol=framework_eps): + is_ok = False + print("Max diff is {}".format( + np.array(abs(infer_res[ie_out_name] - framework_res[framework_out_name])).max())) + else: + print("Accuracy validation successful!\n") + print("absolute eps: {}, relative eps: {}".format(framework_eps, framework_eps)) + return is_ok + + +def get_params(ie_device=None, precision=None): + """ + :param ie_device: list of devices + :param precision: list of precisions + """ + + ie_device_params = ie_device if ie_device else test_device + precision_params = precision if precision else test_precision + + test_args = [] + for element in itertools.product(ie_device_params, precision_params): + if element[0] == 'CPU' and element[1] == 'FP16': + continue + test_args.append(element) + return test_args + + +def check_ir_version(left, right, ir_version): + try: + _ir_version = int(ir_version) + except ValueError: + raise RuntimeError("Wrong ir version type: {}, must be an integer".format(ir_version)) + left_bound = _ir_version - 1 if left is None else left + right_bound = _ir_version + 1 if right is None else right + return left_bound <= _ir_version < right_bound diff --git a/tests/layer_tests/common/layer_utils.py b/tests/layer_tests/common/layer_utils.py new file mode 100644 index 00000000000..a933ac719aa --- /dev/null +++ b/tests/layer_tests/common/layer_utils.py @@ -0,0 +1,58 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import subprocess +import sys + +from common.utils.multiprocessing_utils import multiprocessing_run + + +def shell(cmd, env=None, cwd=None): + if sys.platform.startswith('linux') or sys.platform == 'darwin': + cmd = ['/bin/bash', '-c', "".join(cmd)] + else: + cmd = "".join(cmd) + p = subprocess.Popen(cmd, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout, stderr) = p.communicate() + return p.returncode, stdout, stderr + + +class BaseInfer: + def __init__(self, name): + self.name = name + self.res = None + + def fw_infer(self, input_data): + raise RuntimeError("This is base class, please implement infer function for the specific framework") + + def infer(self, input_data): + self.res = multiprocessing_run(self.fw_infer, [input_data], self.name, timeout=60) + return self.res + + +class IEInfer(BaseInfer): + def __init__(self, model, weights, device): + super().__init__('Inference Engine') + self.device = device + self.model = model + self.weights = weights + + def fw_infer(self, input_data): + from openvino.inference_engine import IECore, get_version as ie_get_version + + print("Inference Engine version: {}".format(ie_get_version())) + print("Creating IE Core Engine...") + ie = IECore() + print("Reading network files") + net = ie.read_network(self.model, self.weights) + print("Loading network") + 
exec_net = ie.load_network(net, self.device) + print("Starting inference") + result = exec_net.infer(input_data) + + if "exec_net" in locals(): + del exec_net + if "ie" in locals(): + del ie + + return result diff --git a/tests/layer_tests/common/onnx_layer_test_class.py b/tests/layer_tests/common/onnx_layer_test_class.py new file mode 100644 index 00000000000..ff97718f1c0 --- /dev/null +++ b/tests/layer_tests/common/onnx_layer_test_class.py @@ -0,0 +1,63 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from common.layer_test_class import CommonLayerTest +from common.layer_utils import BaseInfer + + +def save_to_onnx(onnx_model, path_to_saved_onnx_model): + import onnx + path = os.path.join(path_to_saved_onnx_model, 'model.onnx') + onnx.save(onnx_model, path) + assert os.path.isfile(path), "model.onnx haven't been saved here: {}".format(path_to_saved_onnx_model) + return path + + +class Caffe2OnnxLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + return save_to_onnx(framework_model, save_path) + + def get_framework_results(self, inputs_dict, model_path): + # Evaluate model via Caffe2 and IE + # Load the ONNX model + import onnx + model = onnx.load(model_path) + # Run the ONNX model with Caffe2 + import caffe2.python.onnx.backend + caffe2_res = caffe2.python.onnx.backend.run_model(model, inputs_dict) + res = dict() + for field in caffe2_res._fields: + res[field] = caffe2_res[field] + return res + + +class OnnxRuntimeInfer(BaseInfer): + def __init__(self, net): + super().__init__('OnnxRuntime') + self.net = net + + def fw_infer(self, input_data): + import onnxruntime as rt + + sess = rt.InferenceSession(self.net) + out = sess.run(None, input_data) + result = dict() + for i, output in enumerate(sess.get_outputs()): + result[output.name] = out[i] + + if "sess" in locals(): + del sess + + return result + + +class OnnxRuntimeLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + return save_to_onnx(framework_model, save_path) + + def get_framework_results(self, inputs_dict, model_path): + ort = OnnxRuntimeInfer(net=model_path) + res = ort.infer(input_data=inputs_dict) + return res diff --git a/tests/layer_tests/common/tf_layer_test_class.py b/tests/layer_tests/common/tf_layer_test_class.py new file mode 100644 index 00000000000..ab19c29e970 --- /dev/null +++ b/tests/layer_tests/common/tf_layer_test_class.py @@ -0,0 +1,61 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from common.layer_test_class import CommonLayerTest + +from common.utils.tf_utils import summarize_graph + + +def save_to_pb(tf_model, path_to_saved_tf_model): + import tensorflow as tf + tf.io.write_graph(tf_model, path_to_saved_tf_model, 'model.pb', False) + assert os.path.isfile(os.path.join(path_to_saved_tf_model, 'model.pb')), "model.pb haven't been saved " \ + "here: {}".format(path_to_saved_tf_model) + return os.path.join(path_to_saved_tf_model, 'model.pb') + + +class CommonTFLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + return save_to_pb(framework_model, save_path) + + def get_framework_results(self, inputs_dict, model_path): + # Evaluate model via Tensorflow and IE + # Load the Tensorflow model + import tensorflow as tf + from tensorflow.python.platform import gfile + + graph_summary = summarize_graph(model_path=model_path) + outputs_list = graph_summary["outputs"] + + 
tf.compat.v1.reset_default_graph() + + with tf.compat.v1.Session() as sess: + with gfile.FastGFile(model_path, 'rb') as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + sess.graph.as_default() + tf.compat.v1.import_graph_def(graph_def, name='') + + input = dict() + for key in inputs_dict.keys(): + data = inputs_dict.get(key) + if len(data.shape) == 4: # reshaping for 4D tensors + input[key+':0'] = data.transpose(0, 2, 3, 1) + elif len(data.shape) == 5: # reshaping for 5D tensors + input[key+':0'] = data.transpose(0, 2, 3, 4, 1) + else: + input[key+':0'] = data + tf_res = sess.run([out + ":0" for out in outputs_list], input) + + result = dict() + for i, output in enumerate(outputs_list): + _tf_res = tf_res[i] + if len(_tf_res.shape) == 4: # reshaping for 4D tensors + result[output] = _tf_res.transpose(0, 3, 1, 2) # 2, 0, 1 + elif len(_tf_res.shape) == 5: # reshaping for 5D tensors + result[output] = _tf_res.transpose(0, 4, 1, 2, 3) # 3, 0, 1, 2 + else: + result[output] = _tf_res + return result diff --git a/tests/layer_tests/common/utils/common_utils.py b/tests/layer_tests/common/utils/common_utils.py new file mode 100644 index 00000000000..a487573afd8 --- /dev/null +++ b/tests/layer_tests/common/utils/common_utils.py @@ -0,0 +1,84 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os +import subprocess +import sys + +import numpy as np + + +logger = logging.getLogger(__name__) + + +def generate_ir(coverage=False, **kwargs): + # Get default mo args + mo = os.path.join(os.environ.get("MO_ROOT"), "mo.py") + if coverage: + params = [sys.executable, '-m', 'coverage', 'run', '-p', '--source={}'.format(os.environ.get("MO_ROOT")), + '--omit=*_test.py', mo] + else: + params = [sys.executable, mo] + for key, value in kwargs.items(): + if key == "batch": + params.extend(("-b", str(value))) + elif key == "k": + params.extend(("-k", str(value))) + elif isinstance(value, bool) and value: + params.append("--{}".format(key)) + elif isinstance(value, bool) and not value: + continue + elif (isinstance(value, tuple) and value) or (isinstance(value, str)): + params.extend(("--{}".format(key), str('"{}"'.format(value)))) + elif (key == "mean_values" and (' ' in value or '(' in value)): + params.extend(("--{}".format(key), str('"{}"'.format(value)))) + else: + params.extend(("--{}".format(key), str(value))) + exit_code, stdout, stderr = shell(params) + logger.info("Model Optimizer out:\n{}".format(stdout)) + logger.error(stderr) + return exit_code, stderr + + +def shell(cmd, env=None, cwd=None, out_format="plain"): + """ + Run command execution in specified environment + :param cmd: list containing command and its parameters + :param env: set of environment variables to set for this command + :param cwd: working directory from which execute call + :param out_format: 'plain' or 'html'. If 'html' all '\n; symbols are replaced by '
' tag + :return: + """ + if sys.platform.startswith('linux') or sys.platform == 'darwin': + cmd = ['/bin/bash', '-c', "unset OMP_NUM_THREADS; " + " ".join(cmd)] + else: + cmd = " ".join(cmd) + + sys.stdout.write("Running command:\n" + "".join(cmd) + "\n") + p = subprocess.Popen(cmd, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (stdout, stderr) = p.communicate() + stdout = str(stdout.decode('utf-8')) + stderr = str(stderr.decode('utf-8')) + if out_format == "html": + stdout = "
<br>\n".join(stdout.split('\n'))
+        stderr = "<br>
\n".join(stderr.split('\n')) + return p.returncode, stdout, stderr + + +def allclose(cur_array, ref_array, atol, rtol): + """ + Comparison of abs_diff and rel_diff with tolerances for every values of corresponding elements. + If (abs_diff < atol) or (rel_diff < rtol) for every element, comparison of elements will pass, else will fail. + Note: if value is very small, firstly abs_diff will be used. If value is huge, abs_diff may be failed, + and rel_diff will be used. So if tensor has small and huge values, need to compare every value + with abs_diff and rel_diff instead of using one of it for the whole array. + :param cur_array: tensor from IE + :param ref_array: tensor from FW + :param atol: absolute tolerance (threshold for absolute difference) + :param rtol: relative tolerance (threshold for relative difference) + :return: bool value means that values of tensors are equal with tolerance or not + """ + abs_diff = np.absolute(cur_array - ref_array) + max_val = np.maximum(np.absolute(cur_array), np.absolute(ref_array)) + return ((abs_diff < atol) | (abs_diff < rtol * max_val)).all() diff --git a/tests/layer_tests/common/utils/multiprocessing_utils.py b/tests/layer_tests/common/utils/multiprocessing_utils.py new file mode 100644 index 00000000000..8d5293476d2 --- /dev/null +++ b/tests/layer_tests/common/utils/multiprocessing_utils.py @@ -0,0 +1,99 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging as log +import os +import platform +import signal +import sys +import traceback +from multiprocessing import Process, Queue, TimeoutError, ProcessError +from queue import Empty as QueueEmpty +from typing import Callable, Union + +if platform.system() == "Darwin": + # Fix for MacOS + import multiprocessing + multiprocessing.set_start_method("forkserver", True) + + +def _mp_wrapped_func(func: Callable, func_args: list, queue: Queue, logger_queue: Queue): + """ + Wraps callable object with exception handling. Current wrapper is a target for + `multiprocessing_run` function + :param func: see `multiprocessing_run` + :param func_args: see `multiprocessing_run` + :param queue: multiprocessing.Queue(). Used for getting callable object return values + :param logger_queue: multiprocessing.Queue(). 
Used for getting logs from child process in parent process + :return: + """ + + error_message = "" + res = None + try: + res = func(*func_args) + except: + ex_type, ex_value, tb = sys.exc_info() + error_message = "{tb}\n{ex_type}: {ex_value}".format(tb=''.join(traceback.format_tb(tb)), + ex_type=ex_type.__name__, ex_value=ex_value) + queue.put((error_message, res)) + + +def multiprocessing_run(func: Callable, func_args: list, func_log_name: str, timeout: Union[int, None] = None): + """ + Wraps callable object to a separate process using multiprocessing module + :param func: callable object + :param func_args: list of arguments for callable + :param func_log_name: name of callable used for logging + :param timeout: positive int to limit execution time + :return: return value (or values) from callable object + """ + queue = Queue() + logger_queue = Queue(-1) + process = Process(target=_mp_wrapped_func, args=(func, func_args, queue, logger_queue)) + process.start() + try: + error_message, *ret_args = queue.get(timeout=timeout) + except QueueEmpty: + raise TimeoutError("{func} running timed out!".format(func=func_log_name)) + finally: + queue.close() + + # Extract logs from Queue and pass to root logger + while not logger_queue.empty(): + rec = logger_queue.get() + log.getLogger().handle(rec) + logger_queue.close() + + if process.is_alive(): + process.terminate() + process.join() + else: + exit_signal = multiprocessing_exitcode_to_signal(process.exitcode) + if exit_signal: + raise ProcessError( + "{func} was killed with a signal {signal}".format(func=func_log_name, signal=exit_signal)) + + if error_message: + raise ProcessError("\n{func} running failed: \n{msg}".format(func=func_log_name, msg=error_message)) + + ret_args = ret_args[0] if len(ret_args) == 1 else ret_args # unwrap from list if only 1 item is returned + return ret_args + + +def multiprocessing_exitcode_to_signal(exitcode): + """ + Map multiprocessing exitcode to signals from "signal" module + :param exitcode: multiprocessing exitcode + :return: signal from "signal" if exitcode mapped on signal or None + """ + # Multiprocessing return negative values of signal of the process, but on Win they are positive. + # Bring the value to the positive format. 
+ exit_code = exitcode if os.name == "nt" else -exitcode + if exit_code > 0: + code_map = {int(getattr(signal, sig)): str(getattr(signal, sig)) + for sig in dir(signal) if sig.startswith("SIG")} + exit_signal = code_map[exit_code] if exit_code in code_map else exit_code + else: + exit_signal = None + return exit_signal diff --git a/tests/layer_tests/common/utils/parsers.py b/tests/layer_tests/common/utils/parsers.py new file mode 100644 index 00000000000..e5e97727c6a --- /dev/null +++ b/tests/layer_tests/common/utils/parsers.py @@ -0,0 +1,28 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import xml.etree.ElementTree + + +def mapping_parser(file): + """ + Parse mapping file if it exists + :param file: Name of mapping file + :return: Dictionary with framework layers as keys and IR layers as values + """ + mapping_dict = {} + if os.path.splitext(file)[1] == '.mapping' and os.path.isfile(file): + xml_tree = xml.etree.ElementTree.parse(file) + xml_root = xml_tree.getroot() + for child in xml_root: + framework_info = child.find('.//framework') + ir_info = child.find('.//IR') + if framework_info is None: + continue + framework_name = framework_info.attrib['name'] + ir_name = ir_info.attrib['name'] if ir_info is not None else None + mapping_dict[framework_name] = ir_name + else: + raise FileNotFoundError("Mapping file was not found at path {}!".format(os.path.dirname(file))) + return mapping_dict diff --git a/tests/layer_tests/common/utils/tf_utils.py b/tests/layer_tests/common/utils/tf_utils.py new file mode 100644 index 00000000000..7900586390c --- /dev/null +++ b/tests/layer_tests/common/utils/tf_utils.py @@ -0,0 +1,109 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import re + +import tensorflow as tf +import numpy as np + + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + + +def load_graph(model_file, output_nodes_for_freeze=None): + is_meta = os.path.splitext(model_file)[-1] == ".meta" + + tf.compat.v1.reset_default_graph() + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() if not is_meta else tf.compat.v1.MetaGraphDef() + + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + + nodes_to_clear_device = graph_def.node if isinstance(graph_def, tf.compat.v1.GraphDef) else graph_def.graph_def.node + for node in nodes_to_clear_device: + node.device = "" + + if is_meta: + with tf.compat.v1.Session() as sess: + restorer = tf.compat.v1.train.import_meta_graph(graph_def) + restorer.restore(sess, re.sub('\.meta$', '', model_file)) + graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(sess, graph_def.graph_def, output_nodes_for_freeze) + + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + + +def collect_tf_references(model_path, feed_dict, out_layer, output_nodes_for_freeze=None): + _feed_dict = dict() + + graph = load_graph(model_path, output_nodes_for_freeze) + output_tensors_list = list() + outputs_list = list() + for input in feed_dict: + input_node = [node for node in graph.as_graph_def().node if node.name == input][0] + if input_node.op == "Placeholder": + tensor = graph.get_tensor_by_name(input + ":0") + _feed_dict[tensor] = feed_dict[input] + else: + for parrent_input in input_node.input: + in_node = [node for node in graph.as_graph_def().node if node.name == parrent_input][0] + if in_node.op in ['Const', 'Assign', 'NoOp', 'Assert']: + continue + else: + tensor = graph.get_tensor_by_name(parrent_input + ":0") + 
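+                    # the requested input is not a Placeholder here, so the data
+                    # prepared for it is fed through its producer tensor instead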
_feed_dict[tensor] = feed_dict[input] + + for output in out_layer: + tensor = graph.get_tensor_by_name(output + ":0") + output_tensors_list.append(tensor) + outputs_list.append(output) + with graph.as_default(): + with tf.compat.v1.Session(graph=graph) as sess: + outputs = sess.run(output_tensors_list, feed_dict=_feed_dict) + out_dict = dict(zip(outputs_list, outputs)) + return out_dict + + +def children(op, graph): + op = graph.get_operation_by_name(op) + return set(op for out in op.outputs for op in out.consumers()) + + +def summarize_graph(model_path, output_nodes_for_freeze=None, reshape_net=None): + placeholders = dict() + variables = list() + outputs = list() + graph = load_graph(model_path, output_nodes_for_freeze) + unlikely_output_types = ['Const', 'Assign', 'NoOp', 'Placeholder', 'Assert', 'switch_t', 'switch_f'] + for node in graph.as_graph_def().node: + if node.op == 'Placeholder': + node_dict = dict() + node_dict['type'] = tf.DType(node.attr['dtype'].type).name + node_dict['shape'] = str(node.attr['shape'].shape.dim).replace('\n', '').replace(' ', '').replace( + 'size:', '').replace('[', '').replace(']', '') + node_dict['shape'] = tuple(map(lambda x: int(x), node_dict['shape'].split(','))) + placeholders[node.name] = node_dict + if node.op == "Variable" or node.op == "VariableV2": + variables.append(node.name) + if len(children(node.name, graph)) == 0: + if node.op not in unlikely_output_types and node.name.split('/')[-1] not in unlikely_output_types: + outputs.append(node.name) + result = dict() + result['inputs'] = placeholders + result['outputs'] = outputs + + if reshape_net: + out_layer = list(result['inputs'].keys()) + result['outputs'] + feed_dict = {} + for inputl in reshape_net: + feed_dict.update({inputl: np.ones(shape=reshape_net[inputl])}) + scoring_res = collect_tf_references(model_path=model_path, feed_dict=feed_dict, out_layer=out_layer) + for layer in scoring_res: + if layer in result['inputs']: + result['inputs'][layer]['shape'] = scoring_res[layer].shape + + return result diff --git a/tests/layer_tests/conftest.py b/tests/layer_tests/conftest.py new file mode 100644 index 00000000000..350fd22a3b2 --- /dev/null +++ b/tests/layer_tests/conftest.py @@ -0,0 +1,77 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import re +from pathlib import Path +import tempfile + +import pytest +from common import constants + + +def pytest_make_parametrize_id(config, val, argname): + return " {0}:{1} ".format(argname, val) + + +def pytest_collection_modifyitems(items): + def remove_ignored_attrs(ref_dict, dict_to_upd): + _dict_to_upd = dict_to_upd.copy() + for key, value in dict_to_upd.items(): + if key not in ref_dict.keys(): + _dict_to_upd.pop(key) + elif isinstance(value, dict): + _dict_to_upd[key] = remove_ignored_attrs(ref_dict[key], value) + return _dict_to_upd + + for test in items: + special_marks = [mark for mark in test.own_markers if "special_" in mark.name] + for mark in special_marks: + if mark.name == "special_xfail": + params = test.callspec.params + # Remove items from params if key of item is not in mark.kwargs["args"]. + # Remaining items will be used to mark test cases that contain them. + # It is required to specify in mark only valuable parameters + # (e.g. 
{"device": "FP16"} will mean that for all test cases with FP16 test will be marked) + params = remove_ignored_attrs(mark.kwargs["args"], params) + if mark.kwargs["args"] == params: + test.add_marker(pytest.mark.xfail(reason=mark.kwargs["reason"])) + + +@pytest.mark.hookwrapper +def pytest_runtest_makereport(item, call): + pytest_html = item.config.pluginmanager.getplugin('html') + outcome = yield + report = outcome.get_result() + extra = getattr(report, 'extra', []) + if report.when == 'call': + xfail_reason = getattr(report, 'wasxfail', None) + if report.skipped and xfail_reason: + jira_ticket_nums = re.findall(r"\*-\d+", xfail_reason) + for ticket_num in jira_ticket_nums: + extra.append(pytest_html.extras.url(ticket_num)) + report.extra = extra + + +def pytest_addoption(parser): + """Specify command-line options for all plugins""" + parser.addoption( + "--ir_version", + required=True, + action="store", + help="Version of IR to generate by Model Optimizer") + + +@pytest.fixture(scope="session") +def ir_version(request): + """Fixture function for command-line option.""" + return request.config.getoption('ir_version') + + +@pytest.fixture(scope="function") +def temp_dir(request): + """Create directory for test purposes.""" + Path(constants.out_path).mkdir(parents=True, exist_ok=True) + test_name = re.sub(r"[^\w_]", "_", request.node.originalname) + device = request.node.funcargs["ie_device"].upper() + temp_dir = tempfile.TemporaryDirectory(dir=constants.out_path, prefix=f"{device}_{test_name}") + yield str(temp_dir.name) diff --git a/tests/layer_tests/onnx_tests/conftest.py b/tests/layer_tests/onnx_tests/conftest.py new file mode 100644 index 00000000000..bbf635f5680 --- /dev/null +++ b/tests/layer_tests/onnx_tests/conftest.py @@ -0,0 +1,12 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import inspect + +from common.layer_test_class import get_params + + +def pytest_generate_tests(metafunc): + test_gen_attrs_names = list(inspect.signature(get_params).parameters) + params = get_params() + metafunc.parametrize(test_gen_attrs_names, params, scope="function") diff --git a/tests/layer_tests/onnx_tests/test_abs.py b/tests/layer_tests/onnx_tests/test_abs.py new file mode 100644 index 00000000000..577965df82a --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_abs.py @@ -0,0 +1,178 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestAbs(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Abs->Output => Input->Abs + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Abs', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': 
{'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Abs'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+abs const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randint(-127, 127, shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Abs', + inputs=['const'], + outputs=['abs'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'abs'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = abs(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_abs(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_abs_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_add_sub_mul_div.py b/tests/layer_tests/onnx_tests/test_add_sub_mul_div.py new file mode 100644 index 00000000000..9076ce8e650 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_add_sub_mul_div.py @@ -0,0 +1,337 
@@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestOperations(Caffe2OnnxLayerTest): + def create_net(self, shape1, shape2, op, precision, ir_version, opset=None): + """ + ONNX net IR net + + Input->Add/Mul with const->Output => Input->Eltwise + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + if op not in ['Add', 'Sub', 'Mul', 'Div']: + raise ValueError("Operation has to be either Add or Mul or Sub or Div") + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape1) + + min_val = 1 if op == 'Div' else -127 + if shape2: + const = np.random.randint(min_val, 127, shape2).astype(np.float) + else: + const = np.random.randint(min_val, 127, 1).astype(np.float) + # TODO: add check when MO remove redundant layer (as Add/Sub if const = 0 or Mul/Div if const = 1) + if const in [0, 1]: + const = np.array([2], dtype=np.float) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = helper.make_node( + op, + inputs=['input', 'const'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # Create reference IR net + if op == 'Div': + const = np.power(const, -1) + elif op == 'Sub': + const = -const + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape1, shape2, op, precision, ir_version, opset=None): + """ + ONNX net IR net + + Input->Concat with two added/multiplied consts->Output => Input->Concat + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + if op not in ['Add', 'Sub', 'Mul', 'Div']: + raise ValueError("op has to be either Add or Mul") + + concat_axis = 0 + output_shape = list(shape1) + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const1 = np.random.randint(-127, 127, shape1).astype(np.float) + min_val = 1 if op == 'Div' else -127 + if shape2: + const2 = np.random.randint(min_val, 127, shape2).astype(np.float) + else: + const2 = np.random.randint(min_val, 127, 1).astype(np.float) + + node_const1_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_const2_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + node_def = helper.make_node( + op, + inputs=['const1', 'const2'], + outputs=['node_out'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'node_out'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def 
= helper.make_graph( + [node_const1_def, node_const2_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # Create reference IR net + if op == 'Add': + constant_calculated = np.add(const1, const2) + elif op == 'Sub': + constant_calculated = np.subtract(const1, const2) + elif op == 'Mul': + constant_calculated = np.multiply(const1, const2) + elif op == 'Div': + constant_calculated = np.divide(const1, const2) + + if precision == 'FP16': + constant_calculated = constant_calculated.astype(np.float16) + + ref_net = None + + return onnx_net, ref_net + + test_data_precommit = [dict(shape1=[2, 4], shape2=[2, 4]), + # scalar cases + dict(shape1=[2, 4], shape2=None)] + + test_data = [dict(shape1=[4, 6], shape2=[4, 6]), + dict(shape1=[4, 6, 8], shape2=[4, 6, 8]), + dict(shape1=[4, 6, 8, 10], shape2=[4, 6, 8, 10]), + dict(shape1=[4, 6, 8, 10, 12], shape2=[4, 6, 8, 10, 12]), + # scalar cases + dict(shape1=[4, 6], shape2=None), + dict(shape1=[4, 6, 8], shape2=None), + dict(shape1=[4, 6, 8, 10], shape2=None), + dict(shape1=[4, 6, 8, 10, 12], shape2=None)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_add(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Add', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_add_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Add', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sub(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Sub', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sub_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Sub', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_mul(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Mul', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_mul_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Mul', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_div(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Div', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_div_const(self, params, ie_device, precision, ir_version, temp_dir): + 
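+        # both operands of Div are constants, so the division is expected to be
+        # folded at conversion time and verified through the trailing Concat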
self._test(*self.create_net_const(**params, op='Div', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_add_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Add', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_add_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Add', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_sub_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Sub', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_sub_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Sub', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_mul_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Mul', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_mul_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Mul', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_div_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Div', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_div_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Div', precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_add_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Add', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_add_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Add', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sub_opset6(self, params, ie_device, precision, ir_version, temp_dir): + 
self._test(*self.create_net(**params, op='Sub', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sub_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Sub', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_mul_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Mul', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_mul_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Mul', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_div_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, op='Div', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_div_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, op='Div', precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_and.py b/tests/layer_tests/onnx_tests/test_and.py new file mode 100644 index 00000000000..115fe1e706d --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_and.py @@ -0,0 +1,268 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestAnd(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 2, inputs_dict[input]).astype(np.bool) + return inputs_dict + + def create_net(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->And with 2nd input->Output => Input->LogicalAnd + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input1 = helper.make_tensor_value_info('input1', TensorProto.BOOL, shape1) + input2 = helper.make_tensor_value_info('input2', TensorProto.BOOL, shape2) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + node_def = helper.make_node( + 'And', + inputs=['input1', 'input2'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input1, input2], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input1': {'kind': 'op', 'type': 'Parameter'}, + 'input1_data': {'shape': shape1, 'kind': 'data'}, + 'input2': {'kind': 'op', 'type': 
'Parameter'}, + 'input2_data': {'shape': shape2, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalAnd'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input1', 'input1_data'), + ('input2', 'input2_data'), + ('input1_data', 'node'), + ('input2_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_one_const(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->And with const->Output => Input->LogicalAnd + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + const = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = helper.make_node( + 'And', + inputs=['input', 'const'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape1, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': const.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const.shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalAnd'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Concat with const and const->Output => Input->Concat + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = list(shape1) + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, output_shape) + + const1 = np.random.randint(0, 2, shape1).astype(np.bool) + const2 = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const1_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_const2_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + node_def = helper.make_node( + 'And', + inputs=['const1', 'const2'], + outputs=['node_out'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'node_out'], + outputs=['output'], + axis=concat_axis + ) + + # 
Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const1_def, node_const2_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + constant_calculated = np.logical_and(const1, const2) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': const1.shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant_calculated.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const1.shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data = [dict(shape1=[4, 6], shape2=[4, 6]), + dict(shape1=[4, 6, 8], shape2=[4, 6, 8]), + dict(shape1=[4, 6, 8, 10], shape2=[4, 6, 8, 10]), + dict(shape1=[4, 6, 8, 10, 12], shape2=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_and(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_and_one_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_one_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_and_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_argmax.py b/tests/layer_tests/onnx_tests/test_argmax.py new file mode 100644 index 00000000000..476a06d187b --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_argmax.py @@ -0,0 +1,153 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestArgMax(OnnxRuntimeLayerTest): + def create_net(self, shape, axis, keepdims, ir_version): + """ + ONNX net IR net + + Input->ArgMax->Output => Input->TopK + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + output_shape = shape.copy() + output_shape[axis if axis is not None else 0] = 1 + output_shape_squeeze = output_shape.copy() + if keepdims == 0: + output_shape_squeeze.remove(1) + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.INT64, output_shape_squeeze) + + const = np.random.randint(-10, 10, output_shape_squeeze).astype(np.int64) + + args = dict() + if axis is not None: + args['axis'] = axis + else: + axis = 0 + if keepdims is 
not None: + args['keepdims'] = keepdims + node_def = onnx.helper.make_node( + 'ArgMax', + inputs=['input'], + outputs=['argmax' if keepdims is None or keepdims == 1 else 'output'], + **args + ) + edges = [node_def] + + if keepdims is None or keepdims == 1: + node_flatten_def = onnx.helper.make_node( + 'Flatten', + inputs=['argmax'], + outputs=['output'] + ) + edges.append(node_flatten_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + edges, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'const_indata': {'shape': [1], 'kind': 'data'}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': [], 'kind': 'data'}, # TODO shape [] or [1] ?? + 'node': {'kind': 'op', 'type': 'TopK'}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + 'indices_data': {'shape': output_shape, 'kind': 'data'}, + 'result1': {'kind': 'op', 'type': 'Result'}, + 'result2': {'kind': 'op', 'type': 'Result'} + } + edges = [('input', 'input_data'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node', 'indices_data'), + ('node_data', 'result1')] + if keepdims == 0: + nodes_attributes.update({'squeeze_const_indata': {'shape': [1], 'kind': 'data'}, + 'squeeze_const': {'kind': 'op', 'type': 'Const'}, + 'squeeze_const_data': {'shape': [1], 'kind': 'data'}, + 'squeeze': {'kind': 'op', 'type': 'Squeeze'}, + 'squeeze_data': {'shape': output_shape_squeeze, 'kind': 'data'} + }) + edges.extend([('squeeze_const_indata', 'squeeze_const'), + ('squeeze_const', 'squeeze_const_data'), + ('indices_data', 'squeeze'), + ('squeeze_const_data', 'squeeze'), + ('squeeze', 'squeeze_data'), + ('squeeze_data', 'result2')]) + else: + nodes_attributes.update( + {'flatten_const_indata': {'kind': 'data', 'value': [0, -1]}, + 'flatten_const': {'kind': 'op', 'type': 'Const'}, + 'flatten_const_data': {'shape': [2], 'kind': 'data'}, + 'flatten': {'kind': 'op', 'type': 'Reshape'}, + 'flatten_data': {'shape': [output_shape_squeeze[0], np.prod(output_shape_squeeze[1:])], + 'kind': 'data'} + }) + edges.extend([('indices_data', 'flatten'), + ('flatten_const_indata', 'flatten_const'), + ('flatten_const', 'flatten_const_data'), + ('flatten_const_data', 'flatten'), + ('flatten', 'flatten_data'), + ('flatten_data', 'result2')]) + + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + test_data = [ + dict(shape=[10, 12], axis=None), + dict(shape=[10, 12], axis=1), + dict(shape=[8, 10, 12], axis=None), + dict(shape=[8, 10, 12], axis=1), + dict(shape=[8, 10, 12], axis=2), + dict(shape=[6, 8, 10, 12], axis=None), + dict(shape=[6, 8, 10, 12], axis=1), + dict(shape=[6, 8, 10, 12], axis=2), + dict(shape=[6, 8, 10, 12], axis=3), + dict(shape=[4, 6, 8, 10, 12], axis=None), + dict(shape=[4, 6, 8, 10, 12], axis=1), + dict(shape=[4, 6, 8, 10, 12], axis=2), + dict(shape=[4, 6, 8, 10, 12], axis=3), + dict(shape=[4, 6, 8, 10, 12], axis=4)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keepdims", [None, 0]) + @pytest.mark.nightly + def test_argmax(self, params, keepdims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, 
ir_version=ir_version, keepdims=keepdims), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_bn.py b/tests/layer_tests/onnx_tests/test_bn.py new file mode 100644 index 00000000000..fb916f4a9aa --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_bn.py @@ -0,0 +1,133 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestBatchNormalization(Caffe2OnnxLayerTest): + def create_net(self, shape, epsilon, precision, ir_version, opset=None): + """ + ONNX net IR net + + Input->BatchNormalization->Output => Input->ScaleShift(Power) + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + scale_const = np.random.randint(128, 256, shape[1]).astype(np.float32) / 128. + bias_const = np.random.randint(0, 128, shape[1]).astype(np.float32) / 128. + mean_const = np.random.randint(-127, 127, shape[1]).astype(np.float32) + var_const = np.random.randint(128, 256, shape[1]).astype(np.float32) / 128. + + node_scale_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['scale_const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=scale_const.shape, + vals=scale_const.flatten(), + ), + ) + + node_bias_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['bias'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=bias_const.shape, + vals=bias_const.flatten(), + ), + ) + + node_mean_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['mean'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=mean_const.shape, + vals=mean_const.flatten(), + ), + ) + + node_var_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['var'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=var_const.shape, + vals=var_const.flatten(), + ), + ) + + args = dict(epsilon=epsilon) + if opset == 6: + args['is_test'] = 1; + node_def = helper.make_node( + 'BatchNormalization', + inputs=['input', 'scale_const', 'bias', 'mean', 'var'], + outputs=['output'], + **args + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_scale_def, node_bias_def, node_mean_def, node_var_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + test_data = [dict(shape=[1, 1, 4, 6], epsilon=0.001), + dict(shape=[1, 2, 4, 6], epsilon=0.001), + dict(shape=[2, 3, 4, 6], epsilon=0.001)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_bn(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_bn_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, 
opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_bn_opset7(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=7, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_ceil.py b/tests/layer_tests/onnx_tests/test_ceil.py new file mode 100644 index 00000000000..cde48393375 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_ceil.py @@ -0,0 +1,194 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestCeil(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Ceil->Output => Input->Ceiling + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Ceil', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Ceiling'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+ceil const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randn(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Ceil', + inputs=['const1'], + outputs=['floor'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'floor'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = 
np.ceil(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 4, 6, 8, 10])] + + test_data = [ + dict(shape=[1, 2]), + dict(shape=[2, 4, 6]), + dict(shape=[2, 4, 6, 8]), + dict(shape=[2, 4, 6, 8, 10])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_ceil_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_ceil_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_ceil(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_ceil_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_clip.py b/tests/layer_tests/onnx_tests/test_clip.py new file mode 100644 index 00000000000..94b55bf80ef --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_clip.py @@ -0,0 +1,169 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestClip(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version, opset, min=None, max=None): + """ + ONNX net IR net + + Input->Clip->Output => Input->Clamp + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + nodes = [] + if opset < 11: + args = dict() + if min is not None: + args['min'] = min + if max is not None: + args['max'] = max + node_def = onnx.helper.make_node( + 'Clip', + inputs=['input'], + outputs=['output'], + **args + ) + nodes.append(node_def) + else: + clip_inputs = ['input'] + if min is not None: + node_min_def = onnx.helper.make_node( + 'Constant', + 
inputs=[], + outputs=['min_const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=[], + vals=[min], + ), + ) + clip_inputs.append('min_const') + nodes.append(node_min_def) + else: + clip_inputs.append('') + if max is not None: + node_max_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['max_const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=[], + vals=[max], + ), + ) + clip_inputs.append('max_const') + nodes.append(node_max_def) + node_def = onnx.helper.make_node( + 'Clip', + inputs=clip_inputs, + outputs=['output'] + ) + nodes.append(node_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + if opset < 11 or min is not None and max is not None: + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Clamp', + 'min': min if min is not None else -3.4028235e+38, + 'max': max if max is not None else 3.4028235e+38}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + else: + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': [min] if min is not None else [max]}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': [], 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Minimum' if max is not None else 'Maximum'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[12], min=-3.5), + dict(shape=[12], max=3.5), + dict(shape=[12], min=-3.5, max=3.5), + dict(shape=[10, 12], min=-3.5), + dict(shape=[10, 12], max=3.5), + dict(shape=[10, 12], min=-3.5, max=3.5), + dict(shape=[8, 10, 12], min=-3.5), + dict(shape=[8, 10, 12], max=3.5), + dict(shape=[8, 10, 12], min=-3.5, max=3.5), + dict(shape=[6, 8, 10, 12], min=-3.5), + dict(shape=[6, 8, 10, 12], max=3.5), + dict(shape=[6, 8, 10, 12], min=-3.5, max=3.5), + dict(shape=[4, 6, 8, 10, 12], min=-3.5), + dict(shape=[4, 6, 8, 10, 12], max=3.5), + dict(shape=[4, 6, 8, 10, 12], min=-3.5, max=3.5)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_clip_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, opset=6), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_clip_opset11(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, opset=11), ie_device, precision, 
ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_concat.py b/tests/layer_tests/onnx_tests/test_concat.py new file mode 100644 index 00000000000..1321ccda5b9 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_concat.py @@ -0,0 +1,225 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestConcat(Caffe2OnnxLayerTest): + # TODO Add test with default values (axis=0) + def create_concat_net_const(self, input_shape, output_shape, axis, ir_version): + """ + ONNX net IR net + + Input(const)----->Concat--------->Concat->Output => Input--->Concat + Input(const)-----' ' Const---' + Input-' + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + const_number = np.prod(input_shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + + # Output for concat + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + node_const1_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const1_tensor', + data_type=TensorProto.FLOAT, + dims=input_shape, + vals=constant, + ), + ) + + node_const2_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const2_tensor', + data_type=TensorProto.FLOAT, + dims=input_shape, + vals=constant, + ), + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['const1', 'const2'], + outputs=['output_concat'], + axis=axis + ) + + node_dyn_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'output_concat'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const1_def, node_const2_def, node_concat_def, node_dyn_concat_def], + 'test_concat_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_split_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
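+        # The expected IR keeps a single Const holding the two copies of
+        # `constant` already concatenated along `axis` (computed just below).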
+ # + + constant_reshape = np.reshape(constant, input_shape) + constant_reshape = np.concatenate([constant_reshape, constant_reshape], axis=axis) + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': output_shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant_reshape.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': output_shape, 'value': None, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': concat_output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_3D = [ + dict(input_shape=[1, 50, 50], + output_shape=[2, 50, 50], + axis=0), + + dict(input_shape=[2, 50, 50], + output_shape=[2, 100, 50], + axis=1), + + dict(input_shape=[4, 50, 50], + output_shape=[4, 50, 100], + axis=2), + ] + + test_data_4D_precommit = [ + dict(input_shape=[1, 32, 800, 800], + output_shape=[2, 32, 800, 800], + axis=0) + ] + + test_data_4D = [ + dict(input_shape=[1, 32, 800, 800], + output_shape=[2, 32, 800, 800], + axis=0), + + dict(input_shape=[4, 32, 80, 80], + output_shape=[4, 64, 80, 80], + axis=1), + + dict(input_shape=[2, 21, 80, 80], + output_shape=[2, 21, 160, 80], + axis=2), + + dict(input_shape=[3, 21, 80, 80], + output_shape=[3, 21, 80, 160], + axis=3), + ] + + test_data_5D_precommit = [ + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[2, 50, 50, 80, 60], + axis=0), + + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[1, 50, 50, 80, 120], + axis=4), + ] + + test_data_5D = [ + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[2, 50, 50, 80, 60], + axis=0), + + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[1, 100, 50, 80, 60], + axis=1), + + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[1, 50, 100, 80, 60], + axis=2), + + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[1, 50, 50, 160, 60], + axis=3), + + dict(input_shape=[1, 50, 50, 80, 60], + output_shape=[1, 50, 50, 80, 120], + axis=4), + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_concat_3D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_precommit) + @pytest.mark.precommit + def test_concat_4D_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_concat_4D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D_precommit) + @pytest.mark.nightly + def test_concat_5D_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net_const(**params, ir_version=ir_version), 
ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_concat_5D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_conv.py b/tests/layer_tests/onnx_tests/test_conv.py new file mode 100644 index 00000000000..94774be4af9 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_conv.py @@ -0,0 +1,413 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestConv(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, weights_shape, dilations, group, pads, strides, bias, ir_version, auto_pad=None): + """ + ONNX net IR net + + Input->Conv->Output => Input->Convolution + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + output_shape = np.array(shape) + output_shape[1] = group + _pads = np.array(pads).reshape([2, -1]) + kernel_extent = np.array(dilations) * (np.array(weights_shape[2:]) - 1) + 1 + spatial_val_wo_stride = shape[2:] + np.add(_pads[0, :], _pads[1, :]) - kernel_extent + output_shape[2:] = (spatial_val_wo_stride.astype(np.float) / strides + 1).astype(np.int64) + output_shape = output_shape.astype(np.int).tolist() + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + weights_const = np.random.randn(*weights_shape).astype(np.float32) + + node_weights_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['weights'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=weights_const.shape, + vals=weights_const.flatten(), + ), + ) + + conv_args = dict(kernel_shape=weights_shape[2:], + dilations=dilations, + group=group, + strides=strides) + if pads and auto_pad not in ['SAME_UPPER', 'SAME_LOWER']: + conv_args['pads'] = pads + if auto_pad: + conv_args['auto_pad'] = auto_pad + if bias: + bias_const = np.random.randint(-10, 10, weights_shape[0]).astype(np.float32) + + node_bias_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['bias'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=bias_const.shape, + vals=bias_const.flatten(), + ), + ) + node_def = onnx.helper.make_node( + 'Conv', + inputs=['input', 'weights', 'bias'], + outputs=['output'], + **conv_args + ) + nodes = [node_weights_def, node_bias_def, node_def] + else: + node_def = onnx.helper.make_node( + 'Conv', + inputs=['input', 'weights'], + outputs=['output'], + **conv_args + ) + nodes = [node_weights_def, node_def] + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + if len(shape) == 3: + 
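+                # For 3-D inputs (i.e. 1-D convolution) the reference IR below wraps the
+                # Convolution/GroupConvolution in a pair of Reshapes: a dummy spatial
+                # dimension of size 1 is inserted at position 2, so an input such as
+                # [2, 3, 25] is convolved as [2, 3, 1, 25] and then reshaped back to the
+                # expected 3-D output shape.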
input_shape = shape.copy() + input_shape.insert(2, 1) + node_shape = output_shape.copy() + node_shape.insert(2, 1) + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'before_shape_const_indata': {'shape': [len(input_shape)], 'value': input_shape, 'kind': 'data'}, + 'before_shape_const': {'kind': 'op', 'type': 'Const'}, + 'before_shape_const_data': {'shape': [len(input_shape)], 'kind': 'data'}, + 'reshape_before': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_before_data': {'shape': input_shape, 'kind': 'data'}, + 'kernel_indata': {'kind': 'data', 'shape': [len(weights_const.flatten())]}, + 'kernel': {'kind': 'op', 'type': 'Const'}, + 'kernel_data': {'kind': 'data', 'value': None}, + 'node': {'kind': 'op', 'type': 'Convolution' if group == 1 else 'GroupConvolution', + 'dilations': [1, dilations[0]], + 'pads_begin': [0, _pads[0, 0]], 'pads_end': [0, _pads[1, 0]]}, + 'node_data': {'shape': node_shape, 'kind': 'data'}, + 'after_shape_const_indata': {'shape': [len(output_shape)], 'value': output_shape, 'kind': 'data'}, + 'after_shape_const': {'kind': 'op', 'type': 'Const'}, + 'after_shape_const_data': {'shape': [len(output_shape)], 'kind': 'data'}, + 'reshape_after': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_after_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}} + edges = [('input', 'input_data'), + ('input_data', 'reshape_before'), + ('before_shape_const_indata', 'before_shape_const'), + ('before_shape_const', 'before_shape_const_data'), + ('before_shape_const_data', 'reshape_before'), + ('reshape_before', 'reshape_before_data'), + ('reshape_before_data', 'node'), + ('kernel_indata', 'kernel'), + ('kernel', 'kernel_data'), + ('kernel_data', 'node'), + ('node', 'node_data'), + ('node_data', 'reshape_after'), + ('after_shape_const_indata', 'after_shape_const'), + ('after_shape_const', 'after_shape_const_data'), + ('after_shape_const_data', 'reshape_after'), + ('reshape_after', 'reshape_after_data')] + if bias: + nodes_attributes.update({'const_indata': {'kind': 'data', 'value': bias_const.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'kind': 'data', 'shape': None}, + 'bias': {'type': 'Add', 'kind': 'op'}, + 'bias_data': {'kind': 'data', 'shape': output_shape}}) + edges += [('reshape_after_data', 'bias'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('const_data', 'bias'), + ('bias', 'bias_data'), + ('bias_data', 'result')] + else: + edges += [('reshape_after_data', 'result')] + ref_net = build_graph(nodes_attributes, edges) + else: + _weights_shape = weights_shape.copy() + if group != 1: + _weights_shape.insert(1, 1) + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'kernel_indata': {'kind': 'data', 'value': weights_const.flatten()}, + 'kernel': {'kind': 'op', 'type': 'Const'}, + 'kernel_data': {'kind': 'data', 'shape': _weights_shape}, + 'node': {'kind': 'op', 'type': 'Convolution' if group == 1 else 'GroupConvolution', + 'dilations': dilations, 'pads_begin': _pads[0, :], 'pads_end': _pads[1, :]}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}} + edges = [('input', 'input_data'), + ('input_data', 'node'), + ('kernel_indata', 'kernel'), + ('kernel', 'kernel_data'), + ('kernel_data', 'node'), + ('node', 'node_data')] + + if bias: + nodes_attributes.update({'const_indata': {'kind': 'data', 'value': 
bias_const.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'kind': 'data', 'shape': None}, + 'bias': {'type': 'Add', 'kind': 'op'}, + 'bias_data': {'kind': 'data', 'shape': output_shape}}) + edges += [('node_data', 'bias'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('const_data', 'bias'), + ('bias', 'bias_data'), + ('bias_data', 'result')] + else: + edges += [('node_data', 'result')] + + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + test_data_3D = [ + dict(weights_shape=[1, 3, 3], group=1), + dict(weights_shape=[1, 3, 5], group=1), + dict(weights_shape=[3, 1, 3], group=3), + dict(weights_shape=[3, 1, 5], group=3)] + + test_data_3D_autopad = [ + dict(weights_shape=[1, 3, 3], group=1, pads=[1, 1], strides=[1], dilations=[1]), + dict(weights_shape=[1, 3, 3], group=1, pads=[2, 2], strides=[1], dilations=[2]), + dict(weights_shape=[1, 3, 3], group=1, pads=[1, 1], strides=[2], dilations=[1]), + dict(weights_shape=[1, 3, 3], group=1, pads=[2, 2], strides=[2], dilations=[2]), + dict(weights_shape=[1, 3, 5], group=1, pads=[2, 2], strides=[1], dilations=[1]), + dict(weights_shape=[1, 3, 5], group=1, pads=[4, 4], strides=[1], dilations=[2]), + dict(weights_shape=[1, 3, 5], group=1, pads=[2, 2], strides=[2], dilations=[1]), + dict(weights_shape=[1, 3, 5], group=1, pads=[4, 4], strides=[2], dilations=[2]), + dict(weights_shape=[3, 1, 3], group=3, pads=[1, 1], strides=[1], dilations=[1]), + dict(weights_shape=[3, 1, 3], group=3, pads=[2, 2], strides=[1], dilations=[2]), + dict(weights_shape=[3, 1, 3], group=3, pads=[1, 1], strides=[2], dilations=[1]), + dict(weights_shape=[3, 1, 3], group=3, pads=[2, 2], strides=[2], dilations=[2]), + dict(weights_shape=[3, 1, 5], group=3, pads=[2, 2], strides=[1], dilations=[1]), + dict(weights_shape=[3, 1, 5], group=3, pads=[4, 4], strides=[1], dilations=[2]), + dict(weights_shape=[3, 1, 5], group=3, pads=[2, 2], strides=[2], dilations=[1]), + dict(weights_shape=[3, 1, 5], group=3, pads=[4, 4], strides=[2], dilations=[2])] + + test_data_4D_precommit = [ + dict(weights_shape=[1, 3, 3, 3], group=1), + dict(weights_shape=[3, 1, 3, 3], group=3)] + + test_data_4D = [ + dict(weights_shape=[1, 3, 3, 3], group=1), + dict(weights_shape=[1, 3, 5, 3], group=1), + dict(weights_shape=[3, 1, 3, 3], group=3), + dict(weights_shape=[3, 1, 3, 5], group=3)] + + test_data_4D_autopad = [ + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[1, 1, 1, 1], strides=[1, 1], dilations=[1, 1]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[2, 2, 2, 2], strides=[1, 1], dilations=[2, 2]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[3, 5, 3, 5], strides=[1, 1], dilations=[3, 5]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[1, 1, 1, 1], strides=[2, 2], dilations=[1, 1]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[2, 2, 2, 2], strides=[2, 2], dilations=[2, 2]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[3, 5, 3, 5], strides=[2, 2], dilations=[3, 5]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[1, 0, 1, 0], strides=[3, 5], dilations=[1, 1]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[2, 0, 2, 0], strides=[3, 5], dilations=[2, 2]), + dict(weights_shape=[1, 3, 3, 3], group=1, pads=[3, 3, 3, 3], strides=[3, 5], dilations=[3, 5]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[2, 1, 2, 1], strides=[1, 1], dilations=[1, 1]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[4, 2, 4, 2], strides=[1, 1], dilations=[2, 2]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[6, 5, 6, 5], 
strides=[1, 1], dilations=[3, 5]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[2, 1, 2, 1], strides=[2, 2], dilations=[1, 1]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[4, 2, 4, 2], strides=[2, 2], dilations=[2, 2]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[6, 5, 6, 5], strides=[2, 2], dilations=[3, 5]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[2, 0, 2, 0], strides=[3, 5], dilations=[1, 1]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[4, 0, 4, 0], strides=[3, 5], dilations=[2, 2]), + dict(weights_shape=[1, 3, 5, 3], group=1, pads=[6, 3, 6, 3], strides=[3, 5], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[1, 1, 1, 1], strides=[1, 1], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[2, 2, 2, 2], strides=[1, 1], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[3, 5, 3, 5], strides=[1, 1], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[1, 1, 1, 1], strides=[2, 2], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[2, 2, 2, 2], strides=[2, 2], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[3, 5, 3, 5], strides=[2, 2], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[1, 0, 1, 0], strides=[3, 5], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[2, 0, 2, 0], strides=[3, 5], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 3], group=3, pads=[3, 3, 3, 3], strides=[3, 5], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[1, 2, 1, 2], strides=[1, 1], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[2, 4, 2, 4], strides=[1, 1], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[3, 10, 3, 10], strides=[1, 1], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[1, 2, 1, 2], strides=[2, 2], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[2, 4, 2, 4], strides=[2, 2], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[3, 10, 3, 10], strides=[2, 2], dilations=[3, 5]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[1, 0, 1, 0], strides=[3, 5], dilations=[1, 1]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[2, 2, 2, 2], strides=[3, 5], dilations=[2, 2]), + dict(weights_shape=[3, 1, 3, 5], group=3, pads=[3, 8, 3, 8], strides=[3, 5], dilations=[3, 5])] + + test_data_5D_precommit = [ + dict(weights_shape=[1, 3, 3, 3, 3], group=1), + dict(weights_shape=[3, 1, 3, 3, 3], group=3)] + + test_data_5D = [ + dict(weights_shape=[1, 3, 3, 3, 3], group=1), + dict(weights_shape=[1, 3, 3, 4, 5], group=1), + dict(weights_shape=[3, 1, 3, 3, 3], group=3), + dict(weights_shape=[3, 1, 5, 4, 3], group=3)] + + test_data_5D_autopad = [ + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[1, 1, 1, 1, 1, 1], strides=[1, 1, 1], dilations=[1, 1, 1]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[2, 2, 2, 2, 2, 2], strides=[1, 1, 1], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[3, 4, 5, 3, 4, 5], strides=[1, 1, 1], dilations=[3, 4, 5]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[1, 1, 1, 1, 1, 1], strides=[2, 2, 2], dilations=[1, 1, 1]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[2, 2, 2, 2, 2, 2], strides=[2, 2, 2], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[3, 4, 5, 3, 4, 5], strides=[2, 2, 2], dilations=[3, 4, 5]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[1, 1, 0, 1, 1, 0], strides=[3, 4, 5], dilations=[1, 1, 1]), + 
dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[2, 2, 0, 2, 2, 0], strides=[3, 4, 5], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 3, 3], group=1, pads=[3, 4, 3, 3, 4, 3], strides=[3, 4, 5], dilations=[3, 4, 5]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[1, 1, 2, 1, 2, 2], strides=[1, 1, 1], dilations=[1, 1, 1]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[2, 3, 4, 2, 3, 4], strides=[1, 1, 1], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[3, 6, 10, 3, 6, 10], strides=[1, 1, 1], dilations=[3, 4, 5]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[1, 1, 2, 1, 2, 2], strides=[2, 2, 2], dilations=[1, 1, 1]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[2, 3, 4, 2, 3, 4], strides=[2, 2, 2], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[3, 6, 10, 3, 6, 10], strides=[2, 2, 2], dilations=[3, 4, 5]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[1, 1, 0, 1, 2, 0], strides=[3, 4, 5], dilations=[1, 1, 1]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[2, 3, 2, 2, 3, 2], strides=[3, 4, 5], dilations=[2, 2, 2]), + dict(weights_shape=[1, 3, 3, 4, 5], group=1, pads=[3, 6, 8, 3, 6, 8], strides=[3, 4, 5], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[1, 1, 1, 1, 1, 1], strides=[1, 1, 1], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[2, 2, 2, 2, 2, 2], strides=[1, 1, 1], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[3, 4, 5, 3, 4, 5], strides=[1, 1, 1], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[1, 1, 1, 1, 1, 1], strides=[2, 2, 2], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[2, 2, 2, 2, 2, 2], strides=[2, 2, 2], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[3, 4, 5, 3, 4, 5], strides=[2, 2, 2], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[1, 1, 0, 1, 1, 0], strides=[3, 4, 5], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[2, 2, 0, 2, 2, 0], strides=[3, 4, 5], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 3, 3, 3], group=3, pads=[3, 4, 3, 3, 4, 3], strides=[3, 4, 5], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[2, 1, 1, 2, 2, 1], strides=[1, 1, 1], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[4, 3, 2, 4, 3, 2], strides=[1, 1, 1], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[6, 6, 5, 6, 6, 5], strides=[1, 1, 1], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[2, 1, 1, 2, 2, 1], strides=[2, 2, 2], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[4, 3, 2, 4, 3, 2], strides=[2, 2, 2], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[6, 6, 5, 6, 6, 5], strides=[2, 2, 2], dilations=[3, 4, 5]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[2, 1, 0, 2, 2, 0], strides=[3, 4, 5], dilations=[1, 1, 1]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[4, 3, 0, 4, 3, 0], strides=[3, 4, 5], dilations=[2, 2, 2]), + dict(weights_shape=[3, 1, 5, 4, 3], group=3, pads=[6, 6, 3, 6, 6, 3], strides=[3, 4, 5], dilations=[3, 4, 5])] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.parametrize("dilations", [[1], [2]]) + @pytest.mark.parametrize("pads", [[0, 0], [1, 1], [1, 2]]) + @pytest.mark.parametrize("strides", [[1], [2]]) + @pytest.mark.parametrize("bias", [False, True]) + 
@pytest.mark.nightly + def test_conv_3D(self, params, dilations, pads, strides, bias, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25], dilations=dilations, pads=pads, strides=strides, + bias=bias, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D_autopad[:-1]) + @pytest.mark.parametrize("auto_pad", ['SAME_UPPER', 'SAME_LOWER']) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.nightly + @pytest.mark.xfail(reason='autopad dimetions do not agree with framework') + def test_conv_3D_autopad(self, params, auto_pad, bias, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25], bias=bias, auto_pad=auto_pad, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_precommit) + @pytest.mark.parametrize("dilations", [[3, 5]]) + @pytest.mark.parametrize("pads", [[1, 2, 3, 4]]) + @pytest.mark.parametrize("strides", [[3, 5]]) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.precommit + def test_conv_4D_precommit(self, params, dilations, pads, strides, bias, ie_device, precision, + ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25, 25], dilations=dilations, pads=pads, strides=strides, + bias=bias, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.parametrize("dilations", [[1, 1], [2, 2], [3, 5]]) + @pytest.mark.parametrize("pads", [[0, 0, 0, 0], [1, 1, 1, 1], [1, 2, 3, 4]]) + @pytest.mark.parametrize("strides", [[1, 1], [2, 2], [3, 5]]) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.nightly + def test_conv_4D(self, params, dilations, pads, strides, bias, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_net(**params, shape=[2, 3, 25, 25], dilations=dilations, pads=pads, strides=strides, bias=bias, + ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_autopad[:-1]) + @pytest.mark.parametrize("auto_pad", ['SAME_UPPER', 'SAME_LOWER']) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.nightly + @pytest.mark.xfail(reason='autopad dimetions do not agree with framework') + def test_conv_4D_autopad(self, params, auto_pad, bias, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25, 25], bias=bias, auto_pad=auto_pad, + ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D_precommit) + @pytest.mark.parametrize("dilations", [[3, 4, 5]]) + @pytest.mark.parametrize("pads", [[1, 2, 3, 4, 5, 6]]) + @pytest.mark.parametrize("strides", [[3, 4, 5]]) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.precommit + def test_conv_5D_precommit(self, params, dilations, pads, strides, bias, ie_device, precision, + ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25, 25, 25], dilations=dilations, pads=pads, strides=strides, + bias=bias, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.parametrize("dilations", [[1, 1, 1], [2, 2, 2], [3, 4, 5]]) + @pytest.mark.parametrize("pads", [[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1], [1, 2, 3, 4, 5, 
6]]) + @pytest.mark.parametrize("strides", [[1, 1, 1], [2, 2, 2], [3, 4, 5]]) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.nightly + def test_conv_5D(self, params, dilations, pads, strides, bias, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25, 25, 25], dilations=dilations, pads=pads, strides=strides, + bias=bias, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D_autopad[:-1]) + @pytest.mark.parametrize("auto_pad", ['SAME_UPPER', 'SAME_LOWER']) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.nightly + @pytest.mark.xfail(reason='autopad dimetions do not agree with framework') + def test_conv_5D_autopad(self, params, auto_pad, bias, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, shape=[2, 3, 25, 25, 25], bias=bias, auto_pad=auto_pad, + ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_conv_transpose.py b/tests/layer_tests/onnx_tests/test_conv_transpose.py new file mode 100644 index 00000000000..1de48549f3f --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_conv_transpose.py @@ -0,0 +1,215 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestConvTranspose(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_conv_transpose(self, ir_version, input_shape, output_shape, kernel_shape, strides, group=1, + dilations=None, pads=None, force_output_shape=False, output_padding=None, bias=False, + auto_pad=None): + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + weights = np.random.randn(*kernel_shape).astype(np.float) + + node_weights_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['kernel'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=weights.shape, + vals=weights.flatten(), + ), + ) + + conv_attrs = { + 'strides': strides, + 'group': group, + 'kernel_shape': kernel_shape[2:], # As we have NCHW layout + } + + if pads is not None: + if not force_output_shape: + conv_attrs.update({'pads': pads}) + else: + pads = np.zeros(2 * (len(input_shape) - 2)) + _pads = np.array(pads).reshape([2, -1]) + if output_padding is not None: + conv_attrs.update({'output_padding': output_padding}) + if dilations is not None: + conv_attrs.update({'dilations': dilations}) + else: + dilations = np.ones(len(input_shape) - 2) + if force_output_shape: + conv_attrs.update({'output_shape': output_shape[2:]}) + + if auto_pad: + conv_attrs.update({'auto_pad': auto_pad}) + + nodes = [node_weights_def] + if bias: + bias_const = np.random.randint(-10, 10, kernel_shape[0]).astype(np.float32) + + node_bias_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['bias'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=bias_const.shape, + vals=bias_const.flatten(), + ), + ) + node_conv_transpose = onnx.helper.make_node( + 
'ConvTranspose', + inputs=['input', 'kernel', 'bias'], + outputs=['output'], + **conv_attrs + ) + nodes.extend([node_bias_def, node_conv_transpose]) + else: + node_conv_transpose = onnx.helper.make_node( + 'ConvTranspose', + inputs=['input', 'kernel'], + outputs=['output'], + **conv_attrs + ) + nodes.append(node_conv_transpose) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_conv_transpose_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_conv_transpose_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + ref_net = None + + return onnx_net, ref_net + + common_tests_4D_precommit = [ + pytest.param(dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 12, 12], + kernel_shape=[3, 3, 2, 2], strides=[1, 1], dilations=[2, 2]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 85, 85], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, output_padding=[1, 1]), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + common_tests_4D = [ + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 11, 11], + kernel_shape=[3, 3, 2, 2], strides=[1, 1]), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 20, 20], + kernel_shape=[3, 3, 2, 2], strides=[2, 2]), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 12, 12], + kernel_shape=[3, 3, 2, 2], strides=[1, 1], dilations=[2, 2]), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 21, 21], + kernel_shape=[3, 3, 2, 2], strides=[2, 2], dilations=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 85, 85], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, output_padding=[1, 1]), + ] + + explicit_pads_tests_4D = common_tests_4D + [ + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, pads=[2, 2, 2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 87, 87], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, pads=[2, 2, 2, 2], dilations=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, pads=[2, 2, 2, 2], force_output_shape=True), + ] + + valid_auto_pad_tests_4D = common_tests_4D + [ + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 84, 84], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 91, 91], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, dilations=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, force_output_shape=True), + ] + + same_auto_pad_tests_4D = [ + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 10, 10], + kernel_shape=[3, 3, 2, 2], strides=[1, 1]), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 20, 20], + kernel_shape=[3, 3, 2, 2], strides=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 10, 10], + kernel_shape=[3, 3, 2, 2], strides=[1, 1], dilations=[2, 2]), + dict(input_shape=[1, 3, 10, 10], output_shape=[1, 3, 20, 20], + kernel_shape=[3, 3, 2, 2], strides=[2, 2], dilations=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 
8, 8], strides=[4, 4], group=2, dilations=[2, 2]), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 80, 80], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, force_output_shape=True), + dict(input_shape=[1, 2, 20, 20], output_shape=[1, 2, 81, 81], + kernel_shape=[2, 1, 8, 8], strides=[4, 4], group=2, output_padding=[1, 1]), + ] + + @pytest.mark.parametrize("params", common_tests_4D_precommit) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.parametrize("auto_pad", ["NOTSET"]) + @pytest.mark.precommit + def test_conv_transpose_4D_precommit(self, params, bias, ie_device, precision, ir_version, auto_pad, temp_dir): + if ie_device == 'GPU' and 'dilations' in params: + pytest.xfail('dilations are not supported on GPU') + self._test(*self.create_conv_transpose(**params, ir_version=ir_version, bias=bias, auto_pad=auto_pad), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", explicit_pads_tests_4D) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.parametrize("auto_pad", ["NOTSET"]) + @pytest.mark.nightly + def test_conv_transpose_4D(self, params, bias, ie_device, precision, ir_version, auto_pad, temp_dir): + if ie_device == 'GPU' and 'dilations' in params: + pytest.xfail('dilations are not supported on GPU') + self._test(*self.create_conv_transpose(**params, ir_version=ir_version, bias=bias, auto_pad=auto_pad), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", valid_auto_pad_tests_4D) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.parametrize("auto_pad", ["VALID"]) + @pytest.mark.nightly + def test_conv_transpose_valid_auto_pad_4D(self, params, bias, ie_device, precision, ir_version, auto_pad, temp_dir): + if ie_device == 'GPU' and 'dilations' in params: + pytest.xfail('dilations are not supported on GPU') + self._test(*self.create_conv_transpose(**params, ir_version=ir_version, bias=bias, auto_pad=auto_pad), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", same_auto_pad_tests_4D) + @pytest.mark.parametrize("bias", [False, True]) + @pytest.mark.parametrize("auto_pad", ["SAME_UPPER", "SAME_LOWER"]) + @pytest.mark.nightly + def test_conv_transpose_same_auto_pad_4D(self, params, bias, ie_device, precision, ir_version, auto_pad, temp_dir): + if ie_device == 'GPU' and 'dilations' in params: + pytest.xfail('dilations are not supported on GPU') + self._test(*self.create_conv_transpose(**params, ir_version=ir_version, bias=bias, auto_pad=auto_pad), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_cumsum.py b/tests/layer_tests/onnx_tests/test_cumsum.py new file mode 100644 index 00000000000..b49f00f887c --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_cumsum.py @@ -0,0 +1,267 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +def cumsum(a, axis=None, exclusive=False, reverse=False): + if reverse: + a = np.flip(a, axis) + res = np.cumsum(a, axis=axis) + if exclusive: + res -= a + if reverse: + res = np.flip(res, axis) + return res + + +class TestCumSum(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version, axis=None, reverse=None, exclusive=None): + """ + ONNX net IR net + + Input->CumSum->Output => Input->CumSum 
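+
+        exclusive=1 drops the current element from each partial sum and reverse=1
+        accumulates from the end, e.g. cumsum([1, 2, 3]) = [1, 3, 6],
+        exclusive -> [0, 1, 3], reverse -> [6, 5, 3] (see the cumsum() helper above).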
+ + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + nodes = [] + inputs = ['input'] + if axis is not None: + node_axis_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['axis'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[], + vals=[axis], + ), + ) + nodes.append(node_axis_def) + inputs.append('axis') + + args = dict() + if exclusive is not None: + args['exclusive'] = exclusive + if reverse is not None: + args['reverse'] = reverse + node_def = onnx.helper.make_node( + 'CumSum', + inputs=inputs, + outputs=['output'], + **args + ) + nodes.append(node_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + onnx.checker.check_model(onnx_net) + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'CumSum'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + if exclusive is not None: + nodes_attributes['node']['exclusive'] = exclusive + if reverse is not None: + nodes_attributes['node']['reverse'] = reverse + edges = [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ] + if axis is not None: + nodes_attributes.update({ + 'input_axis_data': {'kind': 'data', 'value': [axis]}, + 'axis': {'kind': 'op', 'type': 'Const'}, + 'axis_data': {'shape': [], 'kind': 'data'}}) + edges.extend([('input_axis_data', 'axis'), + ('axis', 'axis_data'), + ('axis_data', 'node')]) + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version, axis=None, reverse=None, exclusive=None): + """ + ONNX net IR net + + Input->Concat(+cumsum const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randn(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + nodes = [node_const_def] + inputs = ['const1'] + if axis is not None: + node_axis_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['axis'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[], + vals=[axis], + ), + ) + nodes.append(node_axis_def) + inputs.append('axis') + + args = dict() + if exclusive is not None: + args['exclusive'] = exclusive + if reverse is not None: + args['reverse'] = reverse + node_def = onnx.helper.make_node( + 'CumSum', + inputs=inputs, + outputs=['cumsum'], + **args + ) + + 
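+        # The CumSum above is applied to a Constant, so it is expected to be folded
+        # during conversion; the reference IR built below therefore contains only the
+        # precomputed cumulative sum as a Const feeding the Concat.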
node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'cumsum'], + outputs=['output'], + axis=concat_axis + ) + nodes.extend([node_def, node_concat_def]) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + onnx.checker.check_model(onnx_net) + + # + # Create reference IR net + # + constant = cumsum(constant, axis=axis, reverse=reverse, exclusive=exclusive) + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [ + dict(shape=[1]), + dict(shape=[1, 2]), + dict(shape=[2, 4, 6]), + dict(shape=[2, 4, 6, 8]), + dict(shape=[2, 4, 6, 8, 10]), + dict(shape=[1, 2], axis=-2), + dict(shape=[1, 2], axis=1), + dict(shape=[2, 4, 6], axis=-3), + dict(shape=[2, 4, 6], axis=2), + dict(shape=[2, 4, 6, 8], axis=-4), + dict(shape=[2, 4, 6, 8], axis=3), + dict(shape=[2, 4, 6, 8, 10], axis=-1), + dict(shape=[2, 4, 6, 8, 10], axis=4)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("reverse", [0, 1]) + @pytest.mark.parametrize("exclusive", [0, 1]) + @pytest.mark.nightly + def test_cumsum(self, params, reverse, exclusive, ie_device, precision, ir_version, temp_dir): + if 'axis' not in params: + pytest.skip('No axis cases fail in ONNX') + self._test(*self.create_net(**params, exclusive=exclusive, reverse=reverse, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("reverse", [0, 1]) + @pytest.mark.parametrize("exclusive", [0, 1]) + @pytest.mark.nightly + def test_cumsum_const(self, params, reverse, exclusive, ie_device, precision, ir_version, temp_dir): + if 'axis' not in params: + pytest.skip('No axis cases fail in ONNX') + self._test(*self.create_net_const(**params, precision=precision, exclusive=exclusive, reverse=reverse, + ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_dequantize_linear.py b/tests/layer_tests/onnx_tests/test_dequantize_linear.py new file mode 100644 index 00000000000..1bf364accff --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_dequantize_linear.py @@ -0,0 +1,205 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestDequantizeLinear(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(np.iinfo(self.inp_type).min, + 
np.iinfo(self.inp_type).max, + inputs_dict[input], + dtype=self.inp_type) + return inputs_dict + + def create_dequanize_linear(self, shape, y_scale: np.array, y_zero_point=None, axis=None, opset=10, ir_version='10'): + """ + ONNX net IR net + + Input->DequantizeLinear->Output => Input->Sub->Mul + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + self.inp_type = y_zero_point.dtype if y_zero_point is not None else np.uint8 + onnx_type = TensorProto.UINT8 if self.inp_type == np.uint8 else TensorProto.INT8 + input = helper.make_tensor_value_info('input', onnx_type, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + nodes = [] + inputs = ['input', 'y_scale'] + node_scale_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['y_scale'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=y_scale.shape, + vals=y_scale.flatten(), + ), + ) + nodes.append(node_scale_def) + + if y_zero_point is not None: + node_zero_point_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['y_zero_point'], + value=helper.make_tensor( + name='const_tensor', + data_type=onnx_type, + dims=y_zero_point.shape, + vals=y_zero_point.flatten(), + ), + ) + inputs.append('y_zero_point') + nodes.append(node_zero_point_def) + args = dict() + if axis is not None: + args['axis'] = axis + node_def = onnx.helper.make_node( + 'DequantizeLinear', + inputs=inputs, + outputs=['output'], + **args + ) + nodes.append(node_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model', + opset_imports=[helper.make_opsetid("", opset)]) + onnx.checker.check_model(onnx_net) + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
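+        # DequantizeLinear computes y = (x - y_zero_point) * y_scale; the reference
+        # graph below expresses the same thing as a Multiply by y_scale followed by an
+        # Add of the precomputed constant -y_scale * y_zero_point
+        # (e.g. y_scale=2, y_zero_point=128  ->  y = 2 * x - 256).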
+ # + + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_scale_data': {'kind': 'data', 'value': y_scale}, + 'scale_const': {'kind': 'op', 'type': 'Const'}, + 'scale_data': {'shape': np.ones(len(shape)), 'kind': 'data'}, + 'mul': {'kind': 'op', 'type': 'Multiply'}, + 'mul_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + edges = [('input', 'input_data'), + ('input_data', 'mul'), + ('input_scale_data', 'scale_const'), + ('scale_const', 'scale_data'), + ('scale_data', 'mul'), + ('mul', 'mul_data')] + if y_zero_point is not None: + nodes_attributes.update({ + 'input_zero_data': {'kind': 'data', 'value': -y_scale * y_zero_point}, + 'zero_const': {'kind': 'op', 'type': 'Const'}, + 'zero_data': {'shape': np.ones(len(shape)), 'kind': 'data'}, + 'sub': {'kind': 'op', 'type': 'Add'}, + 'sub_data': {'shape': shape, 'kind': 'data'}, + }) + edges.extend([('mul_data', 'sub'), + ('input_zero_data', 'zero_const'), + ('zero_const', 'zero_data'), + ('zero_data', 'sub'), + ('sub', 'sub_data'), + ('sub_data', 'result')]) + else: + edges.append(('mul_data', 'result')) + + ref_net = None + if check_ir_version(10, None, ir_version): + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + test_data = [ + dict(shape=[8], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(128, dtype=np.uint8)), + dict(shape=[8], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(1, dtype=np.int8)), + dict(shape=[2, 4], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(128, dtype=np.uint8)), + dict(shape=[2, 4], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(1, dtype=np.int8)), + dict(shape=[2, 4, 6], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(128, dtype=np.uint8)), + dict(shape=[2, 4, 6], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(1, dtype=np.int8)), + dict(shape=[2, 4, 6, 8], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(128, dtype=np.uint8)), + dict(shape=[2, 4, 6, 8], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(1, dtype=np.int8)), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(128, dtype=np.uint8)), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array(2, dtype=np.float), y_zero_point=np.array(1, dtype=np.int8)), + ] + test_data_def_zerop = [ + dict(shape=[8], y_scale=np.array(2, dtype=np.float)), + dict(shape=[2, 4], y_scale=np.array(2, dtype=np.float)), + dict(shape=[2, 4, 6], y_scale=np.array(2, dtype=np.float)), + dict(shape=[2, 4, 6, 8], y_scale=np.array(2, dtype=np.float)), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array(2, dtype=np.float)), + ] + + test_data_axis = [ + dict(shape=[2, 4], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), axis=1), + dict(shape=[2, 4], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([128, 128, 128, 128], dtype=np.uint8), axis=1), + dict(shape=[2, 4], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([1, 1, 1, 1], dtype=np.int8), axis=1), + dict(shape=[2, 4, 6], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), axis=1), + dict(shape=[2, 4, 6], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([128, 128, 128, 128], dtype=np.uint8), axis=1), + dict(shape=[2, 4, 6], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([1, 1, 1, 1], dtype=np.int8), axis=1), + dict(shape=[2, 4, 6, 8], y_scale=np.array([2, 2.5, 3, 
2.3], dtype=np.float), axis=1), + dict(shape=[2, 4, 6, 8], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([128, 128, 128, 128], dtype=np.uint8), axis=1), + dict(shape=[2, 4, 6, 8], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([1, 1, 1, 1], dtype=np.int8), axis=1), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), axis=1), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([128, 128, 128, 128], dtype=np.uint8), axis=1), + dict(shape=[2, 4, 6, 8, 10], y_scale=np.array([2, 2.5, 3, 2.3], dtype=np.float), + y_zero_point=np.array([1, 1, 1, 1], dtype=np.int8), axis=1), + ] + + @pytest.mark.parametrize("params", test_data_def_zerop) + @pytest.mark.nightly + @pytest.mark.xfail(reason='Defualt zero_point fails on onnxruntime') + def test_quantize_linear_def_zerop_opset10(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dequanize_linear(**params, ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_quantize_linear_opset10(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dequanize_linear(**params, ir_version=ir_version), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data + test_data_def_zerop) + @pytest.mark.nightly + @pytest.mark.skip(reason='DequantizeLinear-13 is unsupported in MO') + def test_quantize_linear_opset13(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dequanize_linear(**params, opset=13, ir_version=ir_version), ie_device, precision, + ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_axis) + @pytest.mark.nightly + @pytest.mark.skip(reason='DequantizeLinear-13 is unsupported in MO') + def test_quantize_linear_axis_opset13(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dequanize_linear(**params, opset=13, ir_version=ir_version), ie_device, precision, + ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_dropout.py b/tests/layer_tests/onnx_tests/test_dropout.py new file mode 100644 index 00000000000..618fdc02d32 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_dropout.py @@ -0,0 +1,169 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestDropout(Caffe2OnnxLayerTest): + def create_net(self, shape, ratio, ir_version, opset=None): + """ + ONNX net IR net + + Input->Dropout->Sigmoid->Output => Input->sigmoid + + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + args = dict() + if ratio: + args['ratio'] = ratio + if opset == 6: + args['is_test'] = 1 + node_def = helper.make_node( + 'Dropout', + inputs=['input'], + outputs=['dropout'], + **args + ) + + sigmoid_def = helper.make_node( + 'Sigmoid', + inputs=['dropout'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def, sigmoid_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = 
dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape, ratio, ir_version, opset=None): + """ + ONNX net IR net + + Input->Concat(+dropout with const)->Output => Input->Concat(+const) + + """ + + from onnx import helper + from onnx import TensorProto + + constant = np.random.randint(-127, 127, shape).astype(np.float) + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + args = dict() + if ratio: + args['ratio'] = ratio + if opset == 6: + args['is_test'] = 1 + node_def = helper.make_node( + 'Dropout', + inputs=['const1'], + outputs=['dropout'], + **args + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'dropout'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + ref_net = None + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12], ratio=None), + dict(shape=[10, 12], ratio=0.7), + dict(shape=[8, 10, 12], ratio=None), + dict(shape=[8, 10, 12], ratio=0.7), + dict(shape=[6, 8, 10, 12], ratio=None), + dict(shape=[6, 8, 10, 12], ratio=0.7), + dict(shape=[4, 6, 8, 10, 12], ratio=None), + dict(shape=[4, 6, 8, 10, 12], ratio=0.7)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_dropout_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_dropout(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_dropout_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_dropout_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_elu.py b/tests/layer_tests/onnx_tests/test_elu.py new file mode 100644 index 00000000000..c65b000b082 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_elu.py @@ -0,0 +1,185 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from 
common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestElu(Caffe2OnnxLayerTest): + def create_net(self, shape, alpha, ir_version): + """ + ONNX net IR net + + Input->Elu->Output => Input->elu + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Elu', + inputs=['input'], + outputs=['output'], + alpha=alpha + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Elu', 'alpha': alpha}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, alpha, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+elu const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const_number = np.prod(shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Elu', + inputs=['const1'], + outputs=['elu1'], + alpha=alpha + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'elu1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.clip(constant, 0, np.inf) + (np.exp(np.clip(constant, -np.inf, 0)) - 1) * alpha + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 
'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12], alpha=0.1), + dict(shape=[8, 10, 12], alpha=0.9), + dict(shape=[6, 8, 10, 12], alpha=1.5), + dict(shape=[4, 6, 8, 10, 12], alpha=4.5)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_elu(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_elu_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_embedding_bag.py b/tests/layer_tests/onnx_tests/test_embedding_bag.py new file mode 100644 index 00000000000..dce555c8261 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_embedding_bag.py @@ -0,0 +1,131 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import numpy as np +import pytest +import torch +import torch.nn as nn +from common.layer_test_class import CommonLayerTest, check_ir_version +from unit_tests.utils.graph import build_graph + + +class PytorchLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + path = os.path.join(save_path, 'model.onnx') + self.torch_model = framework_model + torch.onnx.export(self.torch_model, self.var, path, output_names=['output']) + assert os.path.isfile(path), "model.onnx haven't been saved here: {}".format(save_path) + return path + + def get_framework_results(self, inputs_dict, model_path): + return {'output': self.torch_model(*self.var).detach().numpy()} + + +class EmbeddingBagModel(torch.nn.Module): + def __init__(self, n, m, indices_shape=None, per_sample_weights=False, mode="sum"): + super(EmbeddingBagModel, self).__init__() + EE = nn.EmbeddingBag(n, m, mode=mode, sparse=True) + self.W = np.random.uniform(low=-np.sqrt(1 / n), high=np.sqrt(1 / n), size=(n, m)).astype(np.float32) + EE.weight.data = torch.tensor(self.W, requires_grad=True) + self.embedding_bag = EE + if per_sample_weights: + self.per_sample_weights = torch.randn(indices_shape) + else: + self.per_sample_weights = None + + +class TestPytorchEmbeddingBag(PytorchLayerTest): + def _prepare_input(self, inputs_dict): + assert 'input' in inputs_dict and 'offsets' in inputs_dict, "input and offsets should be in inputs_dict" + indices, offsets = self.var + inputs_dict['input'] = indices.numpy().astype(np.int32) + inputs_dict['offsets'] = offsets.numpy().astype(np.int32) + return inputs_dict + + def create_net(self, n, m, emb_batch_size, ir_version, per_sample_weights=False, offsets=None): + """ + Pytorch net IR net + + Input->EmbeddingBag->Output => Input->Gather/SparseWeightedSum + + """ + # Create Pytorch model + EE = EmbeddingBagModel(n, m, indices_shape=[emb_batch_size], per_sample_weights=per_sample_weights) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input_weights_data': {'kind': 'data', 'value': EE.W.flatten()}, + 'weights': {'kind': 'op', 
'type': 'Const'}, + 'weights_data': {'shape': EE.W.shape, 'kind': 'data'}, + 'indices': {'kind': 'op', 'type': 'Parameter'}, + 'indices_data': {'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'EmbeddingBagOffsetsSum'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + edges = [('input_weights_data', 'weights'), + ('weights', 'weights_data'), + ('indices', 'indices_data'), + ('weights_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ] + if offsets is not None: + nodes_attributes.update({ + 'offsets': {'kind': 'op', 'type': 'Parameter'}, + 'offsets_data': {'kind': 'data'}, + 'node_data': {'shape': [len(offsets), m], 'kind': 'data'}, + }) + edges.extend([ + ('offsets', 'offsets_data'), + ('indices_data', 'node'), + ('offsets_data', 'node'), + ]) + else: + nodes_attributes.update({ + 'input_shape_data': {'kind': 'data', 'value': [-1]}, + 'shape': {'kind': 'op', 'type': 'Const'}, + 'shape_data': {'shape': [1], 'kind': 'data'}, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'shape': [emb_batch_size], 'kind': 'data'}, + 'input_offsets_data': {'kind': 'data', 'value': np.arange(0, 128, 2)}, + 'offsets': {'kind': 'op', 'type': 'Const'}, + 'offsets_data': {'shape': [int(emb_batch_size / 2)], 'kind': 'data'}, + 'node_data': {'shape': [int(emb_batch_size / 2), m], 'kind': 'data'}, + }) + edges.extend([ + ('input_shape_data', 'shape'), + ('shape', 'shape_data'), + ('indices_data', 'reshape'), + ('shape_data', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'node'), + ('input_offsets_data', 'offsets'), + ('offsets', 'offsets_data'), + ('offsets_data', 'node'), + ]) + + ref_net = build_graph(nodes_attributes, edges) + if offsets is not None: + self.var = (torch.from_numpy(np.random.choice(n, emb_batch_size)).long(), + torch.from_numpy(np.array(offsets)).long()) + else: + self.var = ( + torch.from_numpy(np.random.choice(n, emb_batch_size).reshape(int(emb_batch_size / 2), 2)).long(),) + return EE, ref_net + + test_data = [ + dict(n=1460, m=16, emb_batch_size=128), + dict(n=1460, m=16, emb_batch_size=128, offsets=np.arange(0, 128)), + dict(n=1460, m=16, emb_batch_size=128, offsets=[0, 2, 6, 20, 80]), + # dict(n=1460, m=16, emb_batch_size=128, offsets=[0, 2, 6, 20, 80], per_sample_weights=True), + # per_sample_weights not supported in ONNX + dict(n=1460, m=16, emb_batch_size=128, offsets=[0, 2, 6, 20, 20, 80]) # empty bag case + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_pytorch_embedding_bag(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_flatten.py b/tests/layer_tests/onnx_tests/test_flatten.py new file mode 100644 index 00000000000..bc24e300c20 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_flatten.py @@ -0,0 +1,302 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestFlatten(OnnxRuntimeLayerTest): + def create_flatten_net(self, axis, input_shape, dim, ir_version, opset=None): + """ + ONNX net IR net + + Input->Flatten->Output => Input->Reshape + + """ + + # + # Create ONNX model + # + + # TODO: possible move all imports to separate func? 
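+        # For reference, the expected 2D `dim` values in the test_data_* lists below follow
+        # the ONNX Flatten semantics: dimensions before `axis` collapse into the first output
+        # dimension and the remaining dimensions into the second. A rough sketch of that rule
+        # (illustrative only, assuming numpy is available as np; the test itself does not use it):
+        #
+        #   ax = axis if axis >= 0 else axis + len(input_shape)
+        #   dim = [int(np.prod(input_shape[:ax])), int(np.prod(input_shape[ax:]))]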
+        import onnx
+        from onnx import helper
+        from onnx import TensorProto
+
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape)
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, dim)
+
+        node_flatten_def = onnx.helper.make_node(
+            'Flatten',
+            inputs=['input'],
+            outputs=['output'],
+            axis=axis,
+        )
+
+        # Create the graph (GraphProto)
+        graph_def = helper.make_graph(
+            [node_flatten_def],
+            'test_flatten_model',
+            [input],
+            [output],
+        )
+
+        # Create the model (ModelProto)
+        args = dict(producer_name='test_model')
+        if opset:
+            args['opset_imports'] = [helper.make_opsetid("", opset)]
+        onnx_net = helper.make_model(graph_def, **args)
+
+        #
+        #   Create reference IR net
+        #   Please, specify 'type': 'Input' for input node
+        #   Moreover, do not forget to validate ALL layer attributes!!!
+        #
+
+        ref_net = None
+
+        return onnx_net, ref_net
+
+    def create_flatten_net_const(self, axis, input_shape, dim, ir_version, opset=None):
+        """
+            ONNX net                                  IR net
+
+            Input->Flatten->Concat->Output   =>    Input->Concat
+            Input-'                                 Const-'
+
+        """
+
+        #
+        #   Create ONNX model
+        #
+
+        import onnx
+        from onnx import helper
+        from onnx import TensorProto
+        import numpy as np
+
+        concat_axis = 0
+        concat_output_shape = dim.copy()
+        concat_output_shape[concat_axis] *= 2
+
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, dim)
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape)
+
+        const_number = np.prod(input_shape)
+        constant = np.random.randint(-127, 127, const_number).astype(np.float)
+
+        node_const_def = onnx.helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=['const'],
+            value=helper.make_tensor(
+                name='const_tensor',
+                data_type=TensorProto.FLOAT,
+                dims=input_shape,
+                vals=constant,
+            ),
+        )
+
+        node_flatten_def = onnx.helper.make_node(
+            'Flatten',
+            inputs=['const'],
+            outputs=['flatten_output'],
+            axis=axis,
+        )
+
+        node_concat_def = onnx.helper.make_node(
+            'Concat',
+            inputs=['input', 'flatten_output'],
+            outputs=['output'],
+            axis=concat_axis
+        )
+
+        # Create the graph (GraphProto)
+        graph_def = helper.make_graph(
+            [node_const_def, node_flatten_def, node_concat_def],
+            'test_flatten_model',
+            [input],
+            [output],
+        )
+
+        # Create the model (ModelProto)
+        args = dict(producer_name='test_model')
+        if opset:
+            args['opset_imports'] = [helper.make_opsetid("", opset)]
+        onnx_net = helper.make_model(graph_def, **args)
+
+        #
+        #   Create reference IR net
+        #   Please, specify 'type': 'Input' for input node
+        #   Moreover, do not forget to validate ALL layer attributes!!!
+ # + + ref_net = None + + return onnx_net, ref_net + + test_data_3D = [ + dict(axis=0, input_shape=[1, 3, 224], dim=[1, 672]), + dict(axis=-3, input_shape=[1, 3, 224], dim=[1, 672]), + dict(axis=1, input_shape=[1, 3, 224], dim=[1, 672]), + dict(axis=-2, input_shape=[1, 3, 224], dim=[1, 672]), + dict(axis=2, input_shape=[2, 3, 224], dim=[6, 224]), + dict(axis=-1, input_shape=[2, 3, 224], dim=[6, 224]), + dict(axis=3, input_shape=[3, 3, 224], dim=[2016, 1]) + ] + + test_data_4D_precommit = [ + dict(axis=1, input_shape=[1, 3, 224, 224], dim=[1, 150528]), + dict(axis=-3, input_shape=[1, 3, 224, 224], dim=[1, 150528]) + ] + + test_data_4D = [ + dict(axis=0, input_shape=[1, 3, 224, 224], dim=[1, 150528]), + dict(axis=-4, input_shape=[1, 3, 224, 224], dim=[1, 150528]), + dict(axis=1, input_shape=[1, 3, 224, 224], dim=[1, 150528]), + dict(axis=-3, input_shape=[1, 3, 224, 224], dim=[1, 150528]), + dict(axis=2, input_shape=[2, 3, 224, 224], dim=[6, 50176]), + dict(axis=-2, input_shape=[2, 3, 224, 224], dim=[6, 50176]), + dict(axis=3, input_shape=[3, 3, 224, 224], dim=[2016, 224]), + dict(axis=-1, input_shape=[3, 3, 224, 224], dim=[2016, 224]), + dict(axis=4, input_shape=[4, 3, 224, 224], dim=[602112, 1]) + ] + + test_data_5D_precommit = [ + dict(axis=-5, input_shape=[1, 3, 9, 224, 224], dim=[1, 1354752]), + dict(axis=5, input_shape=[4, 3, 9, 224, 224], dim=[5419008, 1])] + + test_data_5D = [ + dict(axis=0, input_shape=[1, 3, 9, 224, 224], dim=[1, 1354752]), + dict(axis=-5, input_shape=[1, 3, 9, 224, 224], dim=[1, 1354752]), + dict(axis=1, input_shape=[1, 3, 9, 224, 224], dim=[1, 1354752]), + dict(axis=-4, input_shape=[1, 3, 9, 224, 224], dim=[1, 1354752]), + dict(axis=2, input_shape=[2, 3, 9, 224, 224], dim=[6, 451584]), + dict(axis=-3, input_shape=[2, 3, 9, 224, 224], dim=[6, 451584]), + dict(axis=3, input_shape=[3, 3, 9, 224, 224], dim=[81, 50176]), + dict(axis=-2, input_shape=[3, 3, 9, 224, 224], dim=[81, 50176]), + dict(axis=4, input_shape=[3, 3, 9, 224, 224], dim=[18144, 224]), + dict(axis=-1, input_shape=[3, 3, 9, 224, 224], dim=[18144, 224]), + dict(axis=5, input_shape=[4, 3, 9, 224, 224], dim=[5419008, 1]) + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_3D(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_3D_const(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net_const(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_4D(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + 
self._test(*self.create_flatten_net(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_precommit) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.precommit + def test_flatten_4D_precommit(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_precommit) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_4D_const_precommit(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net_const(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_4D_const(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net_const(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D_precommit) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_5D_precommit(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_5D(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D_precommit) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_5D_const_precommit(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net_const(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.parametrize("opset", [6, 9]) + @pytest.mark.nightly + def test_flatten_5D_const(self, params, opset, ie_device, precision, ir_version, temp_dir): + # negative axis 
not allowed by onnx spec for flatten-1 and flatten-9 + if params['axis'] < 0: + self.skip_framework = True + else: + self.skip_framework = False + self._test(*self.create_flatten_net_const(**params, ir_version=ir_version, opset=opset), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_floor.py b/tests/layer_tests/onnx_tests/test_floor.py new file mode 100644 index 00000000000..2d6be9c7262 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_floor.py @@ -0,0 +1,179 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestFloor(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Floor->Output => Input->Floor + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Floor', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Floor'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+floored const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randn(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Floor', + inputs=['const1'], + outputs=['floor'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'floor'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.floor(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + 
if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_floor(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_floor_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_gather.py b/tests/layer_tests/onnx_tests/test_gather.py new file mode 100644 index 00000000000..c4c0ee9c12e --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_gather.py @@ -0,0 +1,261 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestGather(OnnxRuntimeLayerTest): + def create_net(self, shape, axis, indices, output_shape, ir_version): + """ + ONNX net IR net + + Input->Gather->Output => Input->Gather + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + indices = np.array(indices) + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_indices_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['indices'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=indices.shape, + vals=indices.flatten(), + ), + ) + + args = dict() + if axis: + args['axis'] = axis + else: + axis = 0 + node_def = onnx.helper.make_node( + 'Gather', + inputs=['input', 'indices'], + outputs=['output'], + **args + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_indices_def, node_def], + 'test_model', + [input], + [output] + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': indices.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': indices.shape, 'kind': 'data', 
'value': None}, + 'input_axis_const_data': {'kind': 'data', 'value': [axis]}, + 'axis_const': {'kind': 'op', 'type': 'Const'}, + 'axis_const_data': {'shape': [], 'kind': 'data', 'value': None}, + 'node': {'kind': 'op', 'type': 'Gather'}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_axis_const_data', 'axis_const'), + ('axis_const', 'axis_const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('axis_const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, axis, indices, output_shape, ir_version): + """ + ONNX net IR net + + Input->Concat(+gathered const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + indices = np.array(indices) + + concat_axis = 0 + input_shape = output_shape.copy() + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] = 2 * concat_output_shape[concat_axis] + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + constant = np.random.randint(-127, 127, shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_indices_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['indices'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=indices.shape, + vals=indices.flatten(), + ), + ) + + args = dict() + if axis: + args['axis'] = axis + node_def = onnx.helper.make_node( + 'Gather', + inputs=['const1', 'indices'], + outputs=['gather'], + **args + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'gather'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_indices_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.take(constant, indices, axis=axis if axis else 0) + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': constant.shape, 'kind': 'data', 'value': None}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': concat_output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[6, 8, 10, 12], axis=2, indices=[[0, 2, 4], [5, 7, 9]], 
output_shape=[6, 8, 2, 3, 12]),
+        dict(shape=[4, 6, 8, 10, 12], axis=1, indices=[2, 5], output_shape=[4, 2, 8, 10, 12]),
+        dict(shape=[4, 6, 8, 10, 12], axis=-1, indices=[5, 8], output_shape=[4, 6, 8, 10, 2])]
+
+    test_data = [dict(shape=[10, 12], axis=0, indices=[3, 6], output_shape=[2, 12]),
+                 dict(shape=[10, 12], axis=1, indices=[4, 7], output_shape=[10, 2]),
+                 dict(shape=[10, 12], axis=-1, indices=[4, 7], output_shape=[10, 2]),
+                 dict(shape=[10, 12], axis=None, indices=[[0, 1, 3, 4], [5, 6, 8, 9]], output_shape=[2, 4, 12]),
+                 dict(shape=[10, 12], axis=1, indices=[[0, 1, 3, 4, 5], [6, 7, 9, 10, 11]], output_shape=[10, 2, 5]),
+                 dict(shape=[8, 10, 12], axis=0, indices=[3, 6], output_shape=[2, 10, 12]),
+                 dict(shape=[8, 10, 12], axis=1, indices=[4, 7], output_shape=[8, 2, 12]),
+                 dict(shape=[8, 10, 12], axis=2, indices=[5, 8], output_shape=[8, 10, 2]),
+                 dict(shape=[8, 10, 12], axis=-1, indices=[5, 8], output_shape=[8, 10, 2]),
+                 dict(shape=[8, 10, 12], axis=None, indices=[[0, 1], [3, 4], [6, 7]], output_shape=[3, 2, 10, 12]),
+                 dict(shape=[8, 10, 12], axis=1, indices=[[0, 2, 4], [5, 7, 9]], output_shape=[8, 2, 3, 12]),
+                 dict(shape=[6, 8, 10, 12], axis=0, indices=[2, 5], output_shape=[2, 8, 10, 12]),
+                 dict(shape=[6, 8, 10, 12], axis=1, indices=[3, 6], output_shape=[6, 2, 10, 12]),
+                 dict(shape=[6, 8, 10, 12], axis=2, indices=[4, 7], output_shape=[6, 8, 2, 12]),
+                 dict(shape=[6, 8, 10, 12], axis=3, indices=[5, 8], output_shape=[6, 8, 10, 2]),
+                 dict(shape=[6, 8, 10, 12], axis=-1, indices=[5, 8], output_shape=[6, 8, 10, 2]),
+                 dict(shape=[6, 8, 10, 12], axis=None, indices=[[0, 1, 2], [3, 4, 5]], output_shape=[2, 3, 8, 10, 12]),
+                 dict(shape=[6, 8, 10, 12], axis=2, indices=[[0, 2, 4], [5, 7, 9]], output_shape=[6, 8, 2, 3, 12]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=0, indices=[1, 3], output_shape=[2, 6, 8, 10, 12]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=1, indices=[2, 5], output_shape=[4, 2, 8, 10, 12]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=2, indices=[3, 6], output_shape=[4, 6, 2, 10, 12]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=3, indices=[4, 7], output_shape=[4, 6, 8, 2, 12]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=4, indices=[5, 8], output_shape=[4, 6, 8, 10, 2]),
+                 dict(shape=[4, 6, 8, 10, 12], axis=-1, indices=[5, 8], output_shape=[4, 6, 8, 10, 2])]
+
+    @pytest.mark.parametrize("params", test_data_precommit)
+    @pytest.mark.precommit
+    def test_gather_precommit(self, params, ie_device, precision, ir_version, temp_dir):
+        self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version,
+                   temp_dir=temp_dir)
+
+    @pytest.mark.parametrize("params", test_data)
+    @pytest.mark.nightly
+    def test_gather(self, params, ie_device, precision, ir_version, temp_dir):
+        self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version,
+                   temp_dir=temp_dir)
+
+    @pytest.mark.parametrize("params", test_data)
+    @pytest.mark.nightly
+    def test_gather_const(self, params, ie_device, precision, ir_version, temp_dir):
+        self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version,
+                   temp_dir=temp_dir)
diff --git a/tests/layer_tests/onnx_tests/test_gemm.py b/tests/layer_tests/onnx_tests/test_gemm.py
new file mode 100644
index 00000000000..83bbe411c04
--- /dev/null
+++ b/tests/layer_tests/onnx_tests/test_gemm.py
@@ -0,0 +1,308 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+import numpy as np
+import pytest
+import torch
+from common.layer_test_class import CommonLayerTest
+from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestGemm(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shapeA, shapeB, shapeC, alpha, beta, trans_a, trans_b, precision, ir_version): + """ + ONNX net IR net + + Input->Gemm->Output => Input->Concat + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + max_len = max([len(shapeA), len(shapeB)]) + extended_shape1 = np.concatenate([np.ones(max_len - len(shapeA)), shapeA], axis=0) + extended_shape2 = np.concatenate([np.ones(max_len - len(shapeB)), shapeB], axis=0) + output_shape = np.concatenate( + [np.maximum(*[extended_shape1[0:-2], extended_shape2[0:-2]]), [shapeA[-2], shapeB[-1]]], + axis=0).astype(np.int).tolist() + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shapeA) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + _shapeB = shapeB.copy() + if trans_b: + _shapeB.reverse() + const1 = np.random.ranf(_shapeB).astype(np.float) + const2 = np.random.ranf(shapeC).astype(np.float) + + node_const1_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_const2_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + attrs = dict() + if alpha: + attrs['alpha'] = alpha + if beta: + attrs['beta'] = beta + if trans_a: + attrs['transA'] = trans_a + if trans_b: + attrs['transB'] = trans_b + node_def = onnx.helper.make_node( + 'Gemm', + inputs=['input', 'const1', 'const2'], + outputs=['output'], + **attrs + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const1_def, node_const2_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
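+        # The constant post-processing below mirrors the ONNX Gemm definition
+        # Y = alpha * A' * B' + beta * C (A' = transpose(A) when transA is set,
+        # B' = transpose(B) when transB is set): alpha is folded into the B constant and
+        # beta into the C constant, matching what a converter is expected to bake into the IR.
+        # This is only a sketch of the intent, since ref_net is left as None and the
+        # resulting IR is not actually validated here.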
+ # + + if alpha: + const1 *= alpha + if beta: + const2 *= beta + if precision == 'FP16': + const1 = const1.astype(np.float16) + const2 = const2.astype(np.float16) + if not trans_b: + const1 = const1.transpose() + + ref_net = None + + return onnx_net, ref_net + + def create_net_double(self, shapeA, shapeB, shapeC, alpha, beta, trans_a, trans_b, precision, ir_version): + """ + ONNX net IR net + + Input->Gemm->Output => Input->Concat + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + _shapeA = shapeA.copy() + if trans_a: + _shapeA.reverse() + _shapeB = shapeB.copy() + if trans_b: + _shapeB.reverse() + + max_len = max([len(shapeA), len(shapeB)]) + extended_shape1 = np.concatenate([np.ones(max_len - len(shapeA)), shapeA], axis=0) + extended_shape2 = np.concatenate([np.ones(max_len - len(shapeB)), shapeB], axis=0) + output_shape = np.concatenate( + [np.maximum(*[extended_shape1[0:-2], extended_shape2[0:-2]]), [shapeA[-2], shapeB[-1]]], + axis=0).astype(np.int).tolist() + input1 = helper.make_tensor_value_info('input1', TensorProto.FLOAT, _shapeA) + input2 = helper.make_tensor_value_info('input2', TensorProto.FLOAT, _shapeB) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const = np.random.ranf(shapeC).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + ) + + attrs = dict() + if alpha: + attrs['alpha'] = alpha + if beta: + attrs['beta'] = beta + if trans_a: + attrs['transA'] = trans_a + if trans_b: + attrs['transB'] = trans_b + node_def = onnx.helper.make_node( + 'Gemm', + inputs=['input1', 'input2', 'const'], + outputs=['output'], + **attrs + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def], + 'test_model', + [input1, input2], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + if precision == 'FP16': + const = const.astype(np.float16) + ref_net = None + + return onnx_net, ref_net + + test_data = [ + dict(shapeA=[3, 6], shapeB=[6, 4], shapeC=[3, 4]) + ] + + test_data_bc = [ + dict(shapeA=[3, 6], shapeB=[6, 4], shapeC=[4]) + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("alpha", [None, 0.1, 2.0]) + @pytest.mark.parametrize("beta", [None, 0.1, 2.0]) + @pytest.mark.parametrize("trans_a", [None]) + @pytest.mark.parametrize("trans_b", [None, 1]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_gemm(self, params, alpha, beta, trans_a, trans_b, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(params['shapeA'], params['shapeB'], params['shapeC'], alpha, beta, trans_a, + trans_b, precision, ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_bc) + @pytest.mark.parametrize("alpha", [None, 0.1, 2.0]) + @pytest.mark.parametrize("beta", [None, 0.1, 2.0]) + @pytest.mark.parametrize("trans_a", [None]) # transA is not supported + @pytest.mark.parametrize("trans_b", [None, 1]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_gemm_bc(self, params, alpha, beta, trans_a, trans_b, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(params['shapeA'], params['shapeB'], params['shapeC'], alpha, beta, trans_a, + trans_b, precision, ir_version), 
ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("alpha", [None, 0.1, 2.0]) + @pytest.mark.parametrize("beta", [None, 0.1, 2.0]) + @pytest.mark.parametrize("trans_a", [None, 1]) + @pytest.mark.parametrize("trans_b", [None, 1]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_gemm_double(self, params, alpha, beta, trans_a, trans_b, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_double(params['shapeA'], params['shapeB'], params['shapeC'], alpha, beta, + trans_a, trans_b, precision, ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_bc) + @pytest.mark.parametrize("alpha", [None, 0.1, 2.0]) + @pytest.mark.parametrize("beta", [None, 0.1, 2.0]) + @pytest.mark.parametrize("trans_a", [None, 1]) + @pytest.mark.parametrize("trans_b", [None, 1]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_gemm_double_bc(self, params, alpha, beta, trans_a, trans_b, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_double(params['shapeA'], params['shapeB'], params['shapeC'], alpha, beta, + trans_a, trans_b, precision, ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + +class PytorchLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + path = os.path.join(save_path, 'model.onnx') + self.torch_model = framework_model['model'] + torch.onnx.export(self.torch_model, framework_model['var'], path, input_names=['input'], + output_names=['output']) + assert os.path.isfile(path), "model.onnx haven't been saved here: {}".format(save_path) + return path + + def get_framework_results(self, inputs_dict, model_path): + x = torch.tensor(inputs_dict['input'], dtype=torch.float32) + return {'output': self.torch_model(x).numpy()} + + +class GemmModel(torch.nn.Module): + def __init__(self, weights): + super(GemmModel, self).__init__() + self.weights = torch.from_numpy(weights) + + +class TestPytorchMM(PytorchLayerTest): + def create_net(self, precision, shape, w_shape, output_shape, ir_version): + """ + Pytorch net IR net + + Input->MM->Output => Input->FullyConnected + + """ + + weights_const = np.random.randn(*w_shape).astype(np.float32) + # Create Pytorch model + model = GemmModel(weights_const) + + if precision == 'FP16': + weights_const = weights_const.astype(np.float16) + + # Create reference IR net + + # TODO: add reference IR net. 
Now it is omitted since inference is more + # important and needs to be checked in the first + + ref_net = None + + return {'model': model, 'var': torch.randn(shape)}, ref_net + + test_data = [dict(shape=[1, 2048], w_shape=[2048, 3], output_shape=[1, 3])] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_pytorch_mm(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(precision, **params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_hard_sigmoid.py b/tests/layer_tests/onnx_tests/test_hard_sigmoid.py new file mode 100644 index 00000000000..e460d733e1c --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_hard_sigmoid.py @@ -0,0 +1,235 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestHardSigmoid(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, alpha, beta, ir_version): + """ + ONNX net IR net + + Input->HardSigmoid->Output => Input->HardSigmoid + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + args = dict() + if alpha is not None: + args['alpha'] = alpha + if beta is not None: + args['beta'] = beta + node_def = onnx.helper.make_node( + 'HardSigmoid', + inputs=['input'], + outputs=['output'], + **args + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_alpha_data': {'kind': 'data', 'value': [alpha if alpha is not None else 0.2]}, + 'alpha': {'kind': 'op', 'type': 'Const'}, + 'alpha_data': {'shape': [], 'kind': 'data'}, + 'input_beta_data': {'kind': 'data', 'value': [beta if beta is not None else 0.5]}, + 'beta': {'kind': 'op', 'type': 'Const'}, + 'beta_data': {'shape': [], 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'HardSigmoid'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_alpha_data', 'alpha'), + ('alpha', 'alpha_data'), + ('input_beta_data', 'beta'), + ('beta', 'beta_data'), + ('input_data', 'node'), + ('alpha_data', 'node'), + ('beta_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, alpha, beta, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+hard sigmoid const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper 
+ from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const_number = np.prod(shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + args = dict() + if alpha is not None: + args['alpha'] = alpha + if beta is not None: + args['beta'] = beta + node_def = onnx.helper.make_node( + 'HardSigmoid', + inputs=['const1'], + outputs=['sigmoid1'], + **args + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'sigmoid1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.clip(constant * (alpha if alpha is not None else 0.2) + (beta if beta is not None else 0.5), 0, 1) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[1, 2], alpha=None, beta=None), + dict(shape=[2, 3, 4, 5, 6], alpha=None, beta=0.7)] + + test_data = [ + dict(shape=[10, 12], alpha=None, beta=None), + dict(shape=[8, 10, 12], alpha=None, beta=None), + dict(shape=[6, 8, 10, 12], alpha=None, beta=None), + dict(shape=[4, 6, 8, 10, 12], alpha=None, beta=None), + dict(shape=[10, 12], alpha=0.3, beta=None), + dict(shape=[8, 10, 12], alpha=0.3, beta=None), + dict(shape=[6, 8, 10, 12], alpha=0.3, beta=None), + dict(shape=[4, 6, 8, 10, 12], alpha=0.3, beta=None), + dict(shape=[10, 12], alpha=None, beta=0.7), + dict(shape=[8, 10, 12], alpha=None, beta=0.7), + dict(shape=[6, 8, 10, 12], alpha=None, beta=0.7), + dict(shape=[4, 6, 8, 10, 12], alpha=None, beta=0.7), + dict(shape=[10, 12], alpha=0.1, beta=0.3), + dict(shape=[8, 10, 12], alpha=0.1, beta=0.3), + dict(shape=[6, 8, 10, 12], alpha=0.1, beta=0.3), + dict(shape=[4, 6, 8, 10, 12], alpha=0.1, beta=0.3)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_hard_sigmoid(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + 
@pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.nightly + def test_hard_sigmoid_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_hard_sigmoid_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_identity.py b/tests/layer_tests/onnx_tests/test_identity.py new file mode 100644 index 00000000000..e7d8d8acd65 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_identity.py @@ -0,0 +1,183 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestIdentity(Caffe2OnnxLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Identity->Sigmoid->Output => Input->sigmoid + + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = helper.make_node( + 'Identity', + inputs=['input'], + outputs=['identity'] + ) + + sigmoid_def = helper.make_node( + 'Sigmoid', + inputs=['identity'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def, sigmoid_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'sigmoid': {'kind': 'op', 'type': 'Sigmoid'}, + 'sigmoid_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'sigmoid'), + ('sigmoid', 'sigmoid_data'), + ('sigmoid_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+identity on const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + constant = np.random.randint(-127, 127, shape).astype(np.float) + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = helper.make_node( + 'Identity', + inputs=['const1'], + outputs=['identity'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 
'identity'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_identity(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_identity_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_image_scaler.py b/tests/layer_tests/onnx_tests/test_image_scaler.py new file mode 100644 index 00000000000..ee4f3490b7d --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_image_scaler.py @@ -0,0 +1,161 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestImageScaler(Caffe2OnnxLayerTest): + def create_net(self, shape, scale, ir_version): + """ + ONNX net IR net + + Input->ImageScaler->Output => Input->ScaleShift(Power) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + bias = np.random.randint(-10, 10, shape[1]).astype(np.float) + + node_def = onnx.helper.make_node( + 'ImageScaler', + inputs=['input'], + outputs=['output'], + bias=bias, + scale=scale + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape, scale, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+scaled const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + 
concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randint(-127, 127, shape).astype(np.float) + bias = np.random.randint(-10, 10, shape[1]).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'ImageScaler', + inputs=['const1'], + outputs=['scale'], + bias=bias, + scale=scale + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'scale'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ir_const = constant * scale + np.expand_dims(np.expand_dims([bias], 2), 3) + if precision == 'FP16': + ir_const = ir_const.astype(np.float16) + + ref_net = None + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 4, 6, 8], scale=4.5), + dict(shape=[1, 1, 10, 12], scale=0.5)] + + test_data = [dict(shape=[1, 1, 10, 12], scale=0.5), + dict(shape=[1, 3, 10, 12], scale=1.5), + dict(shape=[6, 8, 10, 12], scale=4.5)] + + @pytest.mark.parametrize("params", test_data_precommit) + def test_image_scaler_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_image_scaler(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + def test_image_scaler_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_image_scaler_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_instance_normalization.py b/tests/layer_tests/onnx_tests/test_instance_normalization.py new file mode 100644 index 00000000000..d7467f0951b --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_instance_normalization.py @@ -0,0 +1,111 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestInstanceNormalization(OnnxRuntimeLayerTest): + def create_net(self, shape, epsilon, precision, ir_version): + """ + ONNX net IR net + + Input->InstanceNormalization->Output => Input->MVN->ScaleShift(Power) + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + 
input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape)
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape)
+
+        scale_const = np.random.randn(shape[1]).astype(np.float)
+        bias_const = np.random.randn(shape[1]).astype(np.float)
+
+        node_scale_def = helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=['scale'],
+            value=helper.make_tensor(
+                name='const_tensor',
+                data_type=TensorProto.FLOAT,
+                dims=scale_const.shape,
+                vals=scale_const.flatten(),
+            ),
+        )
+
+        node_bias_def = helper.make_node(
+            'Constant',
+            inputs=[],
+            outputs=['bias'],
+            value=helper.make_tensor(
+                name='const_tensor',
+                data_type=TensorProto.FLOAT,
+                dims=bias_const.shape,
+                vals=bias_const.flatten(),
+            ),
+        )
+
+        args = dict()
+        if epsilon:
+            args['epsilon'] = epsilon
+        node_def = helper.make_node(
+            'InstanceNormalization',
+            inputs=['input', 'scale', 'bias'],
+            outputs=['output'],
+            **args
+        )
+
+        # Create the graph (GraphProto)
+        graph_def = helper.make_graph(
+            [node_scale_def, node_bias_def, node_def],
+            'test_model',
+            [input],
+            [output],
+        )
+
+        # Create the model (ModelProto)
+        onnx_net = helper.make_model(graph_def, producer_name='test_model')
+
+        #
+        #   Create reference IR net
+        #
+        ref_net = None
+
+        return onnx_net, ref_net
+
+    test_data_precommit = [
+        dict(shape=[1, 1, 4, 6], epsilon=0.001),
+        dict(shape=[1, 1, 2, 4, 6], epsilon=0.001)]
+
+    test_data = [
+        dict(shape=[1, 1, 4, 6], epsilon=None),
+        dict(shape=[1, 1, 4, 6], epsilon=0.001),
+        dict(shape=[1, 2, 4, 6], epsilon=None),
+        dict(shape=[1, 2, 4, 6], epsilon=0.001),
+        dict(shape=[2, 3, 4, 6], epsilon=None),
+        dict(shape=[2, 3, 4, 6], epsilon=0.001),
+        dict(shape=[1, 1, 2, 4, 6], epsilon=None),
+        dict(shape=[1, 1, 2, 4, 6], epsilon=0.001),
+        dict(shape=[1, 2, 4, 6, 6], epsilon=None),
+        dict(shape=[1, 2, 4, 6, 6], epsilon=0.001),
+        dict(shape=[2, 3, 4, 6, 6], epsilon=None),
+        dict(shape=[2, 3, 4, 6, 6], epsilon=0.001)]
+
+    @pytest.mark.parametrize("params", test_data_precommit)
+    @pytest.mark.precommit
+    def test_instance_normalization_precommit(self, params, ie_device, precision, ir_version, temp_dir):
+        self._test(*self.create_net(**params, precision=precision, ir_version=ir_version),
+                   ie_device, precision, ir_version, temp_dir=temp_dir)
+
+    @pytest.mark.parametrize("params", test_data)
+    @pytest.mark.nightly
+    def test_instance_normalization(self, params, ie_device, precision, ir_version, temp_dir):
+        self._test(*self.create_net(**params, precision=precision, ir_version=ir_version),
+                   ie_device, precision, ir_version, temp_dir=temp_dir)
diff --git a/tests/layer_tests/onnx_tests/test_leaky_relu.py b/tests/layer_tests/onnx_tests/test_leaky_relu.py
new file mode 100644
index 00000000000..985ff27b801
--- /dev/null
+++ b/tests/layer_tests/onnx_tests/test_leaky_relu.py
@@ -0,0 +1,204 @@
+# Copyright (C) 2018-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from common.layer_test_class import check_ir_version
+from common.onnx_layer_test_class import Caffe2OnnxLayerTest
+from unit_tests.utils.graph import build_graph
+
+
+class TestLeakyRelu(Caffe2OnnxLayerTest):
+    def create_net(self, shape, alpha, ir_version):
+        """
+            ONNX net                           IR net
+
+            Input->LeakyRelu->Output   =>    Input->ReLU
+
+        """
+
+        #
+        #   Create ONNX model
+        #
+
+        import onnx
+        from onnx import helper
+        from onnx import TensorProto
+
+        input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape)
+        output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape)
+
+        node_def = onnx.helper.make_node( +
'LeakyRelu', + inputs=['input'], + outputs=['output'], + alpha=alpha + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'alpha_indata': {'value': alpha, 'kind': 'data'}, + 'alpha': {'kind': 'op', 'type': 'Const'}, + 'alpha_data': {'shape': [1], 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'PReLU'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('alpha_indata', 'alpha'), + ('alpha', 'alpha_data'), + ('alpha_data', 'node'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape, alpha, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+LeakyRelu const)->Output => Input->Concat(+ReLU const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const_number = np.prod(shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'LeakyRelu', + inputs=['const1'], + outputs=['lrelu1'], + alpha=alpha + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'lrelu1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.clip(constant, 0, np.inf) + np.clip(constant, -np.inf, 0) * alpha + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'const_indata': {'shape': None, 'kind': 'data'}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[1, 2], alpha=0.1), + dict(shape=[2, 3, 
4, 5, 6], alpha=4.5)] + + test_data = [ + dict(shape=[10, 12], alpha=0.1), + dict(shape=[8, 10, 12], alpha=0.9), + dict(shape=[6, 8, 10, 12], alpha=1.5), + dict(shape=[4, 6, 8, 10, 12], alpha=4.5)] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_leaky_relu_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_leaky_relu(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_leaky_relu_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_leaky_relu_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_log.py b/tests/layer_tests/onnx_tests/test_log.py new file mode 100644 index 00000000000..1b284b211ef --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_log.py @@ -0,0 +1,193 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestLog(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.rand(*(inputs_dict[input])).astype(np.float32) * 255 + 0.5 + return inputs_dict + + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Log->Output => Input->Log + + """ + + # + # Create ONNX model + # + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Log', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Log'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+log const)->Output => Input->Concat(+const) + + """ + + # + # Create 
ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.rand(*shape).astype(np.float) * 255 + 0.5 + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Log', + inputs=['const'], + outputs=['log'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'log'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.log(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[1, 2]), + dict(shape=[2, 3, 4, 5, 6])] + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_log_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_log(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.nightly + def test_log_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_log_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_logsoftmax.py b/tests/layer_tests/onnx_tests/test_logsoftmax.py new file mode 100644 
index 00000000000..6371eae1a66 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_logsoftmax.py @@ -0,0 +1,240 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from mo.front.common.partial_infer.utils import int64_array +from unit_tests.utils.graph import build_graph + + +def second_input_data_of_reshape(src_shape, axis): + if axis == 0: + return [1, -1] + if axis == 1: + return [0, -1] + if axis > 1: + return [int(np.prod(int64_array(src_shape[: axis]))), -1] + return [-1, int(np.prod(int64_array(src_shape[len(src_shape) + axis:])))] + + +def get_flatten_shape(src_shape, axis): + flatten_axis = axis if axis >= 0 else len(src_shape) + axis + if flatten_axis == 0: + fst_dim = 1 + snd_dim = int(np.prod(int64_array(src_shape))) + elif flatten_axis == 1: + fst_dim = src_shape[0] + snd_dim = int(np.prod(int64_array(src_shape[1:]))) + else: + fst_dim = int(np.prod(int64_array(src_shape[: flatten_axis]))) + snd_dim = int(np.prod(int64_array(src_shape[flatten_axis:]))) + return [fst_dim, snd_dim] + + +class TestLog(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.rand(*(inputs_dict[input])).astype(np.float32) * 255 + 0.5 + return inputs_dict + + def create_net(self, shape, logsoftmax_axis, ir_version): + """ + ONNX net IR net + + Input->LogSoftmax->Output => Input->Softmax->Log->Output + + """ + + # + # Create ONNX model + # + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'LogSoftmax', + inputs=['input'], + outputs=['output'], + axis=logsoftmax_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + converted_shape = shape if len(shape) != 1 else shape[0] + flatten_shape = get_flatten_shape(shape, logsoftmax_axis) + reshape_data_val = second_input_data_of_reshape(shape, logsoftmax_axis) + reduce_sum_shape = np.copy(flatten_shape) + reduce_sum_shape[1] = 1 + + if len(shape) == 2 and shape == flatten_shape: + ref_nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter', 'shape': converted_shape}, + 'input_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'flatten_shape_val': {'shape': int64_array(reshape_data_val).shape, + 'kind': 'data', + 'value': int64_array(reshape_data_val)}, + 'flatten_shape': {'type': 'Const', 'kind': 'op', 'shape': 2}, + 'flatten_shape_data': {'shape': int64_array([2]), 'kind': 'data', 'value': None}, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'kind': 'data', 'shape': flatten_shape, 'value': None}, + 'reduce_max_axis_val': {'shape': int64_array([1]).shape, 'kind': 'data', 'value': int64_array([1])}, + 'reduce_max_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_max_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_max': {'type': 'ReduceMax', 'kind': 'op', 'keep_dims': True}, + 'reduce_max_data': {'shape': 
reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_first': {'type': 'Subtract', 'kind': 'op'}, + 'sub_first_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'reduce_sum_axis_val': {'shape': int64_array([1]).shape, 'kind': 'data', 'value': int64_array([1])}, + 'reduce_sum_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_sum_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_sum': {'type': 'ReduceSum', 'kind': 'op', 'keep_dims': True}, + 'reduce_sum_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'exp': {'type': 'Exp', 'kind': 'op'}, + 'exp_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'log': {'type': 'Log', 'kind': 'op'}, + 'log_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_second': {'type': 'Subtract', 'kind': 'op'}, + 'sub_second_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_edges = [ + ('input', 'input_data'), + ('flatten_shape_val', 'flatten_shape'), + ('flatten_shape', 'flatten_shape_data'), + ('flatten_shape_data', 'reshape', {'in': 1}), + ('input_data', 'reshape', {'in': 0}), + ('reshape', 'reshape_data'), + ('reduce_max_axis_val', 'reduce_max_axis'), + ('reduce_max_axis', 'reduce_max_axis_data'), + ('reduce_max_axis_data', 'reduce_max', {'in': 1}), + ('reduce_max', 'reduce_max_data'), + ('reshape_data', 'reduce_max', {'out': 0, 'in': 0}), + ('reshape_data', 'sub_first', {'out': 0, 'in': 0}), + ('reduce_max_data', 'sub_first', {'in': 1}), + ('sub_first', 'sub_first_data'), + ('reduce_sum_axis_val', 'reduce_sum_axis'), + ('reduce_sum_axis', 'reduce_sum_axis_data'), + ('reduce_sum_axis_data', 'reduce_sum', {'in': 1}), + ('reduce_sum', 'reduce_sum_data'), + ('sub_first_data', 'exp'), + ('exp', 'exp_data'), + ('exp_data', 'reduce_sum', {'in': 0}), + ('reduce_sum_data', 'log'), + ('log', 'log_data'), + ('log_data', 'sub_second', {'in': 1}), + ('sub_second', 'sub_second_data'), + ('sub_first_data', 'sub_second', {'out': 0, 'in': 0}), + ('sub_second_data', 'result'), + ] + else: + ref_nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter', 'shape': converted_shape}, + 'input_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'flatten_shape_val': {'shape': int64_array(reshape_data_val).shape, + 'kind': 'data', + 'value': int64_array(reshape_data_val)}, + 'flatten_shape': {'type': 'Const', 'kind': 'op', 'shape': 2}, + 'flatten_shape_data': {'shape': int64_array([2]), 'kind': 'data', 'value': None}, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'kind': 'data', 'shape': flatten_shape, 'value': None}, + 'reduce_max_axis_val': {'shape': int64_array([1]).shape, 'kind': 'data', 'value': int64_array([1])}, + 'reduce_max_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_max_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_max': {'type': 'ReduceMax', 'kind': 'op', 'keep_dims': True}, + 'reduce_max_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_first': {'type': 'Subtract', 'kind': 'op'}, + 'sub_first_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'reduce_sum_axis_val': {'shape': int64_array([1]).shape, 'kind': 'data', 'value': int64_array([1])}, + 'reduce_sum_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_sum_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_sum': {'type': 'ReduceSum', 'kind': 'op', 'keep_dims': True}, 
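+                    # The nodes above and below spell out the numerically stable decomposition
+                    # log_softmax(x) = (x - max(x)) - log(sum(exp(x - max(x)))) on the flattened input.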
+ 'reduce_sum_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'exp': {'type': 'Exp', 'kind': 'op'}, + 'exp_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'log': {'type': 'Log', 'kind': 'op'}, + 'log_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_second': {'type': 'Subtract', 'kind': 'op'}, + 'sub_second_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'last_shape_val': {'shape': int64_array(shape).shape, 'kind': 'data', 'value': int64_array(shape)}, + 'last_shape': {'type': 'Const', 'kind': 'op', 'shape': len(shape)}, + 'last_shape_data': {'shape': int64_array([len(shape)]), 'kind': 'data', 'value': None}, + 'last_reshape': {'kind': 'op', 'type': 'Reshape'}, + 'last_reshape_data': {'kind': 'data', 'shape': shape, 'value': None}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_edges = [ + ('input', 'input_data'), + ('flatten_shape_val', 'flatten_shape'), + ('flatten_shape', 'flatten_shape_data'), + ('flatten_shape_data', 'reshape', {'in': 1}), + ('input_data', 'reshape', {'in': 0}), + ('reshape', 'reshape_data'), + ('reduce_max_axis_val', 'reduce_max_axis'), + ('reduce_max_axis', 'reduce_max_axis_data'), + ('reduce_max_axis_data', 'reduce_max', {'in': 1}), + ('reduce_max', 'reduce_max_data'), + ('reshape_data', 'reduce_max', {'out': 0, 'in': 0}), + ('reshape_data', 'sub_first', {'out': 0, 'in': 0}), + ('reduce_max_data', 'sub_first', {'in': 1}), + ('sub_first', 'sub_first_data'), + ('reduce_sum_axis_val', 'reduce_sum_axis'), + ('reduce_sum_axis', 'reduce_sum_axis_data'), + ('reduce_sum_axis_data', 'reduce_sum', {'in': 1}), + ('reduce_sum', 'reduce_sum_data'), + ('sub_first_data', 'exp'), + ('exp', 'exp_data'), + ('exp_data', 'reduce_sum', {'in': 0}), + ('reduce_sum_data', 'log'), + ('log', 'log_data'), + ('log_data', 'sub_second', {'in': 1}), + ('sub_second', 'sub_second_data'), + ('sub_first_data', 'sub_second', {'out': 0, 'in': 0}), + ('last_shape_val', 'last_shape'), + ('last_shape', 'last_shape_data'), + ('last_shape_data', 'last_reshape', {'in': 1}), + ('sub_second_data', 'last_reshape', {'in': 0}), + ('last_reshape', 'last_reshape_data'), + ('last_reshape_data', 'result'), + ] + + ref_net = build_graph(ref_nodes_attributes, ref_edges) + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 4], logsoftmax_axis=-1), + dict(shape=[2, 3, 2, 5, 6], logsoftmax_axis=-2)] + + test_data = [ + dict(shape=[10, 12], logsoftmax_axis=-1), + dict(shape=[4, 5, 3], logsoftmax_axis=1), + dict(shape=[6, 8, 5, 7], logsoftmax_axis=2), + dict(shape=[2, 3, 2, 5, 6], logsoftmax_axis=-2)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_log(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_lrn.py b/tests/layer_tests/onnx_tests/test_lrn.py new file mode 100644 index 00000000000..66104f467b9 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_lrn.py @@ -0,0 +1,133 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestLRN(OnnxRuntimeLayerTest): + def create_net(self, shape, alpha, beta, bias, size, ir_version): + """ + ONNX net IR net + + Input->LRN->Output => 
Input->Norm->Power + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + args = dict(size=size) + if alpha: + args['alpha'] = alpha + if beta: + args['beta'] = beta + if bias: + args['bias'] = bias + node_def = onnx.helper.make_node( + 'LRN', + inputs=['input'], + outputs=['output'], + **args + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + if not alpha: + alpha = 0.0001 + if not beta: + beta = 0.75 + if not bias: + bias = 1.0 + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'const_indata': {'value': [1], 'kind': 'data'}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': [1], 'kind': 'data'}, + 'norm': {'kind': 'op', 'type': 'LRN', 'alpha': alpha / bias, 'beta': beta, 'bias': bias, + 'size': size}, # 'region': 'across' + 'norm_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + edges = [('input', 'input_data'), + ('input_data', 'norm'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('const_data', 'norm'), + ('norm', 'norm_data'), + ('norm_data', 'result') + ] + + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 12], alpha=None, beta=None, bias=None, size=1), + pytest.param(dict(shape=[2, 3, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3), + marks=pytest.mark.skip(reason="Skipped until fixed")), + dict(shape=[2, 3, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3), + dict(shape=[2, 3, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3)] + + test_data = [ + dict(shape=[2, 12], alpha=None, beta=None, bias=None, size=1), + dict(shape=[2, 12], alpha=0.0002, beta=0.5, bias=2.0, size=1), + dict(shape=[2, 3, 12], alpha=None, beta=None, bias=None, size=3), + dict(shape=[2, 3, 12], alpha=0.0002, beta=0.5, bias=2.0, size=1), + dict(shape=[2, 3, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3), + dict(shape=[2, 3, 8, 10, 12], alpha=None, beta=None, bias=None, size=3), + dict(shape=[2, 3, 8, 10, 12], alpha=0.0002, beta=0.5, bias=2.0, size=1), + dict(shape=[2, 3, 8, 10, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3)] + + test_data_4D = [ + dict(shape=[2, 3, 10, 12], alpha=None, beta=None, bias=None, size=3), + dict(shape=[2, 3, 10, 12], alpha=0.0002, beta=0.5, bias=2.0, size=1), + dict(shape=[2, 3, 10, 12], alpha=0.0002, beta=0.5, bias=2.0, size=3)] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_lrn_precommit(self, params, ie_device, precision, ir_version, temp_dir): + # onnxruntime only supports 4D tensors for LRN + self.skip_framework = True + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_lrn(self, params, ie_device, precision, ir_version, temp_dir): + # onnxruntime only supports 4D tensors for LRN + self.skip_framework = True + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, 
precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_lrn_4D(self, params, ie_device, precision, ir_version, temp_dir): + self.skip_framework = False + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_lstm.py b/tests/layer_tests/onnx_tests/test_lstm.py new file mode 100644 index 00000000000..e142d0070a4 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_lstm.py @@ -0,0 +1,160 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestLSTM(Caffe2OnnxLayerTest): + skip_framework = True + + def create_lstm(self, direction: str, cell_type: str, hidden_size=128): + """ + ONNX net + + Input->LSTM->Output => Only accuracy check + + """ + + # Create ONNX model + + import onnx + from onnx import helper + from onnx import TensorProto + + assert cell_type in ['LSTM', 'RNN', 'GRU'] + assert direction in ['forward', 'reverse', 'bidirectional'] + n_gates = {'LSTM': 4, 'RNN': 1, 'GRU': 3} + M = n_gates[cell_type] + + seq_len = 10 + batch_size = 4 + input_size = 64 + num_direction = 1 if direction in ["forward", "reverse"] else 2 + + input_shape = [seq_len, batch_size, input_size] + output_shape = [seq_len, num_direction, batch_size, hidden_size] + + w_shape = [num_direction, M * hidden_size, input_size] + r_shape = [num_direction, M * hidden_size, hidden_size] + + init_h_shape = [num_direction, batch_size, hidden_size] + init_c_shape = [num_direction, batch_size, hidden_size] + + init_h_value = np.ones(init_h_shape, dtype=np.float32) + init_c_value = np.ones(init_c_shape, dtype=np.float32) + + w_value = np.ones(w_shape, dtype=np.float32) + r_value = np.ones(r_shape, dtype=np.float32) + + # Creating LSTM Operation + x = helper.make_tensor_value_info('X', TensorProto.FLOAT, input_shape) + y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, None) + + w = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['W'], + value=onnx.helper.make_tensor( + name='const_tensor', + data_type=onnx.TensorProto.FLOAT, + dims=w_value.shape, + vals=w_value.flatten().astype(float), + ), + ) + + r = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['R'], + value=onnx.helper.make_tensor( + name='const_tensor', + data_type=onnx.TensorProto.FLOAT, + dims=r_value.shape, + vals=r_value.flatten().astype(float), + ), + ) + + init_h = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['init_h'], + value=onnx.helper.make_tensor( + name='const_tensor', + data_type=onnx.TensorProto.FLOAT, + dims=init_h_value.shape, + vals=init_h_value.flatten().astype(float), + ), + ) + + inputs = ['X', 'W', 'R', '', '', 'init_h'] + + if cell_type == 'LSTM': + init_c = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['init_c'], + value=onnx.helper.make_tensor( + name='const_tensor', + data_type=onnx.TensorProto.FLOAT, + dims=init_c_value.shape, + vals=init_c_value.flatten().astype(float), + ), + ) + + inputs.append('init_c') + + node_lstm = onnx.helper.make_node( + cell_type, + inputs=inputs, + outputs=['', 'Y'], + hidden_size=hidden_size, + direction=direction, + ) + + # Create the graph (GraphProto) + if cell_type == 'LSTM': + graph_def = helper.make_graph( + [w, r, init_h, init_c, node_lstm], + 'test_lstm', + [x], + [y], + ) + else: + graph_def = 
helper.make_graph( + [w, r, init_h, node_lstm], + 'test_lstm', + [x], + [y], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_{}_model'.format(cell_type)) + + # We do not create reference graph, as it's too complicated to construct it + # Moreover, IR reader do not support TensorIterator layers + # So we return None to skip IR comparision + + return onnx_net, None + + @pytest.mark.precommit + @pytest.mark.parametrize('direction', ["forward", "bidirectional", "reverse"]) + @pytest.mark.parametrize('cell_type', ["LSTM", "GRU", "RNN"]) + def test_lstm_simple_precommit(self, direction, cell_type, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_lstm(direction, cell_type), ie_device, precision, ir_version, temp_dir=temp_dir) + + # LSTM/RNN/GRU Sequence Generation + @pytest.mark.parametrize('direction', ["forward", "bidirectional", "reverse"]) + @pytest.mark.parametrize('cell_type', ["LSTM", "GRU", "RNN"]) + def test_lstm_sequence_generate(self, direction, cell_type, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_lstm(direction, cell_type), ie_device, precision, ir_version, + disabled_transforms='lstm_to_tensor_iterator,gru_and_rnn_to_tensor_iterator', temp_dir=temp_dir) + + # TODO: add more params for nightly + @pytest.mark.nightly + @pytest.mark.parametrize('direction', ["forward", "bidirectional", "reverse"]) + @pytest.mark.parametrize('cell_type', ["LSTM", "GRU", "RNN"]) + def test_lstm_nightly(self, direction, cell_type, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_lstm(direction, cell_type), ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_matmul.py b/tests/layer_tests/onnx_tests/test_matmul.py new file mode 100644 index 00000000000..d9fd2458809 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_matmul.py @@ -0,0 +1,190 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestMatMul(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape1, shape2, precision, ir_version): + """ + ONNX net IR net + + Input->MatMul with const->Output => Input->FullyConnected + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + max_len = max([len(shape1), len(shape2)]) + extended_shape1 = np.concatenate([np.ones(max_len - len(shape1)), shape1], axis=0) + extended_shape2 = np.concatenate([np.ones(max_len - len(shape2)), shape2], axis=0) + output_shape = np.concatenate( + [np.maximum(*[extended_shape1[0:-2], extended_shape2[0:-2]]), [shape1[-2], shape2[-1]]], + axis=0).astype(np.int).tolist() + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape1) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const = np.random.randn(*shape2).astype(np.float32) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'MatMul', + inputs=['input', 'const'], + outputs=['mm_output'] + ) + + # to avoid mapping 
problems + node_elu_def = onnx.helper.make_node( + 'Elu', + inputs=['mm_output'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_elu_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, spesify 'type': 'Input' for inpit node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + if precision == 'FP16': + const = const.astype(np.float16) + ref_net = None + + return onnx_net, ref_net + + def create_dual_net(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->MatMul->Output => Input->Concat + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + max_len = max([len(shape1), len(shape2)]) + extended_shape1 = np.concatenate([np.ones(max_len - len(shape1)), shape1], axis=0) + extended_shape2 = np.concatenate([np.ones(max_len - len(shape2)), shape2], axis=0) + output_shape = np.concatenate( + [np.maximum(*[extended_shape1[0:-2], extended_shape2[0:-2]]), [shape1[-2], shape2[-1]]], + axis=0).astype(np.int).tolist() + input1 = helper.make_tensor_value_info('input1', TensorProto.FLOAT, shape1) + input2 = helper.make_tensor_value_info('input2', TensorProto.FLOAT, shape2) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_def = onnx.helper.make_node( + 'MatMul', + inputs=['input1', 'input2'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input1, input2], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, spesify 'type': 'Input' for inpit node + # Moreover, do not forget to validate ALL layer attributes!!! 
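+        # As with the LSTM tests, returning ref_net = None skips the IR structure comparison,
+        # so this case is validated only by comparing inference results against the framework.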
+ # + + ref_net = None + + return onnx_net, ref_net + + test_data = [ + dict(shape1=[4, 6], shape2=[6, 4]), + dict(shape1=[1, 4, 6], shape2=[1, 6, 4]), + dict(shape1=[2, 4, 6], shape2=[2, 6, 4]), + dict(shape1=[1, 1, 4, 6], shape2=[1, 1, 6, 4]), + dict(shape1=[1, 2, 4, 6], shape2=[1, 2, 6, 4]), + dict(shape1=[2, 3, 4, 6], shape2=[2, 3, 6, 4]), + dict(shape1=[2, 3, 4, 4, 6], shape2=[2, 3, 4, 6, 4]) + ] + + test_data_broadcasting = [ + dict(shape1=[1, 4, 6], shape2=[6, 4]), + dict(shape1=[2, 4, 6], shape2=[6, 4]), + dict(shape1=[2, 4, 6], shape2=[1, 6, 4]), + dict(shape1=[1, 1, 4, 6], shape2=[6, 4]), + dict(shape1=[1, 1, 4, 6], shape2=[1, 6, 4]), + dict(shape1=[1, 2, 4, 6], shape2=[6, 4]), + dict(shape1=[1, 2, 4, 6], shape2=[2, 6, 4]), + dict(shape1=[2, 3, 4, 6], shape2=[6, 4]), + dict(shape1=[2, 3, 4, 6], shape2=[3, 6, 4]), + dict(shape1=[2, 3, 4, 6], shape2=[1, 3, 6, 4]), + dict(shape1=[2, 3, 4, 4, 6], shape2=[6, 4]), + dict(shape1=[2, 3, 4, 4, 6], shape2=[4, 6, 4]), + dict(shape1=[2, 3, 4, 4, 6], shape2=[3, 4, 6, 4]) + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_matmul(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_broadcasting) + @pytest.mark.nightly + def test_matmul_bc(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_dual_matmul(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dual_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_broadcasting) + @pytest.mark.nightly + def test_dual_matmul_bc(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_dual_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_mean_variance_normalization.py b/tests/layer_tests/onnx_tests/test_mean_variance_normalization.py new file mode 100644 index 00000000000..eb9d5e3b262 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_mean_variance_normalization.py @@ -0,0 +1,73 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestMeanVarianceNormalization(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, axes, ir_version): + """ + ONNX net IR net + + Input->MeanVarianceNormalization->Output => Input->MVN + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'MeanVarianceNormalization', + inputs=['input'], + outputs=['output'], + axes=axes + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 
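+            # MeanVarianceNormalization standardizes the input over the given 'axes':
+            # y = (x - mean) / sqrt(variance), matching the MVN op expected in the IR.
+            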
'test_model', + [input], + [output] + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + + return onnx_net, ref_net + + test_data = [ + dict(shape=[7, 2, 3, 5], axes=[2, 3]), + dict(shape=[7, 2, 3, 5], axes=[1, 2, 3]), + dict(shape=[7, 2, 3, 5, 11], axes=[2, 3, 4]), + dict(shape=[7, 2, 3, 5, 11], axes=[1, 2, 3, 4]) + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_mvn(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_neg.py b/tests/layer_tests/onnx_tests/test_neg.py new file mode 100644 index 00000000000..3dd2e0106fd --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_neg.py @@ -0,0 +1,91 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestNeg(Caffe2OnnxLayerTest): + def create_neg(self, shape, ir_version): + """ + ONNX net IR net + + Input->Neg->Output => Input->Power(scale=-1, shift=0, power=1) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_reduce_mean_def = onnx.helper.make_node( + 'Neg', + inputs=['input'], + outputs=['output'], + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_reduce_mean_def], + 'test_neg_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_neg_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
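+        # For IR version 10 and newer, Neg is expected to appear as a single Negative op
+        # (checked below); the Power(scale=-1, shift=0, power=1) form in the docstring
+        # corresponds to the older IR representation.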
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'neg': {'kind': 'op', 'type': 'Negative'}, + 'neg_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'neg'), + ('neg', 'neg_data'), + ('neg_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 3, 4]), + dict(shape=[1, 3, 124, 124])] + + test_data = [dict(shape=[1, 64]), + dict(shape=[2, 3, 4]), + dict(shape=[1, 3, 124, 124]), + ] + + @pytest.mark.parametrize('params', test_data_precommit) + @pytest.mark.precommit + def test_neg_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_neg(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize('params', test_data) + @pytest.mark.nightly + def test_neg(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_neg(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_non_zero.py b/tests/layer_tests/onnx_tests/test_non_zero.py new file mode 100644 index 00000000000..da83e8e7e26 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_non_zero.py @@ -0,0 +1,194 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestNonZero(Caffe2OnnxLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->NonZero->Output => Input->NonZero->Result + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'NonZero', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'NonZero', 'version': 'opset3', 'output_type': 'i64'}, + 'node_data': {'shape': [len(shape), np.prod(shape)], 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + return onnx_net, ref_net + + def create_net_const(self, input_value, output_value, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+NonZero const)->Output => Input->Concat(+const)->Result + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = list(output_value.shape) + output_shape[concat_axis] *= 2 + + input = 
helper.make_tensor_value_info('input', TensorProto.FLOAT, output_value.shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=input_value.shape, + vals=input_value.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'NonZero', + inputs=['const1'], + outputs=['nonzero1'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'nonzero1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': output_value.shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': output_value.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': output_value.shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [ + dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12]) + ] + + test_const_data = [ + dict( + input_value=np.array([3, 0, 0, 0, 4, 0, 5, 6, 0]).reshape((3, 3)), + output_value=np.array([0, 1, 2, 2, 0, 1, 0, 1]).reshape(2, 4), + ), + dict( + input_value=np.array([0, 1, 0, 1]).reshape((4)), + output_value=np.array([1, 3]).reshape((1, 2)), + ), + dict( + input_value=np.array([0, 1, 0, 1, 1, 0, 1, 0]).reshape((2, 4)), + output_value=np.array([0, 0, 1, 1, 1, 3, 0, 2]).reshape((2, 4)), + ), + dict( + input_value=np.array([1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0]).reshape((2, 3, 3)), + output_value=np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 2, 2, 2, + 0, 0, 0, 1, 1, 2, 0, 2, 1, 0, 1, 2, 0, 1, 2, 0, 2, 1]).reshape((3, 12)), + ), + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_non_zero(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_const_data) + @pytest.mark.nightly + def test_non_zero_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_not.py b/tests/layer_tests/onnx_tests/test_not.py new file mode 100644 index 00000000000..da840eef28e --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_not.py @@ -0,0 +1,190 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from 
common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestNot(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 2, inputs_dict[input]).astype(np.bool) + return inputs_dict + + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Not->Output => Input->LogicalNot + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape) + + node_def = onnx.helper.make_node( + 'Not', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalNot'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, ir_version): + """ + ONNX net IR net + + Input->Concat(+not const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, output_shape) + + constant = np.random.randint(0, 2, shape).astype(np.bool) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Not', + inputs=['const1'], + outputs=['not'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'not'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.logical_not(constant) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + 
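+                                  # Not applied to a constant input is expected to be folded
+                                  # during conversion, so the reference graph holds only the
+                                  # pre-negated constant (np.logical_not above) feeding Concat.
+                                  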
[('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 3, 4]), + dict(shape=[2, 4, 6, 8, 10])] + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_not_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_not(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_not_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_or.py b/tests/layer_tests/onnx_tests/test_or.py new file mode 100644 index 00000000000..8580f7e8bfd --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_or.py @@ -0,0 +1,277 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestOr(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 2, inputs_dict[input]).astype(np.bool) + return inputs_dict + + def create_net(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Or with 2nd input->Output => Input->LogicalOr + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input1 = helper.make_tensor_value_info('input1', TensorProto.BOOL, shape1) + input2 = helper.make_tensor_value_info('input2', TensorProto.BOOL, shape2) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + node_def = helper.make_node( + 'Or', + inputs=['input1', 'input2'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input1, input2], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input1': {'kind': 'op', 'type': 'Parameter'}, + 'input1_data': {'shape': shape1, 'kind': 'data'}, + 'input2': {'kind': 'op', 'type': 'Parameter'}, + 'input2_data': {'shape': shape2, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalOr'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input1', 'input1_data'), + ('input2', 'input2_data'), + ('input1_data', 'node'), + ('input2_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_one_const(self, 
shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Or with const->Output => Input->LogicalOr + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + const = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = helper.make_node( + 'Or', + inputs=['input', 'const'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape1, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': const.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const.shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalOr'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Concat with const or const->Output => Input->Concat + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = list(shape1) + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, output_shape) + + const1 = np.random.randint(0, 2, shape1).astype(np.bool) + const2 = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const1_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_const2_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + node_def = helper.make_node( + 'Or', + inputs=['const1', 'const2'], + outputs=['node_out'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'node_out'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const1_def, node_const2_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + constant_calculated = np.logical_or(const1, const2) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 
'Parameter'}, + 'input_data': {'shape': const1.shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant_calculated.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const1.shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape1=[2, 3, 4], shape2=[2, 3, 4]), + dict(shape1=[2, 4, 6, 8, 10], shape2=[2, 4, 6, 8, 10])] + + test_data = [dict(shape1=[4, 6], shape2=[4, 6]), + dict(shape1=[4, 6, 8], shape2=[4, 6, 8]), + dict(shape1=[4, 6, 8, 10], shape2=[4, 6, 8, 10]), + dict(shape1=[4, 6, 8, 10, 12], shape2=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_or_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_or(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_or_one_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_one_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_or_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_pad.py b/tests/layer_tests/onnx_tests/test_pad.py new file mode 100644 index 00000000000..56c18702871 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_pad.py @@ -0,0 +1,212 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestPad(OnnxRuntimeLayerTest): + def create_net(self, shape, mode, pads, value, ir_version, opset=None): + """ + ONNX net IR net + + Input->Pad->Output => Input->Pad + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + _pads = np.array(pads).reshape([2, -1]) + output_shape = (np.array(shape) + _pads[0, :] + _pads[1, :]).tolist() + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + nodes = [] + if opset is not None and opset < 11: + args = dict(pads=pads) + if mode: + args['mode'] = mode + if value: + args['value'] = value + node_def = onnx.helper.make_node( + 'Pad', + inputs=['input'], + outputs=['pad'], + **args + ) + nodes.append(node_def) + else: + node_pads_def = helper.make_node( + 
'Constant', + inputs=[], + outputs=['pads'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(pads)], + vals=pads, + ), + ) + + inputs = ['input', 'pads'] + if value is not None: + node_value_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['value'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=[], + vals=[value], + ), + ) + inputs.append('value') + nodes.append(node_value_def) + + args = dict() + if mode: + args['mode'] = mode + node_def = onnx.helper.make_node( + 'Pad', + inputs=inputs, + outputs=['pad'], + **args + ) + nodes.extend([node_pads_def, node_def]) + + sigmoid_def = onnx.helper.make_node( + 'Elu', + inputs=['pad'], + outputs=['output'] + ) + nodes.append(sigmoid_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'pads_begin_indata': {'value': _pads[0, :], 'kind': 'data'}, + 'pads_begin': {'kind': 'op', 'type': 'Const'}, + 'pads_begin_data': {'shape': [len(_pads[0, :])], 'kind': 'data'}, + 'pads_end_indata': {'value': _pads[1, :], 'kind': 'data'}, + 'pads_end': {'kind': 'op', 'type': 'Const'}, + 'pads_end_data': {'shape': [len(_pads[1, :])], 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Pad', 'pad_mode': 'constant' if not mode else mode}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + 'elu': {'kind': 'op', 'type': 'Elu'}, + 'elu_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + edges = [('input', 'input_data'), + ('input_data', 'node'), + ('pads_begin_indata', 'pads_begin'), + ('pads_begin', 'pads_begin_data'), + ('pads_begin_data', 'node'), + ('pads_end_indata', 'pads_end'), + ('pads_end', 'pads_end_data'), + ('pads_end_data', 'node'), + ('node', 'node_data'), + ('node_data', 'elu'), + ('elu', 'elu_data'), + ('elu_data', 'result') + ] + + if mode in (None, "constant"): + nodes_attributes.update({'const_node_indata': {'value': value, 'kind': 'data'}, + 'const_node': {'kind': 'op', 'type': 'Const'}, + 'const_node_data': {'shape': None, 'kind': 'data'} + }) + edges += [('const_node_indata', 'const_node'), + ('const_node', 'const_node_data'), + ('const_node_data', 'node') + ] + + ref_net = build_graph(nodes_attributes, edges) + + return onnx_net, ref_net + + test_data_precommit = [ + pytest.param(dict(shape=[6, 8, 10, 12], pads=[1, 2, 3, 4, 5, 6, 7, 8]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(shape=[8, 10, 12, 14, 16], pads=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + marks=pytest.mark.skip(reason="Skipped until fixed"))] + + test_data = [dict(shape=[10, 12], pads=[1, 1, 1, 1]), + dict(shape=[10, 12], pads=[1, 2, 3, 4]), + dict(shape=[8, 10, 12], pads=[0, 0, 1, 0, 0, 1]), + dict(shape=[8, 10, 12], pads=[1, 2, 3, 4, 5, 6]), + dict(shape=[6, 8, 10, 12], pads=[0, 0, 1, 1, 0, 0, 1, 1]), + dict(shape=[6, 8, 10, 12], pads=[0, 0, 1, 2, 0, 0, 3, 4]), + dict(shape=[6, 8, 10, 12], pads=[1, 1, 1, 1, 1, 1, 1, 1]), + dict(shape=[6, 8, 10, 12], pads=[1, 2, 3, 4, 5, 6, 7, 8]), + dict(shape=[8, 10, 12, 14, 16], 
pads=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), + dict(shape=[8, 10, 12, 14, 16], pads=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("mode_value", [(None, None), + (None, 0.5), + ('constant', None), + ('constant', 0.5), + ('reflect', None), + ('edge', None)]) + @pytest.mark.nightly + def test_pad_opset_9(self, params, mode_value, ie_device, precision, ir_version, temp_dir): + mode, value = mode_value + self._test(*self.create_net(**params, mode=mode, value=value, ir_version=ir_version, opset=9), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("mode_value", [(None, None), + (None, 0.5), + ('constant', None), + ('constant', 0.5), + ('reflect', None), + ('edge', None)]) + @pytest.mark.precommit + def test_pad_opset_latest_precommit(self, params, mode_value, ie_device, precision, ir_version, temp_dir): + mode, value = mode_value + self._test(*self.create_net(**params, mode=mode, value=value, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("mode_value", [(None, None), + (None, 0.5), + ('constant', None), + ('constant', 0.5), + ('reflect', None), + ('edge', None)]) + @pytest.mark.nightly + def test_pad_opset_latest(self, params, mode_value, ie_device, precision, ir_version, temp_dir): + mode, value = mode_value + self._test(*self.create_net(**params, mode=mode, value=value, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_pooling.py b/tests/layer_tests/onnx_tests/test_pooling.py new file mode 100644 index 00000000000..3213718b9ac --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_pooling.py @@ -0,0 +1,432 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +def float_array(x): + return np.array(x, dtype=np.float) + + +class TestPooling(OnnxRuntimeLayerTest): + def create_net(self, shape, kernel_shape, pads, strides, op, ir_version, count_include_pad=None, auto_pad=None, + storage_order=None, ceil=False, opset=None): + """ + ONNX net IR net + + Input->Pooling>Output => Input->Pooling + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + node_args = dict(kernel_shape=kernel_shape) + if auto_pad is not None: + node_args['auto_pad'] = auto_pad + if auto_pad == 'VALID': + pads = np.zeros(len(shape[2:]) * 2, dtype=np.int) + else: + auto_pad = 'NOTSET' + if count_include_pad is not None: + node_args['count_include_pad'] = count_include_pad + else: + count_include_pad = 0 + if storage_order is not None: + node_args['storage_order'] = storage_order + if pads is not None: + if auto_pad == 'NOTSET': + node_args['pads'] = pads + _pads = np.transpose(np.array(pads).reshape([2, -1])) + else: + _pads = np.zeros([len(kernel_shape), 2]) + if strides is not None: + node_args['strides'] = strides + else: + strides = np.ones(len(kernel_shape)) + + if ceil: + node_args['ceil_mode'] = 1 + + if auto_pad in ['SAME_UPPER', 'SAME_LOWER']: + out_spacial_shape = np.ceil(np.array(shape[2:], dtype=np.float) / strides) + else: + rounding = np.ceil if ceil else np.floor + out_spacial_shape = 
rounding( + (float_array(shape[2:]) + np.add(_pads[:, 0], _pads[:, 1]) - float_array(kernel_shape)) / strides + 1) + + out_shape = np.array(shape) + out_shape[2:] = out_spacial_shape + out_shape = out_shape.astype(np.int).tolist() + concat_axis = 0 + out_concat_shape = out_shape.copy() + out_concat_shape[concat_axis] *= 2 + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, out_concat_shape) + + constant = np.random.randint(-127, 127, out_shape).astype(np.float) + + node_def = onnx.helper.make_node( + op, + inputs=['input'], + outputs=['pool'], + **node_args + ) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['pool', 'const1'], + outputs=['output'], + axis=concat_axis + ) + + graph_def = helper.make_graph( + [node_def, node_const_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': None, + 'pads_begin': _pads[:, 0] if len(shape) > 3 else _pads[0, 0], + 'pads_end': _pads[:, 1] if len(shape) > 3 else _pads[0, 1], + 'kernel': kernel_shape[0] if len(kernel_shape) == 1 else kernel_shape, + 'rounding_type': 'ceil' if auto_pad != 'NOTSET' or ceil else 'floor', + 'auto_pad': None}, + 'node_data': {'shape': out_shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': out_shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': out_concat_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + if op == 'AveragePool': + nodes_attributes['node']['type'] = 'AvgPool' + nodes_attributes['node']['exclude-pad'] = 'true' if count_include_pad == 0 else 'false' + else: + nodes_attributes['node']['type'] = 'MaxPool' + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('node_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + def create_global_net(self, shape, op, ir_version): + """ + ONNX net IR net + + Input->GlobalPooling>Output => Input->Pooling + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + out_shape = np.ones(len(shape)) + out_shape[:2] = np.array(shape)[:2] + out_shape = out_shape.astype(np.int).tolist() + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, out_shape) + + node_def = onnx.helper.make_node( + op, + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 
'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_axes_data': {'kind': 'data', 'value': list(range(2, len(shape)))}, + 'axes': {'kind': 'op', 'type': 'Const'}, + 'axes_data': {'shape': [len(shape) - 2], 'kind': 'data'}, + 'node': {'kind': 'op', 'type': None}, + 'node_data': {'shape': out_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + if op == 'GlobalAveragePool': + nodes_attributes['node']['type'] = 'ReduceMean' + else: + nodes_attributes['node']['type'] = 'ReduceMax' + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('input_axes_data', 'axes'), + ('axes', 'axes_data'), + ('axes_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 3, 10], kernel_shape=[2], pads=None, strides=[3]), + dict(shape=[2, 3, 30, 30], kernel_shape=[5, 5], pads=None, strides=[3, 2]), + dict(shape=[2, 3, 28, 28, 28], kernel_shape=[5, 5, 5], pads=[2, 4, 2, 0, 0, 2], strides=None), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[5, 5, 5], pads=None, strides=[3, 3, 5])] + + test_data = [ + dict(shape=[2, 3, 10], kernel_shape=[2], pads=None, strides=None), + dict(shape=[2, 3, 10], kernel_shape=[2], pads=[2, 2], strides=None), + dict(shape=[2, 3, 10], kernel_shape=[2], pads=None, strides=[3]), + dict(shape=[2, 3, 30, 30], kernel_shape=[2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30], kernel_shape=[4, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30], kernel_shape=[2, 4], pads=None, strides=None), + dict(shape=[2, 3, 28, 28], kernel_shape=[3, 3], pads=[2, 2, 2, 2], strides=None), + dict(shape=[2, 3, 28, 28], kernel_shape=[5, 5], pads=[0, 2, 0, 4], strides=None), + dict(shape=[2, 3, 28, 28], kernel_shape=[5, 5], pads=[2, 0, 4, 0], strides=None), + dict(shape=[2, 3, 30, 30], kernel_shape=[5, 5], pads=None, strides=[3, 3]), + dict(shape=[2, 3, 30, 30], kernel_shape=[5, 5], pads=None, strides=[2, 3]), + dict(shape=[2, 3, 30, 30], kernel_shape=[5, 5], pads=None, strides=[3, 2]), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[2, 2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[4, 2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[2, 4, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[2, 2, 4], pads=None, strides=None), + dict(shape=[2, 3, 28, 28, 28], kernel_shape=[3, 3, 3], pads=[2, 2, 2, 2, 2, 2], strides=None), + dict(shape=[2, 3, 28, 28, 28], kernel_shape=[5, 5, 5], pads=[2, 4, 2, 0, 0, 2], strides=None), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[5, 5, 5], pads=None, strides=[3, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[5, 5, 5], pads=None, strides=[5, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[5, 5, 5], pads=None, strides=[3, 5, 3]), + dict(shape=[2, 3, 30, 30, 30], kernel_shape=[5, 5, 5], pads=None, strides=[3, 3, 5])] + + test_data_autopad_precommit = [ + dict(shape=[2, 3, 30, 30, 30], auto_pad='VALID', kernel_shape=[2, 2, 4], pads=None, strides=None), + dict(shape=[2, 3, 21, 21, 21], auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[3, 2, 3]), + dict(shape=[2, 3, 21, 21, 21], 
auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[3, 3, 2])] + + test_data_autopad = [ + dict(shape=[2, 3, 10], auto_pad='SAME_UPPER', kernel_shape=[2], pads=[0, 1], strides=[3]), + dict(shape=[2, 3, 10], auto_pad='SAME_LOWER', kernel_shape=[2], pads=[0, 1], strides=[3]), + dict(shape=[2, 3, 10], auto_pad='VALID', kernel_shape=[2], pads=None, strides=[3]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[2, 2], pads=[0, 0, 1, 1], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[4, 2], pads=[1, 0, 2, 1], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[2, 4], pads=[0, 1, 1, 2], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5], pads=[1, 1, 1, 1], strides=[3, 3]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5], pads=[1, 1, 2, 1], strides=[2, 3]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5], pads=[1, 1, 1, 2], strides=[3, 2]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[2, 2], pads=[0, 0, 1, 1], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[4, 2], pads=[1, 0, 2, 1], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[2, 4], pads=[0, 1, 1, 2], strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5], pads=[1, 1, 1, 1], strides=[3, 3]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5], pads=[1, 1, 2, 1], strides=[2, 3]), + dict(shape=[2, 3, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5], pads=[1, 1, 1, 2], strides=[3, 2]), + dict(shape=[2, 3, 30, 30], auto_pad='VALID', kernel_shape=[2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='VALID', kernel_shape=[4, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30], auto_pad='VALID', kernel_shape=[2, 4], pads=None, strides=None), + dict(shape=[2, 3, 21, 21], auto_pad='VALID', kernel_shape=[3, 3], pads=None, strides=[3, 3]), + dict(shape=[2, 3, 21, 21], auto_pad='VALID', kernel_shape=[3, 3], pads=None, strides=[2, 3]), + dict(shape=[2, 3, 21, 21], auto_pad='VALID', kernel_shape=[3, 3], pads=None, strides=[3, 2]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[2, 2, 2], pads=[0, 0, 0, 1, 1, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[4, 2, 2], pads=[1, 0, 0, 2, 1, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[2, 4, 2], pads=[0, 1, 0, 1, 2, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[2, 2, 4], pads=[0, 0, 1, 1, 1, 2], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5, 5], pads=[1, 1, 1, 1, 1, 1], + strides=[3, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5, 5], pads=[0, 1, 1, 0, 1, 1], + strides=[5, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5, 5], pads=[1, 0, 1, 1, 0, 1], + strides=[3, 5, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_UPPER', kernel_shape=[5, 5, 5], pads=[1, 1, 0, 1, 1, 0], + strides=[3, 3, 5]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[2, 2, 2], pads=[0, 0, 0, 1, 1, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[4, 2, 2], pads=[1, 0, 0, 2, 1, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[2, 4, 
2], pads=[0, 1, 0, 1, 2, 1], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[2, 2, 4], pads=[0, 0, 1, 1, 1, 2], + strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5, 5], pads=[1, 1, 1, 1, 1, 1], + strides=[3, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5, 5], pads=[0, 1, 1, 0, 1, 1], + strides=[5, 3, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5, 5], pads=[1, 0, 1, 1, 0, 1], + strides=[3, 5, 3]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='SAME_LOWER', kernel_shape=[5, 5, 5], pads=[1, 1, 0, 1, 1, 0], + strides=[3, 3, 5]), + dict(shape=[2, 3, 30, 30, 30], auto_pad='VALID', kernel_shape=[2, 2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='VALID', kernel_shape=[4, 2, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='VALID', kernel_shape=[2, 4, 2], pads=None, strides=None), + dict(shape=[2, 3, 30, 30, 30], auto_pad='VALID', kernel_shape=[2, 2, 4], pads=None, strides=None), + dict(shape=[2, 3, 21, 21, 21], auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[3, 3, 3]), + dict(shape=[2, 3, 21, 21, 21], auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[2, 3, 3]), + dict(shape=[2, 3, 21, 21, 21], auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[3, 2, 3]), + dict(shape=[2, 3, 21, 21, 21], auto_pad='VALID', kernel_shape=[3, 3, 3], pads=None, strides=[3, 3, 2])] + + global_test_data = [dict(shape=[2, 3, 10]), + dict(shape=[2, 3, 32, 32]), + dict(shape=[2, 3, 32, 32, 32])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("incl_pad", [None, 1]) + @pytest.mark.nightly + def test_avgpool_opset7(self, params, incl_pad, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test( + *self.create_net(**params, op='AveragePool', count_include_pad=incl_pad, ir_version=ir_version, opset=7), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_autopad) + @pytest.mark.nightly + def test_avgpool_opset7_autopad(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='AveragePool', ir_version=ir_version, opset=7), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("incl_pad", [None, 1]) + @pytest.mark.parametrize("ceil", [True, False]) + @pytest.mark.nightly + def test_avgpool_opset10(self, params, incl_pad, ceil, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test( + *self.create_net(**params, op='AveragePool', count_include_pad=incl_pad, ceil=ceil, ir_version=ir_version, + opset=10), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_autopad) + @pytest.mark.nightly + def test_avgpool_opset10_autopad(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='AveragePool', ir_version=ir_version, opset=10), + ie_device, precision, ir_version, temp_dir=temp_dir) + 
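
The expected spatial output shape in create_net above follows the usual pooling formula: ceil(input / stride) for SAME_UPPER/SAME_LOWER, otherwise floor-or-ceil of (input + pads_begin + pads_end - kernel) / stride + 1. Below is a minimal standalone NumPy sketch of that arithmetic, handy when adding new shape/kernel/stride combinations to the tables above; the helper name and sample values are illustrative only and not part of the patch.

    import numpy as np

    def pool_output_spatial(spatial, kernel, strides, pads_begin, pads_end, ceil_mode=False):
        # Mirrors the NOTSET branch of create_net: rounding((in + pads - kernel) / stride + 1)
        rounding = np.ceil if ceil_mode else np.floor
        out = rounding((np.array(spatial, dtype=np.float64)
                        + np.array(pads_begin) + np.array(pads_end)
                        - np.array(kernel)) / np.array(strides) + 1)
        return out.astype(np.int64).tolist()

    # 30x30 input, 5x5 kernel, stride 3, no padding:
    print(pool_output_spatial([30, 30], [5, 5], [3, 3], [0, 0], [0, 0]))                   # [9, 9]
    print(pool_output_spatial([30, 30], [5, 5], [3, 3], [0, 0], [0, 0], ceil_mode=True))   # [10, 10]
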
+ @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("st_order", [None, 1]) + @pytest.mark.nightly + def test_maxpool_opset8(self, params, st_order, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='MaxPool', storage_order=st_order, ir_version=ir_version, opset=8), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_autopad) + @pytest.mark.nightly + def test_maxpool_opset8_autopad(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='MaxPool', ir_version=ir_version, opset=8), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("st_order", [None, 1]) + @pytest.mark.parametrize("ceil", [True, False]) + @pytest.mark.nightly + def test_maxpool_opset10(self, params, st_order, ceil, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='MaxPool', storage_order=st_order, ceil=ceil, ir_version=ir_version, + opset=10), ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_autopad_precommit) + @pytest.mark.precommit + def test_maxpool_opset10_autopad(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='MaxPool', ir_version=ir_version, opset=10), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_autopad) + @pytest.mark.nightly + def test_maxpool_opset10_autopad(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_net(**params, op='MaxPool', ir_version=ir_version, opset=10), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", global_test_data) + @pytest.mark.nightly + def test_global_avgpool(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_global_net(**params, op='GlobalAveragePool', ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", global_test_data) + @pytest.mark.nightly + def test_global_maxpool(self, params, ie_device, precision, ir_version, temp_dir): + if not len(params['shape']) in [4, 5]: + pytest.skip("Pooling layer support only 4D and 5D input tensors") + self._test(*self.create_global_net(**params, op='GlobalMaxPool', ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_prelu.py b/tests/layer_tests/onnx_tests/test_prelu.py new file mode 100644 index 00000000000..303ff0270ab --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_prelu.py @@ -0,0 +1,161 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from 
common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestPRelu(Caffe2OnnxLayerTest): + def create_net(self, shape, slope_shape, precision, ir_version, opset=None): + """ + ONNX net IR net + + Input->PRelu->Output => Input->PReLU + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + const = np.random.randn(*slope_shape).astype(np.float32) + + node_slope_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['slope'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'PRelu', + inputs=['input', 'slope'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_slope_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'weights_indata': {'kind': 'data', 'value': const.flatten()}, + 'weights': {'kind': 'op', 'type': 'Const'}, + 'weights_data': {'kind': 'data', 'shape': [len(const.flatten())]}, + 'node': {'kind': 'op', 'type': 'PReLU'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('weights_indata', 'weights'), + ('weights', 'weights_data'), + ('weights_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + # Note: IE only support slopes of one element or of size equal to number of channels. 
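
To make the note above concrete: a slope of size C is applied per channel (axis 1 of the NCHW-style input), which is why the shared-channel cases below pair slope_shape with the second input dimension. A minimal NumPy sketch of that per-channel semantics follows; it is an assumed reference illustration, not code taken from the patch or from the inference engine.

    import numpy as np

    x = np.random.randn(8, 10, 12).astype(np.float32)   # matches shape=[8, 10, 12] below
    slope = np.random.randn(10).astype(np.float32)       # one slope value per channel (axis 1)
    slope_b = slope.reshape(1, -1, 1)                     # broadcast over batch and spatial dims
    y = np.where(x < 0, x * slope_b, x)
    assert y.shape == x.shape
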
+ test_data_shared_channels = [ + dict(shape=[10, 12], slope_shape=[12]), + dict(shape=[8, 10, 12], slope_shape=[10]), + dict(shape=[6, 8, 10, 12], slope_shape=[8]), + dict(shape=[4, 6, 8, 10, 12], slope_shape=[6])] + + test_data_scalar_precommit = [ + dict(shape=[2, 4, 6, 8], slope_shape=[1]), + dict(shape=[2, 4, 6, 8, 10], slope_shape=[1]) + ] + + test_data_scalar = [ + dict(shape=[10, 12], slope_shape=[1]), + dict(shape=[8, 10, 12], slope_shape=[1]), + dict(shape=[6, 8, 10, 12], slope_shape=[1]), + dict(shape=[4, 6, 8, 10, 12], slope_shape=[1])] + + test_data_precommit = [dict(shape=[8, 10, 12], slope_shape=[10])] + + @pytest.mark.parametrize("params", test_data_scalar) + @pytest.mark.nightly + def test_prelu_opset6_scalar(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_shared_channels) + @pytest.mark.nightly + def test_prelu_opset6_shared_channels(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_scalar) + @pytest.mark.nightly + def test_prelu_opset7_scalar(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=7, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_shared_channels) + @pytest.mark.nightly + def test_prelu_opset7_shared_channels(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=7, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_prelu_shared_channels_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_scalar_precommit) + @pytest.mark.precommit + def test_prelu_scalar_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_scalar) + @pytest.mark.nightly + def test_prelu_scalar(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_shared_channels) + @pytest.mark.nightly + def test_prelu_shared_channels(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_reciprocal.py b/tests/layer_tests/onnx_tests/test_reciprocal.py new file mode 100644 index 00000000000..4161ed28860 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_reciprocal.py @@ -0,0 +1,171 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
+import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestReciprocal(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input+258->Reciprocal->Output => Input->Power + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + # adding 258 is needed to avoid division by zero + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=[1], + vals=[258], + ), + ) + + node_add_def = helper.make_node( + 'Add', + inputs=['input', 'const'], + outputs=['add'] + ) + + node_def = helper.make_node( + 'Reciprocal', + inputs=['add'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_add_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat with reciprocal consts->Output => Input->Concat + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = list(shape) + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const = np.random.randint(1, 256, shape).astype(np.float) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = helper.make_node( + 'Reciprocal', + inputs=['const'], + outputs=['node_out'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'node_out'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + constant_calculated = 1 / const + if precision == 'FP16': + constant_calculated = constant_calculated.astype(np.float16) + + ref_net = None + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 4]), + dict(shape=[2, 4, 6, 8])] + + test_data = [ + dict(shape=[4, 6]), + dict(shape=[4, 6, 8]), + dict(shape=[4, 6, 8, 10]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_reciprocal_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_reciprocal(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + 
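
The const variant below relies on the same pattern used throughout these tests: the ONNX subgraph Reciprocal(const) is expected to collapse into a single constant in the IR, so the reference value is simply 1/const, with the constant drawn from [1, 256) so that no element is zero. A quick standalone check of that folding (the sample shape is illustrative, not taken from the test data):

    import numpy as np

    const = np.random.randint(1, 256, [4, 6]).astype(np.float32)
    folded = np.reciprocal(const)            # value the folded IR constant is expected to carry
    assert np.allclose(folded * const, 1.0)
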
@pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_reciprocal_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_reciprocal_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_reduce.py b/tests/layer_tests/onnx_tests/test_reduce.py new file mode 100644 index 00000000000..2df16ef3d85 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_reduce.py @@ -0,0 +1,179 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestReduce(OnnxRuntimeLayerTest): + def create_reduce(self, shape, reshapped_shape, op, axes, keep_dims, ir_version): + """ + ONNX net IR net + + Input->Reduce Operation (axes)->Output => Input->Reduce Operation + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + if op not in ['ReduceMin', 'ReduceMax', 'ReduceMean', 'ReduceProd', 'ReduceSum']: + raise ValueError("Operation has to be either Reduce(Min or Max or Mean or Sum or Prod") + + output_shape = shape.copy() + for axis in axes: + output_shape[axis] = 1 + + if not keep_dims: + output_shape = [dim for dim in output_shape if dim != 1] + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_def = onnx.helper.make_node( + op, + inputs=['input'], + outputs=['output'], + axes=axes, + keepdims=keep_dims + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
+ # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_data_1': {'shape': [len(axes)], 'value': axes, 'kind': 'data'}, + 'const_1': {'kind': 'op', 'type': 'Const'}, + 'const_data_1': {'shape': [len(axes)], 'kind': 'data'}, + 'reduce': {'kind': 'op', 'type': op, 'keep_dims': keep_dims}, + 'reduce_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data_1', 'const_1'), + ('const_1', 'const_data_1'), + ('input_data', 'reduce'), + ('const_data_1', 'reduce'), + ('reduce', 'reduce_data'), + ('reduce_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 4, 6], reshapped_shape=[2, 1, 4 * 6, 1], axes=[1, 2]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 1, 4 * 6 * 8, 1], axes=[1, 2, 3]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4, 6 * 8 * 10, 1], axes=[2, 3, 4]) + ] + + test_data = [ + dict(shape=[2, 4, 6], reshapped_shape=[1, 1, 2, 4 * 6], axes=[0]), + dict(shape=[2, 4, 6], reshapped_shape=[2, 1, 4, 6], axes=[1]), + dict(shape=[2, 4, 6], reshapped_shape=[2, 4, 6, 1], axes=[2]), + dict(shape=[2, 4, 6], reshapped_shape=[1, 1, 2 * 4, 6], axes=[0, 1]), + dict(shape=[2, 4, 6], reshapped_shape=[2, 1, 4 * 6, 1], axes=[1, 2]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[1, 1, 2, 4 * 6 * 8], axes=[0]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 1, 4, 6 * 8], axes=[1]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 4, 6, 8], axes=[2]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 4 * 6, 8, 1], axes=[3]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[1, 1, 2 * 4, 6 * 8], axes=[0, 1]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 1, 4 * 6, 8], axes=[1, 2]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 4, 6 * 8, 1], axes=[2, 3]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[1, 1, 2 * 4 * 6, 8], axes=[0, 1, 2]), + dict(shape=[2, 4, 6, 8], reshapped_shape=[2, 1, 4 * 6 * 8, 1], axes=[1, 2, 3]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[1, 1, 2, 4 * 6 * 8 * 10], axes=[0]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 1, 4, 6 * 8 * 10], axes=[1]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4, 6, 8 * 10], axes=[2]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4 * 6, 8, 10], axes=[3]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4 * 6 * 8, 10, 1], axes=[4]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[1, 1, 2 * 4, 6 * 8 * 10], axes=[0, 1]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 1, 4 * 6, 8 * 10], axes=[1, 2]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4, 6 * 8, 10], axes=[2, 3]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4 * 6, 8 * 10, 1], axes=[3, 4]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[1, 1, 2 * 4 * 6, 8 * 10], axes=[0, 1, 2]), + dict(shape=[2, 4, 6, 8, 10], reshapped_shape=[2, 4, 6 * 8 * 10, 1], axes=[2, 3, 4]) + ] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.precommit + def test_reduce_max_precommit(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMax', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, 
False]) + @pytest.mark.nightly + def test_reduce_max(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMax', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.nightly + def test_reduce_sum(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceSum', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.nightly + def test_reduce_prod(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceProd', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.precommit + def test_reduce_mean_precommit(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMean', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_reduce_mean(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMean', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.precommit + def test_reduce_min_precommit(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMin', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.nightly + def test_reduce_min(self, params, keep_dims, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce(**params, op='ReduceMin', keep_dims=keep_dims, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_reduce_lp.py b/tests/layer_tests/onnx_tests/test_reduce_lp.py new file mode 100644 index 00000000000..4f8c4284833 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_reduce_lp.py @@ -0,0 +1,247 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestReduceL1L2(OnnxRuntimeLayerTest): + def create_reduce_lp(self, shape, axes, keep_dims, reduce_p, ir_version): + """ + ONNX net IR net + + Input->ReduceLX(axes)->Output => Input->ReduceLX + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + output_shape = shape.copy() + _axes = axes.copy() if axes is not None 
else list(range(len(shape))) + for axis in _axes: + output_shape[axis] = 1 + + if not keep_dims: + output_shape = [dim for dim in output_shape if dim != 1] + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + args = dict(keepdims=keep_dims) + if axes: + args['axes'] = axes + node_def = onnx.helper.make_node( + "ReduceL" + str(reduce_p), + inputs=['input'], + outputs=['output'], + **args + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_data_1': {'shape': [len(_axes)], 'value': _axes, 'kind': 'data'}, + 'const_1': {'kind': 'op', 'type': 'Const'}, + 'const_data_1': {'shape': [len(_axes)], 'kind': 'data'}, + 'reduce': {'kind': 'op', 'type': "ReduceL" + str(reduce_p), 'keep_dims': keep_dims}, + 'reduce_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data_1', 'const_1'), + ('const_1', 'const_data_1'), + ('input_data', 'reduce'), + ('const_data_1', 'reduce'), + ('reduce', 'reduce_data'), + ('reduce_data', 'result') + ]) + + return onnx_net, ref_net + + def create_reduce_lp_const(self, shape, axes, keep_dims, reduce_p, ir_version): + """ + ONNX net IR net + + Input->ReduceLX(axes)->Output => Input->ReduceLX + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + output_shape = shape.copy() + _axes = axes.copy() if axes is not None else list(range(len(shape))) + for axis in _axes: + output_shape[axis] = 1 + + if not keep_dims: + output_shape = [dim for dim in output_shape if dim != 1] + if len(output_shape) == 0: + output_shape = [1] + + concat_axis = 0 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + constant = np.random.randn(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + args = dict(keepdims=keep_dims) + if axes: + args['axes'] = axes + node_def = onnx.helper.make_node( + "ReduceL" + str(reduce_p), + inputs=['const1'], + outputs=['reduce'], + **args + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'reduce'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not 
forget to validate ALL layer attributes!!! + # + constant = np.power(np.sum(a=np.abs(np.power(constant, reduce_p)), axis=tuple(_axes), keepdims=keep_dims), 1 / reduce_p) + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': output_shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': constant.shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': concat_output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [ + dict(shape=[2, 4, 6, 8], axes=[-3, -1, -2]), + dict(shape=[2, 4, 6, 8, 10], axes=[-4, -2]), + ] + + test_data = [ + dict(shape=[8], axes=None), + dict(shape=[8], axes=[0]), + dict(shape=[2, 4, 6], axes=None), + dict(shape=[2, 4, 6], axes=[1]), + dict(shape=[2, 4, 6], axes=[-2]), + dict(shape=[2, 4, 6], axes=[-2, -1]), + dict(shape=[2, 4, 6, 8], axes=[0]), + dict(shape=[2, 4, 6, 8], axes=[-3, -1, -2]), + dict(shape=[2, 4, 6, 8, 10], axes=None), + dict(shape=[2, 4, 6, 8, 10], axes=[-2]), + dict(shape=[2, 4, 6, 8, 10], axes=[1, 3]), + dict(shape=[2, 4, 6, 8, 10], axes=[-4, -2]), + ] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.parametrize("reduce_p", [1, 2]) + @pytest.mark.precommit + def test_reduce_lp_precommit(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce_lp(**params, keep_dims=keep_dims, reduce_p=reduce_p, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.parametrize("reduce_p", [1, 2]) + @pytest.mark.nightly + def test_reduce_lp(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce_lp(**params, keep_dims=keep_dims, reduce_p=reduce_p, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.parametrize("reduce_p", [1, 2]) + @pytest.mark.precommit + def test_reduce_lp_const_precommit(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_reduce_lp_const(**params, keep_dims=keep_dims, reduce_p=reduce_p, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("keep_dims", [True, False]) + @pytest.mark.parametrize("reduce_p", [1, 2]) + @pytest.mark.nightly + def test_reduce_lp_const(self, params, keep_dims, reduce_p, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reduce_lp_const(**params, keep_dims=keep_dims, reduce_p=reduce_p,ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_relu.py b/tests/layer_tests/onnx_tests/test_relu.py new file mode 
100644 index 00000000000..19cc96d1530 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_relu.py @@ -0,0 +1,183 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestRelu(Caffe2OnnxLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Relu->Output => Input->ReLU + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Relu', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'ReLU'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+Relu const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const_number = np.prod(shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Relu', + inputs=['const1'], + outputs=['relu1'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'relu1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.clip(constant, 0, np.inf) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 
'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12]) + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_relu(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_relu_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_reshape.py b/tests/layer_tests/onnx_tests/test_reshape.py new file mode 100644 index 00000000000..4483d125045 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_reshape.py @@ -0,0 +1,283 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestReshape(Caffe2OnnxLayerTest): + def create_reshape_net(self, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Reshape->Output => Input->Reshape + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_shape_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['shape'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(output_shape)], + vals=output_shape, + ), + ) + + node_reshape_def = onnx.helper.make_node( + 'Reshape', + inputs=['input', 'shape'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_shape_def, node_reshape_def], + 'test_reshape_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_reshape_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + 'input_data_1': {'shape': [len(output_shape)], 'value': output_shape, 'kind': 'data'}, + 'const_1': {'kind': 'op', 'type': 'Const'}, + 'const_data_1': {'shape': [len(output_shape)], 'value': None, 'kind': 'data'}, # 'value': output_shape, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data_1', 'const_1'), + ('const_1', 'const_data_1'), + ('const_data_1', 'reshape'), + ('input_data', 'reshape'), + ('reshape', 'reshape_data'), + ('reshape_data', 'result') + ]) + + return onnx_net, ref_net + + def create_reshape_net_const(self, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Concat(+reshaped const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + const_number = np.prod(input_shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=input_shape, + vals=constant, + ), + ) + + node_shape_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['shape'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(output_shape)], + vals=output_shape, + ), + ) + + node_reshape_def = onnx.helper.make_node( + 'Reshape', + inputs=['const1', 'shape'], + outputs=['reshape1'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'reshape1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_shape_def, node_reshape_def, node_concat_def], + 'test_reshape_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_reshape_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': output_shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': output_shape, 'value': None, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': concat_output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result'), + ]) + + return onnx_net, ref_net + + test_data_5D = [dict(input_shape=[4, 6, 8, 10, 12], output_shape=[4, 6, 8, 120]), + dict(input_shape=[4, 6, 8, 10, 12], output_shape=[4, 6, 80, 12]), + dict(input_shape=[4, 6, 8, 10, 12], output_shape=[4, 48, 10, 12]), + dict(input_shape=[4, 6, 8, 10, 12], output_shape=[24, 8, 10, 12]), + dict(input_shape=[4, 6, 8, 10], output_shape=[2, 2, 6, 8, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 2, 3, 8, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 6, 2, 4, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 6, 8, 2, 5])] + + test_data_5D_precommit = [dict(input_shape=[2, 4, 6, 8, 10], output_shape=[8, 6, 8, 10])] + + test_data_4D = [dict(input_shape=[4, 6, 8, 10], output_shape=[24, 8, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 48, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 6, 80]), + dict(input_shape=[4, 6, 8, 10], output_shape=[192, 10]), + dict(input_shape=[4, 6, 8, 10], output_shape=[4, 480]), + dict(input_shape=[4, 6, 8], output_shape=[2, 2, 6, 8]), + dict(input_shape=[4, 6, 8], output_shape=[4, 2, 3, 8]), + dict(input_shape=[4, 6, 8], output_shape=[4, 6, 2, 4]), + dict(input_shape=[4, 6], output_shape=[2, 2, 2, 3])] + + test_data_4D_precommit = [dict(input_shape=[2, 4, 6, 8], output_shape=[48, 8])] + + test_data_3D = [dict(input_shape=[4, 6, 8], output_shape=[24, 8]), + dict(input_shape=[4, 6, 8], output_shape=[4, 48]), + dict(input_shape=[4, 6], output_shape=[2, 2, 6]), + dict(input_shape=[4, 6], output_shape=[4, 2, 3]), + dict(input_shape=[4, 6], output_shape=[2, 4, 3])] + + test_data_3D_precommit = [dict(input_shape=[2, 4, 6], output_shape=[8, 6])] + + @pytest.mark.parametrize("params", test_data_5D_precommit) + @pytest.mark.precommit + def test_reshape_5D_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D_precommit) + @pytest.mark.precommit + def test_reshape_4D_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D_precommit) + @pytest.mark.precommit + def test_reshape_3D_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_reshape_5D(self, params, ie_device, precision, 
ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_reshape_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_reshape_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_reshape_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_reshape_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_reshape_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_reshape_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_resize.py b/tests/layer_tests/onnx_tests/test_resize.py new file mode 100644 index 00000000000..3106ed52a77 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_resize.py @@ -0,0 +1,676 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from mo.front.common.partial_infer.utils import int64_array +from mo.middle.passes.convert_data_type import data_type_str_to_np, np_data_type_to_destination_type +from unit_tests.utils.graph import build_graph + + +class TestResize(OnnxRuntimeLayerTest): + def create_resize_net(self, input_shape, output_shape, scales, sizes, + coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, precision, ir_version): + import onnx + from onnx import helper + from onnx import TensorProto + + input_rank = len(input_shape) + + roi_node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['roi'], + value=helper.make_tensor( + name='roi_consts', + data_type=TensorProto.FLOAT, + dims=[2 * input_rank], + vals=np.array([*np.zeros(input_rank), *np.ones(input_rank)]) + ) + ) + + onnx_scales = scales + if scales is None: + onnx_scales = np.array(output_shape).astype(np.float) / np.array(input_shape).astype(np.float) + scales_node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['scales'], + value=helper.make_tensor( + name='scales_const', + data_type=TensorProto.FLOAT, + dims=[len(output_shape)], + vals=onnx_scales + ) + ) + + nodes_list = [roi_node, scales_node] + inputs_list = ['input', 'roi', 'scales'] + + if sizes is not None: + sizes_node = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['sizes'], + value=helper.make_tensor( + name='sizes_const', + data_type=TensorProto.INT64, + dims=[len(output_shape)], + 
vals=sizes + ) + ) + + nodes_list.append(sizes_node) + inputs_list.append('sizes') + + args = dict() + + onnx_mode = mode or 'nearest' + onnx_nearest_mode = nearest_mode or 'round_prefer_floor' + cube_coeff = -0.75 if cubic_coeff_a is None else cubic_coeff_a + onnx_coordinate_transformation_mode = coordinate_transformation_mode or 'half_pixel' + + args['nearest_mode'] = onnx_nearest_mode + args['mode'] = onnx_mode + args['cubic_coeff_a'] = cube_coeff + args['coordinate_transformation_mode'] = onnx_coordinate_transformation_mode + + x = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + y = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + resize_node = onnx.helper.make_node( + 'Resize', + inputs=inputs_list, + outputs=['output'], + **args, + ) + + nodes_list.append(resize_node) + + graph_def = onnx.helper.make_graph(nodes_list, 'test_model', [x], [y]) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + onnx.checker.check_model(onnx_net) + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + if sizes is None and scales is None: + return onnx_net, ref_net + + input_shape_as_array = int64_array(input_shape) + + if sizes is not None and scales is not None: + shape_calculation_mode = 'sizes' + sizes_value = int64_array(sizes) + scales_value = np.array(scales).astype(np.float) + elif sizes is not None and scales is None: + shape_calculation_mode = 'sizes' + sizes_value = int64_array(sizes) + scales_value = sizes_value / input_shape_as_array + else: + shape_calculation_mode = 'scales' + scales_value = np.array(scales).astype(np.float) + sizes_value = np.floor(input_shape_as_array * scales_value + 1e-5).astype(np.int64) + + if precision == 'FP16': + sizes_value = sizes_value.astype(np.float16) + scales_value = scales_value.astype(np.float16) + + interp_mode = convert_onnx_mode(onnx_mode) + + interp_attrs = { + 'type': 'Interpolate', + 'kind': 'op', + 'mode': interp_mode, + 'shape_calculation_mode': shape_calculation_mode, + 'coordinate_transformation_mode': onnx_coordinate_transformation_mode, + 'nearest_mode': onnx_nearest_mode, + 'antialias': 0, + 'cube_coeff': cube_coeff, + 'pads_begin': np.zeros(input_rank).astype(np.int64), + 'pads_end': np.zeros(input_rank).astype(np.int64), + 'version': 'opset4' + } + + if shape_calculation_mode == 'scales': + ref_net = create_ref_net_in_scales_mode(precision, input_shape_as_array, output_shape, + sizes_value, scales_value, interp_attrs) + else: + ref_net = create_ref_net_in_sizes_mode(precision, input_shape_as_array, output_shape, + sizes_value, scales_value, interp_attrs) + + return onnx_net, ref_net + + test_data = [ + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 3, 3], + scales=[1.0, 1.0, 0.8, 0.8], sizes=None, + coordinate_transformation_mode='half_pixel', + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 3, 3], + scales=[1.0, 1.0, 0.8, 0.8], sizes=None, + coordinate_transformation_mode='align_corners', + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 2, 4], output_shape=[1, 1, 1, 2], + scales=[1.0, 1.0, 0.6, 0.6], sizes=None, + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='linear', nearest_mode=None), + dict(input_shape=[1, 1, 2, 4], output_shape=[1, 1, 1, 2], + scales=[1.0, 1.0, 0.6, 0.6], sizes=None, + coordinate_transformation_mode='align_corners', + cubic_coeff_a=None, 
mode='linear', nearest_mode=None), + dict(input_shape=[1, 1, 2, 4], output_shape=[1, 1, 1, 2], + scales=[1.0, 1.0, 0.6, 0.6], sizes=None, + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='nearest', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode='align_corners', + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 2, 2], output_shape=[1, 1, 4, 4], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='linear', nearest_mode=None), + dict(input_shape=[1, 1, 2, 2], output_shape=[1, 1, 4, 4], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode='align_corners', + cubic_coeff_a=None, mode='linear', nearest_mode=None), + dict(input_shape=[1, 1, 2, 2], output_shape=[1, 1, 4, 4], + scales=[1.0, 1.0, 2.0, 2.0], sizes=None, + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='nearest', nearest_mode=None) + ] + + @pytest.mark.parametrize("params", test_data) + def test_resize(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, custom_eps=2.0e-4, temp_dir=temp_dir) + + test_data_cubic = [ + dict(input_shape=[1, 3, 100, 200], output_shape=[1, 3, 350, 150], + scales=[1.0, 1.0, 3.5, 150 / 200], sizes=None), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=[1.0, 1.0, 390 / 190, 600 / 400], sizes=None), + dict(input_shape=[4, 33, 1024, 800], output_shape=[4, 33, 512, 800], + scales=[1.0, 1.0, 0.5, 1.0], sizes=None), + dict(input_shape=[4, 33, 3, 800], output_shape=[4, 33, 1, 800], + scales=[1.0, 1.0, 0.3333334, 1.0], sizes=None), + dict(input_shape=[100, 200], output_shape=[350, 150], + scales=[3.5, 150 / 200], sizes=None), + dict(input_shape=[190, 400], output_shape=[390, 600], + scales=[390 / 190, 600 / 400], sizes=None), + dict(input_shape=[1024, 800], output_shape=[512, 800], + scales=[0.5, 1.0], sizes=None), + dict(input_shape=[3, 800], output_shape=[1, 800], + scales=[0.3333334, 1.0], sizes=None) + ] + + @pytest.mark.parametrize("params", test_data_cubic) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['cubic']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor']) + def test_resize_combined_cubic(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, custom_eps=2.6e-2, temp_dir=temp_dir) + + test_data_nearest = [ + dict(input_shape=[1, 3, 100, 200], 
output_shape=[1, 3, 350, 150], + scales=[1.0, 1.0, 3.5, 150 / 200], sizes=None), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=[1.0, 1.0, 390 / 190, 600 / 400], sizes=None), + dict(input_shape=[4, 33, 600, 800], output_shape=[4, 33, 300, 800], + scales=[1.0, 1.0, 0.5, 1.0], sizes=None), + dict(input_shape=[4, 33, 3, 800], output_shape=[4, 33, 1, 800], + scales=[1.0, 1.0, 0.3333334, 1.0], sizes=None), + ] + + @pytest.mark.parametrize("params", test_data_nearest) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['nearest']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor', 'round_prefer_ceil', + 'floor', 'ceil']) + def test_resize_combined_nearest(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_linear = [ + dict(input_shape=[1, 3, 100, 200], output_shape=[1, 3, 350, 150], + scales=[1.0, 1.0, 3.5, 150 / 200], sizes=None), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=[1.0, 1.0, 390 / 190, 600 / 400], sizes=None), + dict(input_shape=[4, 33, 600, 800], output_shape=[4, 33, 300, 800], + scales=[1.0, 1.0, 0.5, 1.0], sizes=None), + dict(input_shape=[4, 33, 3, 800], output_shape=[4, 33, 1, 800], + scales=[1.0, 1.0, 0.3333334, 1.0], sizes=None), + dict(input_shape=[100, 200], output_shape=[350, 150], + scales=[3.5, 150 / 200], sizes=None), + dict(input_shape=[190, 400], output_shape=[390, 600], + scales=[390 / 190, 600 / 400], sizes=None), + dict(input_shape=[600, 800], output_shape=[300, 800], + scales=[0.5, 1.0], sizes=None), + dict(input_shape=[3, 800], output_shape=[1, 800], + scales=[0.3333334, 1.0], sizes=None), + ] + + @pytest.mark.parametrize("params", test_data_linear) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['linear']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor']) + def test_resize_combined_linear(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, custom_eps=2.0e-2, temp_dir=temp_dir) + + test_data_sizes = [ + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 3, 3], + scales=None, sizes=[1, 1, 3, 3], + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 3, 1], + scales=None, sizes=[1, 1, 3, 1], + coordinate_transformation_mode='pytorch_half_pixel', + cubic_coeff_a=None, mode='linear', nearest_mode=None), + dict(input_shape=[1, 1, 2, 4], output_shape=[1, 1, 1, 3], + scales=None, sizes=[1, 1, 1, 
3], + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='nearest', nearest_mode=None), + dict(input_shape=[1, 1, 2, 4], output_shape=[1, 1, 1, 2], + scales=None, sizes=[1, 1, 1, 2], + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='nearest', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 3, 2], + scales=None, sizes=[1, 1, 3, 2], + coordinate_transformation_mode='tf_half_pixel_for_nn', + cubic_coeff_a=None, mode='nearest', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 9, 10], + scales=None, sizes=[1, 1, 9, 10], + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='cubic', nearest_mode=None), + dict(input_shape=[1, 1, 2, 2], output_shape=[1, 1, 7, 8], + scales=None, sizes=[1, 1, 7, 8], + coordinate_transformation_mode=None, + cubic_coeff_a=None, mode='nearest', nearest_mode=None), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=None, sizes=[1, 1, 8, 8], + coordinate_transformation_mode='half_pixel', + cubic_coeff_a=None, mode='nearest', nearest_mode='ceil'), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=None, sizes=[1, 1, 8, 8], + coordinate_transformation_mode='align_corners', + cubic_coeff_a=None, mode='nearest', nearest_mode='floor'), + dict(input_shape=[1, 1, 4, 4], output_shape=[1, 1, 8, 8], + scales=None, sizes=[1, 1, 8, 8], + coordinate_transformation_mode='asymmetric', + cubic_coeff_a=None, mode='nearest', nearest_mode='round_prefer_ceil'), + ] + + @pytest.mark.parametrize("params", test_data_sizes) + def test_resize_sizes(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_sizes_cubic = [ + dict(input_shape=[1, 3, 100, 200], output_shape=[1, 3, 350, 150], + scales=None, sizes=[1, 3, 350, 150]), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=None, sizes=[16, 7, 390, 600]), + dict(input_shape=[4, 15, 700, 800], output_shape=[4, 15, 350, 800], + scales=None, sizes=[4, 15, 350, 800]), + dict(input_shape=[4, 15, 3, 200], output_shape=[4, 15, 1, 200], + scales=None, sizes=[4, 15, 1, 200]), + dict(input_shape=[100, 200], output_shape=[350, 150], + scales=None, sizes=[350, 150]), + dict(input_shape=[190, 400], output_shape=[390, 600], + scales=None, sizes=[390, 600]), + dict(input_shape=[700, 800], output_shape=[350, 800], + scales=None, sizes=[350, 800]), + dict(input_shape=[3, 200], output_shape=[1, 200], + scales=None, sizes=[1, 200]), + ] + + @pytest.mark.parametrize("params", test_data_sizes_cubic) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['cubic']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor']) + def test_resize_combined_sizes_cubic(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, custom_eps=2.6e-2, temp_dir=temp_dir) + + test_data_sizes_nearest = [ + dict(input_shape=[1, 3, 100, 200], 
output_shape=[1, 3, 350, 150], + scales=None, sizes=[1, 3, 350, 150]), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=None, sizes=[16, 7, 390, 600]), + dict(input_shape=[4, 33, 600, 800], output_shape=[4, 33, 300, 800], + scales=None, sizes=[4, 33, 300, 800]), + dict(input_shape=[4, 33, 3, 800], output_shape=[4, 33, 1, 800], + scales=None, sizes=[4, 33, 1, 800]), + dict(input_shape=[3, 100, 200], output_shape=[3, 350, 150], + scales=None, sizes=[3, 350, 150]), + dict(input_shape=[7, 190, 400], output_shape=[7, 390, 600], + scales=None, sizes=[7, 390, 600]), + dict(input_shape=[33, 600, 800], output_shape=[33, 300, 800], + scales=None, sizes=[33, 300, 800]), + dict(input_shape=[33, 3, 800], output_shape=[33, 1, 800], + scales=None, sizes=[33, 1, 800]), + dict(input_shape=[100, 200], output_shape=[350, 150], + scales=None, sizes=[350, 150]), + dict(input_shape=[190, 400], output_shape=[390, 600], + scales=None, sizes=[390, 600]), + dict(input_shape=[600, 800], output_shape=[300, 800], + scales=None, sizes=[300, 800]), + dict(input_shape=[3, 800], output_shape=[1, 800], + scales=None, sizes=[1, 800]), + dict(input_shape=[100], output_shape=[350], + scales=None, sizes=[350]), + dict(input_shape=[190], output_shape=[390], + scales=None, sizes=[390]), + dict(input_shape=[600], output_shape=[300], + scales=None, sizes=[300]), + dict(input_shape=[3], output_shape=[1], + scales=None, sizes=[1]), + ] + + @pytest.mark.parametrize("params", test_data_sizes_nearest) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['nearest']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor', 'round_prefer_ceil', + 'floor', 'ceil']) + def test_resize_combined_sizes_nearest(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_sizes_linear = [ + dict(input_shape=[1, 3, 100, 200], output_shape=[1, 3, 350, 150], + scales=None, sizes=[1, 3, 350, 150]), + dict(input_shape=[16, 7, 190, 400], output_shape=[16, 7, 390, 600], + scales=None, sizes=[16, 7, 390, 600]), + dict(input_shape=[4, 33, 600, 800], output_shape=[4, 33, 300, 800], + scales=None, sizes=[4, 33, 300, 800]), + dict(input_shape=[4, 33, 3, 800], output_shape=[4, 33, 1, 800], + scales=None, sizes=[4, 33, 1, 800]), + dict(input_shape=[100, 200], output_shape=[350, 150], + scales=None, sizes=[350, 150]), + dict(input_shape=[190, 400], output_shape=[390, 600], + scales=None, sizes=[390, 600]), + dict(input_shape=[600, 800], output_shape=[300, 800], + scales=None, sizes=[300, 800]), + dict(input_shape=[3, 800], output_shape=[1, 800], + scales=None, sizes=[1, 800]), + ] + + @pytest.mark.parametrize("params", test_data_sizes_linear) + @pytest.mark.parametrize("coordinate_transformation_mode", + ['half_pixel', 'pytorch_half_pixel', 'align_corners', + 'asymmetric', 'tf_half_pixel_for_nn']) + @pytest.mark.parametrize("cubic_coeff_a", [-0.75]) + @pytest.mark.parametrize("mode", ['linear']) + @pytest.mark.parametrize("nearest_mode", ['round_prefer_floor']) + def 
test_resize_combined_sizes_linear(self, params, coordinate_transformation_mode, cubic_coeff_a, mode, + nearest_mode, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resize_net(**params, + coordinate_transformation_mode=coordinate_transformation_mode, + cubic_coeff_a=cubic_coeff_a, mode=mode, nearest_mode=nearest_mode, + precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, custom_eps=2.0e-2, temp_dir=temp_dir) + + +def create_ref_net_in_sizes_mode(precision, input_shape, output_shape, sizes_value, scales_value, attrs): + input_data_type = np_data_type_to_destination_type(data_type_str_to_np(precision)) + input_rank = len(input_shape) + epsilon = np.array([1.0e-5]) + spatial_dims = spatial_dimensions(input_shape) + begin_dim = spatial_dims[0] + end_dim = input_rank + + spatial_sizes_value = sizes_value[spatial_dims] + + nodes_attrs = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + 'shape_of': {'kind': 'op', 'type': 'ShapeOf'}, + 'shape_of_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'shape_to_float': {'kind': 'op', 'type': 'Convert', 'destination_type': input_data_type}, + 'shape_to_float_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'div': {'kind': 'op', 'type': 'Divide'}, + 'div_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'div_sizes_const_data': {'kind': 'data', 'value': sizes_value}, + 'div_sizes_const': {'kind': 'op', 'type': 'Const'}, + 'div_sizes_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'eps_const_data': {'kind': 'data', 'value': epsilon}, + 'eps_const': {'kind': 'op', 'type': 'Const'}, + 'eps_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'add': {'kind': 'op', 'type': 'Add'}, + 'add_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'ss_scales': { + 'kind': 'op', 'type': 'StridedSlice', 'begin_mask': 0, + 'end_mask': 0, 'new_axis_mask': 0, + 'shrink_axis_mask': 0, 'ellipsis_mask': 0 + }, + 'ss_scales_data': {'shape': int64_array([len(spatial_sizes_value)]), 'kind': 'data'}, + 'ss_scales_begin_const_data': {'kind': 'data', 'value': int64_array([begin_dim])}, + 'ss_scales_begin_const': {'kind': 'op', 'type': 'Const'}, + 'ss_scales_begin_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'ss_scales_end_const_data': {'kind': 'data', 'value': int64_array([end_dim])}, + 'ss_scales_end_const': {'kind': 'op', 'type': 'Const'}, + 'ss_scales_end_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'ss_scales_stride_const_data': {'kind': 'data', 'value': int64_array([1])}, + 'ss_scales_stride_const': {'kind': 'op', 'type': 'Const'}, + 'ss_scales_stride_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'sizes_const_data': {'kind': 'data', 'value': spatial_sizes_value}, + 'sizes_const': {'kind': 'op', 'type': 'Const'}, + 'sizes_data': {'shape': int64_array([len(spatial_sizes_value)]), 'kind': 'data'}, + 'axes_const_data': {'kind': 'data', 'value': spatial_dims}, + 'axes_const': {'kind': 'op', 'type': 'Const'}, + 'axes_data': {'shape': int64_array([len(spatial_dims)]), 'kind': 'data'}, + 'interpolate': attrs, + 'interpolate_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + edges = [ + ('input', 'input_data'), + ('input_data', 'interpolate', {'in': 0, 'out': 0}), + ('input_data', 'shape_of', {'in': 0, 'out': 0}), + ('shape_of', 'shape_of_data'), + ('shape_of_data', 'shape_to_float'), + ('shape_to_float', 'shape_to_float_data'), + 
('shape_to_float_data', 'div', {'in': 1}), + ('div_sizes_const_data', 'div_sizes_const'), + ('div_sizes_const', 'div_sizes_data'), + ('div_sizes_data', 'div', {'in': 0}), + ('div', 'div_data'), + ('eps_const_data', 'eps_const'), + ('eps_const', 'eps_data'), + ('div_data', 'add', {'in': 0}), + ('eps_data', 'add', {'in': 1}), + ('add', 'add_data'), + ('add_data', 'ss_scales', {'in': 0}), + ('ss_scales', 'ss_scales_data'), + ('ss_scales_begin_const_data', 'ss_scales_begin_const'), + ('ss_scales_begin_const', 'ss_scales_begin_data'), + ('ss_scales_begin_data', 'ss_scales', {'in': 1}), + ('ss_scales_end_const_data', 'ss_scales_end_const'), + ('ss_scales_end_const', 'ss_scales_end_data'), + ('ss_scales_end_data', 'ss_scales', {'in': 2}), + ('ss_scales_stride_const_data', 'ss_scales_stride_const'), + ('ss_scales_stride_const', 'ss_scales_stride_data'), + ('ss_scales_stride_data', 'ss_scales', {'in': 3}), + ('ss_scales_data', 'interpolate', {'in': 2}), + ('sizes_const_data', 'sizes_const'), + ('sizes_const', 'sizes_data'), + ('sizes_data', 'interpolate', {'in': 1}), + ('axes_const_data', 'axes_const'), + ('axes_const', 'axes_data'), + ('axes_data', 'interpolate', {'in': 3}), + ('interpolate', 'interpolate_data'), + ('interpolate_data', 'result') + ] + + return build_graph(nodes_attrs, edges) + + +def create_ref_net_in_scales_mode(precision, input_shape, output_shape, sizes_value, scales_value, attrs): + input_data_type = np_data_type_to_destination_type(data_type_str_to_np(precision)) + input_rank = len(input_shape) + epsilon = np.array([1.0e-5]) + spatial_dims = spatial_dimensions(input_shape) + begin_dim = spatial_dims[0] + end_dim = input_rank + + spatial_scales_value = scales_value[spatial_dims] + + nodes_attrs = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + 'shape_of': {'kind': 'op', 'type': 'ShapeOf'}, + 'shape_of_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'shape_to_float': {'kind': 'op', 'type': 'Convert', 'destination_type': input_data_type}, + 'shape_to_float_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'mul': {'kind': 'op', 'type': 'Multiply'}, + 'mul_scales_const_data': {'kind': 'data', 'value': scales_value}, + 'mul_scales_const': {'kind': 'op', 'type': 'Const'}, + 'mul_scales_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'mul_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'eps_const_data': {'kind': 'data', 'value': epsilon}, + 'eps_const': {'kind': 'op', 'type': 'Const'}, + 'eps_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'add': {'kind': 'op', 'type': 'Add'}, + 'add_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'floor': {'type': 'Floor', 'kind': 'op'}, + 'floor_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'to_int': {'kind': 'op', 'type': 'Convert', 'destination_type': 'i64'}, + 'to_int_data': {'shape': int64_array([input_rank]), 'kind': 'data'}, + 'strided_slice': { + 'kind': 'op', 'type': 'StridedSlice', 'begin_mask': 0, + 'end_mask': 0, 'new_axis_mask': 0, + 'shrink_axis_mask': 0, 'ellipsis_mask': 0 + }, + 'strided_slice_data': {'shape': int64_array([len(spatial_scales_value)]), 'kind': 'data'}, + 'begin_const_data': {'kind': 'data', 'value': int64_array([begin_dim])}, + 'begin_const': {'kind': 'op', 'type': 'Const'}, + 'begin_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'end_const_data': {'kind': 'data', 'value': int64_array([end_dim])}, + 'end_const': {'kind': 'op', 'type': 'Const'}, + 'end_data': 
{'shape': int64_array([1]), 'kind': 'data'}, + 'stride_const_data': {'kind': 'data', 'value': int64_array([1])}, + 'stride_const': {'kind': 'op', 'type': 'Const'}, + 'stride_data': {'shape': int64_array([1]), 'kind': 'data'}, + 'scales_const_data': {'kind': 'data', 'value': spatial_scales_value}, + 'scales_const': {'kind': 'op', 'type': 'Const'}, + 'scales_data': {'shape': int64_array([len(spatial_scales_value)]), 'kind': 'data'}, + 'axes_const_data': {'kind': 'data', 'value': spatial_dims}, + 'axes_const': {'kind': 'op', 'type': 'Const'}, + 'axes_data': {'shape': int64_array([len(spatial_dims)]), 'kind': 'data'}, + 'interpolate': attrs, + 'interpolate_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + edges = [ + ('input', 'input_data'), + ('input_data', 'interpolate', {'in': 0, 'out': 0}), + ('input_data', 'shape_of', {'in': 0, 'out': 0}), + ('shape_of', 'shape_of_data'), + ('shape_of_data', 'shape_to_float'), + ('shape_to_float', 'shape_to_float_data'), + ('shape_to_float_data', 'mul', {'in': 0}), + ('mul_scales_const_data', 'mul_scales_const'), + ('mul_scales_const', 'mul_scales_data'), + ('mul_scales_data', 'mul', {'in': 1}), + ('mul', 'mul_data'), + ('eps_const_data', 'eps_const'), + ('eps_const', 'eps_data'), + ('mul_data', 'add', {'in': 0}), + ('eps_data', 'add', {'in': 1}), + ('add', 'add_data'), + ('add_data', 'floor'), + ('floor', 'floor_data'), + ('floor_data', 'to_int'), + ('to_int', 'to_int_data'), + ('to_int_data', 'strided_slice', {'in': 0}), + ('strided_slice', 'strided_slice_data'), + ('begin_const_data', 'begin_const'), + ('begin_const', 'begin_data'), + ('begin_data', 'strided_slice', {'in': 1}), + ('end_const_data', 'end_const'), + ('end_const', 'end_data'), + ('end_data', 'strided_slice', {'in': 2}), + ('stride_const_data', 'stride_const'), + ('stride_const', 'stride_data'), + ('stride_data', 'strided_slice', {'in': 3}), + ('strided_slice_data', 'interpolate', {'in': 1}), + ('scales_const_data', 'scales_const'), + ('scales_const', 'scales_data'), + ('scales_data', 'interpolate', {'in': 2}), + ('axes_const_data', 'axes_const'), + ('axes_const', 'axes_data'), + ('axes_data', 'interpolate', {'in': 3}), + ('interpolate', 'interpolate_data'), + ('interpolate_data', 'result') + ] + + return build_graph(nodes_attrs, edges) + + +def spatial_dimensions(shape): + rank = len(shape) + if rank >= 4: + return np.arange(2, rank) + elif rank in [1, 2]: + return np.arange(0, rank) + else: + return np.arange(1, rank) + + +def convert_onnx_mode(mode: str) -> str: + return {'nearest': 'nearest', 'linear': 'linear_onnx', 'cubic': 'cubic'}[mode] diff --git a/tests/layer_tests/onnx_tests/test_roi_align.py b/tests/layer_tests/onnx_tests/test_roi_align.py new file mode 100644 index 00000000000..b7b89dd05b9 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_roi_align.py @@ -0,0 +1,109 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestROIAlign(OnnxRuntimeLayerTest): + def create_net(self, input_shape, rois_shape, indices_shape, output_shape, + pooled_h, pooled_w, mode, sampling_ratio, spatial_scale, ir_version): + """ + ONNX net IR net + + Input->ROIAlign->Output => Parameter->ROIAlign->Result + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + 
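+    # The ONNX ROIAlign node assembled below takes three inputs: the NCHW feature map, a
+    # [num_rois, 4] tensor of box coordinates, and a [num_rois] vector of per-roi batch indices.
+    # Its output_height/output_width, mode ('avg' or 'max'), sampling_ratio and spatial_scale
+    # attributes are the same values checked on the reference IR ROIAlign node further down
+    # (there exposed as pooled_h/pooled_w, mode, sampling_ratio and spatial_scale).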
input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + rois = helper.make_tensor_value_info('rois', TensorProto.FLOAT, rois_shape) + indices = helper.make_tensor_value_info('indices', TensorProto.FLOAT, indices_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_def = onnx.helper.make_node( + 'ROIAlign', + inputs=['input', 'rois', 'indices'], + outputs=['output'], + **{'output_height': pooled_h, 'output_width': pooled_w, 'mode': mode, + 'sampling_ratio': sampling_ratio, 'spatial_scale': spatial_scale}, + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input, rois, indices], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + # comparison in these tests starts from input node, as we have 3 of them IREngine gets confused + # and takes the first input node in inputs list sorted by lexicographical order + '1_input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + + '2_rois': {'kind': 'op', 'type': 'Parameter'}, + 'rois_data': {'shape': rois_shape, 'kind': 'data'}, + + '3_indices': {'kind': 'op', 'type': 'Parameter'}, + 'indices_data': {'shape': indices_shape, 'kind': 'data'}, + + 'node': {'kind': 'op', 'type': 'ROIAlign', 'pooled_h': pooled_h, 'pooled_w': pooled_w, + 'mode': mode, 'sampling_ratio': sampling_ratio, 'spatial_scale': spatial_scale}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [ + ('1_input', 'input_data'), + ('input_data', 'node', {'in': 0}), + ('2_rois', 'rois_data'), + ('rois_data', 'node', {'in': 1}), + ('3_indices', 'indices_data'), + ('indices_data', 'node', {'in': 2}), + + ('node', 'node_data'), + ('node_data', 'result') + ]) + return onnx_net, ref_net + + test_data = [ + dict(input_shape=[1, 256, 200, 272], rois_shape=[1000, 4], indices_shape=[1000], + pooled_h=7, pooled_w=7, mode="avg", sampling_ratio=2, spatial_scale=0.25, + output_shape=[1000, 256, 7, 7]), + dict(input_shape=[7, 256, 200, 200], rois_shape=[1000, 4], indices_shape=[1000], + pooled_h=6, pooled_w=6, mode="max", sampling_ratio=2, spatial_scale=16.0, + output_shape=[1000, 256, 6, 6]), + dict(input_shape=[7, 256, 200, 200], rois_shape=[1000, 4], indices_shape=[1000], + pooled_h=5, pooled_w=6, mode="max", sampling_ratio=2, spatial_scale=16.0, + output_shape=[1000, 256, 5, 6]), + + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_roi_align(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_scale.py b/tests/layer_tests/onnx_tests/test_scale.py new file mode 100644 index 00000000000..a624a19596b --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_scale.py @@ -0,0 +1,144 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestScale(Caffe2OnnxLayerTest): + def create_net(self, shape, scale, ir_version): + """ + ONNX net IR net + + Input->Scale->Output => Input->Power + + """ + + # + # Create ONNX model + # + 
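+        # 'Scale' (an older ONNX/Caffe2 operator exercised here through the Caffe2 backend)
+        # multiplies its input elementwise by the scalar 'scale' attribute, which is why the
+        # docstring above expects it to land in IR as a Power layer; the const variant later in
+        # this class folds it the same way, as constant * scale (cast to float16 for FP16 runs).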
+ import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Scale', + inputs=['input'], + outputs=['output'], + scale=scale + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape, scale, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+scaled const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randint(-127, 127, shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Scale', + inputs=['const1'], + outputs=['scale'], + scale=scale + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'scale'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ir_const = constant.flatten() * scale + if precision == 'FP16': + ir_const = ir_const.astype(np.float16) + + ref_net = None + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12], scale=0.1), + dict(shape=[8, 10, 12], scale=0.9), + dict(shape=[6, 8, 10, 12], scale=1.5), + dict(shape=[4, 6, 8, 10, 12], scale=4.5)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_scale(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_scale_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_scatter.py b/tests/layer_tests/onnx_tests/test_scatter.py new file mode 100644 index 00000000000..eda9fc29dcc --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_scatter.py @@ -0,0 +1,125 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestScatters(Caffe2OnnxLayerTest): + op = None + + def create_net(self, input_shape, 
indices_shape, updates_shape, output_shape, + axis, ir_version): + """ + ONNX net IR net + + Input->Scatter->Output => Parameter->ScatterElementsUpdate->Result + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + data = helper.make_tensor_value_info('data', TensorProto.FLOAT, input_shape) + indices = helper.make_tensor_value_info('indices', TensorProto.INT64, indices_shape) + updates = helper.make_tensor_value_info('updates', TensorProto.FLOAT, indices_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + params = {'axis': axis} if axis is not None else {} + node_def = onnx.helper.make_node( + self.op, + inputs=['data', 'indices', 'updates'], + outputs=['output'], + **params, + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [data, indices, updates], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + # comparison in these tests starts from input node, as we have 3 of them IREngine gets confused + # and takes the first input node in inputs list sorted by lexicographical order + '1_input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + + '2_indices': {'kind': 'op', 'type': 'Parameter'}, + 'indices_data': {'shape': indices_shape, 'kind': 'data'}, + + '3_updates': {'kind': 'op', 'type': 'Parameter'}, + 'updates_data': {'shape': updates_shape, 'kind': 'data'}, + + 'const_indata': {'kind': 'data', 'value': np.int64(axis) if axis is not None else np.int64(0)}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'kind': 'data'}, + + 'node': {'kind': 'op', 'type': 'ScatterElementsUpdate'}, + 'node_data': {'shape': output_shape, 'kind': 'data'}, + + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [ + ('1_input', 'input_data'), + ('input_data', 'node', {'in': 0}), + ('2_indices', 'indices_data'), + ('indices_data', 'node', {'in': 1}), + ('3_updates', 'updates_data'), + ('updates_data', 'node', {'in': 2}), + ('const_indata', 'const'), + ('const', 'const_data'), + ('const_data', 'node', {'in': 3}), + + ('node', 'node_data'), + ('node_data', 'result') + ]) + return onnx_net, ref_net + + +test_data = [ + dict(input_shape=[1, 5], indices_shape=[1, 2], updates_shape=[1, 2], + axis=1, output_shape=[1, 5]), + dict(input_shape=[1, 256, 200, 272], indices_shape=[1, 256, 200, 272], updates_shape=[1, 256, 200, 272], + axis=None, output_shape=[1, 256, 200, 272])] + + +class TestScatter(TestScatters): + op = 'Scatter' + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_scatter(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + +class TestScatterElements(TestScatters): + op = 'ScatterElements' + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_scatter_elements(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_sigmoid.py b/tests/layer_tests/onnx_tests/test_sigmoid.py new file mode 100644 index 
00000000000..bf0c517f68b --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_sigmoid.py @@ -0,0 +1,196 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSigmoid(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Sigmoid->Output => Input->sigmoid + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Sigmoid', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Sigmoid'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+sigmoid const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + const_number = np.prod(shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Sigmoid', + inputs=['const1'], + outputs=['sigmoid1'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'sigmoid1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = 1.0 / (1.0 + np.exp(np.negative(constant))) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': 
{'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 4, 6, 8, 10])] + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_sigmoid_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sigmoid(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sigmoid_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_sign.py b/tests/layer_tests/onnx_tests/test_sign.py new file mode 100644 index 00000000000..90794322561 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_sign.py @@ -0,0 +1,176 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSign(OnnxRuntimeLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Sign->Output => Input->Sign + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Sign', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Sign'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def 
create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+sign const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.randn(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Sign', + inputs=['const1'], + outputs=['sign'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'sign'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.sign(constant) + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[12]), + dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sign(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sign_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_slice.py b/tests/layer_tests/onnx_tests/test_slice.py new file mode 100644 index 00000000000..28b1786b5c2 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_slice.py @@ -0,0 +1,399 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestSlice(OnnxRuntimeLayerTest): + def create_net(self, shape, axes, ends, starts, ir_version, opset=6, steps=None): + """ + ONNX net IR net + + Input->Slice->Output => Input->Crop + + """ + + # + # Create ONNX model + # + + import onnx + from 
onnx import helper + from onnx import TensorProto + + # calculate output shape + test_arr = np.zeros(shape) + slice_idx = [None] * len(shape) + for i, axis in enumerate(axes): + slice_idx[axis] = slice(starts[i], ends[i], steps[i] if steps is not None else 1) + for axis, s in enumerate(slice_idx): + if s is None: + slice_idx[axis] = slice(0, shape[axis], 1) + test_arr = test_arr[tuple(slice_idx)] + + output_shape = list(test_arr.shape) + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + nodes = list() + if opset < 10: + node_def = onnx.helper.make_node( + 'Slice', + inputs=['input'], + outputs=['slice'], + starts=starts, + ends=ends, + axes=axes + ) + nodes.append(node_def) + else: + node_starts_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['starts'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(starts)], + vals=starts + ) + ) + node_ends_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['ends'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(ends)], + vals=ends + ) + ) + node_axes_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['axes'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(axes)], + vals=axes + ) + ) + inputs = ['input', 'starts', 'ends', 'axes'] + if steps: + node_steps_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['steps'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(steps)], + vals=steps + ) + ) + nodes.append(node_steps_def) + inputs.append('steps') + + node_def = onnx.helper.make_node( + 'Slice', + inputs=inputs, + outputs=['slice'] + ) + nodes.extend([node_starts_def, node_ends_def, node_axes_def, node_def]) + + elu_def = onnx.helper.make_node( + 'Elu', + inputs=['slice'], + outputs=['output'] + ) + nodes.append(elu_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output] + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + def create_net_const(self, shape, axes, ends, starts, ir_version, opset=6, steps=None): + """ + ONNX net IR net + + Input->Concat(+sliced const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + # calculate output shape + constant = np.random.randint(-127, 127, shape).astype(np.float) + + slice_idx = [None] * len(shape) + for i, axis in enumerate(axes): + slice_idx[axis] = slice(starts[i], ends[i], steps[i] if steps is not None else 1) + + for axis, s in enumerate(slice_idx): + if s is None: + slice_idx[axis] = slice(0, shape[axis], 1) + + constant_after = constant[tuple(slice_idx)] + + output_shape = list(constant_after.shape) + + concat_axis = 0 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + 
outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=shape, + vals=constant.flatten(), + ), + ) + + nodes = [node_const_def] + if opset < 10: + node_def = onnx.helper.make_node( + 'Slice', + inputs=['const1'], + outputs=['slice'], + starts=starts, + ends=ends, + axes=axes + ) + nodes.append(node_def) + else: + node_starts_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['starts'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(starts)], + vals=starts + ) + ) + node_ends_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['ends'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(ends)], + vals=ends + ) + ) + node_axes_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['axes'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(axes)], + vals=axes + ) + ) + + inputs = ['const1', 'starts', 'ends', 'axes'] + if steps: + node_steps_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['steps'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[len(steps)], + vals=steps + ) + ) + nodes.append(node_steps_def) + inputs.append('steps') + + node_def = onnx.helper.make_node( + 'Slice', + inputs=inputs, + outputs=['slice'] + ) + nodes.extend([node_starts_def, node_ends_def, node_axes_def, node_def]) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'slice'], + outputs=['output'], + axis=concat_axis + ) + nodes.append(node_concat_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_reshape_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
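+        # Illustrative cross-check only (not part of the generated IR; values are taken
+        # from the test data above): for shape=[10, 12], axes=[0, 1], starts=[1, 1],
+        # ends=[9, 11], steps=[2, 2] the ONNX Slice of the constant is equivalent to
+        # plain numpy indexing, constant[1:9:2, 1:11:2], which is exactly what the
+        # slice_idx construction above computes in the general case.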
+ # + + ref_net = None + + return onnx_net, ref_net + + test_data_no_steps = [ + dict(shape=[10, 12], axes=[0], starts=[1], ends=[9]), + dict(shape=[10, 12], axes=[1], starts=[1], ends=[11]), + dict(shape=[10, 12], axes=[0, 1], starts=[1, 1], ends=[9, 11]), + dict(shape=[8, 10, 12], axes=[0], starts=[1], ends=[7]), + dict(shape=[8, 10, 12], axes=[1], starts=[1], ends=[9]), + dict(shape=[8, 10, 12], axes=[2], starts=[1], ends=[11]), + dict(shape=[8, 10, 12], axes=[0, 1], starts=[1, 1], ends=[7, 9]), + dict(shape=[8, 10, 12], axes=[1, 2], starts=[1, 1], ends=[9, 11]), + dict(shape=[8, 10, 12], axes=[0, 2], starts=[1, 1], ends=[7, 11]), + dict(shape=[8, 10, 12], axes=[0, 1, 2], starts=[1, 1, 1], ends=[7, 9, 11]), + dict(shape=[6, 8, 10, 12], axes=[0], starts=[1], ends=[5]), + dict(shape=[6, 8, 10, 12], axes=[1], starts=[1], ends=[7]), + dict(shape=[6, 8, 10, 12], axes=[2], starts=[1], ends=[9]), + dict(shape=[6, 8, 10, 12], axes=[3], starts=[1], ends=[11]), + dict(shape=[6, 8, 10, 12], axes=[0, 1], starts=[1, 1], ends=[5, 7]), + dict(shape=[6, 8, 10, 12], axes=[1, 2], starts=[1, 1], ends=[7, 9]), + dict(shape=[6, 8, 10, 12], axes=[2, 3], starts=[1, 1], ends=[9, 11]), + dict(shape=[6, 8, 10, 12], axes=[0, 2], starts=[1, 1], ends=[5, 9]), + dict(shape=[6, 8, 10, 12], axes=[0, 3], starts=[1, 1], ends=[5, 11]), + dict(shape=[6, 8, 10, 12], axes=[1, 3], starts=[1, 1], ends=[7, 11]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 2], starts=[1, 1, 1], ends=[5, 7, 9]), + dict(shape=[6, 8, 10, 12], axes=[1, 2, 3], starts=[1, 1, 1], ends=[7, 9, 11]), + dict(shape=[6, 8, 10, 12], axes=[0, 2, 3], starts=[1, 1, 1], ends=[5, 9, 11]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 3], starts=[1, 1, 1], ends=[5, 7, 11]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 2, 3], starts=[1, 1, 1, 1], ends=[5, 7, 9, 11]), + dict(shape=[4, 6, 8, 10, 12], axes=[0], starts=[1], ends=[3]), + dict(shape=[4, 6, 8, 10, 12], axes=[1], starts=[1], ends=[5]), + dict(shape=[4, 6, 8, 10, 12], axes=[2], starts=[1], ends=[7]), + dict(shape=[4, 6, 8, 10, 12], axes=[3], starts=[1], ends=[9]), + dict(shape=[4, 6, 8, 10, 12], axes=[4], starts=[1], ends=[11]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1], starts=[1, 1], ends=[3, 5]), + dict(shape=[4, 6, 8, 10, 12], axes=[2, 3], starts=[1, 1], ends=[7, 9]), + dict(shape=[4, 6, 8, 10, 12], axes=[3, 4], starts=[1, 1], ends=[9, 11]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2], starts=[1, 1, 1], ends=[3, 5, 7]), + dict(shape=[4, 6, 8, 10, 12], axes=[1, 2, 3], starts=[1, 1, 1], ends=[5, 7, 9]), + dict(shape=[4, 6, 8, 10, 12], axes=[2, 3, 4], starts=[1, 1, 1], ends=[7, 9, 11]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2, 3], starts=[1, 1, 1, 1], ends=[3, 5, 7, 9]), + dict(shape=[4, 6, 8, 10, 12], axes=[1, 2, 3, 4], starts=[1, 1, 1, 1], ends=[5, 7, 9, 11]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2, 3, 4], starts=[1, 1, 1, 1, 1], ends=[3, 5, 7, 9, 11]), + ] + + test_data_with_steps = [ + dict(shape=[10, 12], axes=[0, 1], starts=[1, 1], ends=[9, 11], steps=[2, 2]), + dict(shape=[10, 12], axes=[0, 1], starts=[9, 11], ends=[1, 1], steps=[-1, -1]), + dict(shape=[10, 12], axes=[0], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[10, 12], axes=[1], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[10, 12], axes=[0, 1], starts=[9, 11], ends=[1, 1], steps=[-2, -2]), + dict(shape=[8, 10, 12], axes=[0, 1, 2], starts=[1, 1, 1], ends=[7, 9, 11], steps=[2, 2, 2]), + dict(shape=[8, 10, 12], axes=[0, 1, 2], starts=[7, 9, 11], ends=[1, 1, 1], steps=[-1, -1, -1]), + dict(shape=[8, 10, 12], axes=[0], 
starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[8, 10, 12], axes=[1], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[8, 10, 12], axes=[2], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[8, 10, 12], axes=[0, 1, 2], starts=[7, 9, 11], ends=[1, 1, 1], steps=[-2, -2, -2]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 2, 3], starts=[1, 1, 1, 1], ends=[5, 7, 9, 11], steps=[2, 2, 2, 2]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 2, 3], starts=[5, 7, 9, 11], ends=[1, 1, 1, 1], steps=[-1, -1, -1, -1]), + dict(shape=[6, 8, 10, 12], axes=[0], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[6, 8, 10, 12], axes=[1], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[6, 8, 10, 12], axes=[2], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[6, 8, 10, 12], axes=[3], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[6, 8, 10, 12], axes=[0, 1, 2, 3], starts=[5, 7, 9, 11], ends=[1, 1, 1, 1], steps=[-2, -2, -2, -2]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2, 3, 4], starts=[1, 1, 1, 1, 1], ends=[3, 5, 7, 9, 11], + steps=[2, 2, 2, 2, 2]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2, 3, 4], starts=[3, 5, 7, 9, 11], ends=[1, 1, 1, 1, 1], + steps=[-1, -1, -1, -1, -1]), + dict(shape=[4, 6, 8, 10, 12], axes=[0], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[4, 6, 8, 10, 12], axes=[1], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[4, 6, 8, 10, 12], axes=[2], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[4, 6, 8, 10, 12], axes=[3], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[4, 6, 8, 10, 12], axes=[4], starts=[-1], ends=[-9999], steps=[-1]), + dict(shape=[4, 6, 8, 10, 12], axes=[0, 1, 2, 3, 4], starts=[3, 5, 7, 9, 11], ends=[1, 1, 1, 1, 1], + steps=[-2, -2, -2, -2, -2]), + ] + + @pytest.mark.parametrize("params", test_data_no_steps) + @pytest.mark.nightly + def test_slice_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_no_steps) + @pytest.mark.nightly + def test_slice_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_no_steps + test_data_with_steps) + @pytest.mark.nightly + def test_slice_opset10(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_net(**params, opset=10, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_no_steps + test_data_with_steps) + @pytest.mark.nightly + def test_slice_const_opset10(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, opset=10, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_no_steps + test_data_with_steps) + @pytest.mark.nightly + def test_slice_opset11(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_net(**params, opset=11, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_no_steps + test_data_with_steps) + @pytest.mark.nightly + def test_slice_const_opset11(self, params, ie_device, precision, ir_version, temp_dir): + 
self._test(*self.create_net_const(**params, opset=11, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_softmax.py b/tests/layer_tests/onnx_tests/test_softmax.py new file mode 100644 index 00000000000..cf4a5d4d6cd --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_softmax.py @@ -0,0 +1,167 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from mo.front.common.partial_infer.utils import int64_array +from unit_tests.utils.graph import build_graph + + +def second_input_data_of_reshape(src_shape, axis): + if axis == 0: + return [1, -1] + if axis == 1: + return [0, -1] + if axis > 1: + return [int(np.prod(int64_array(src_shape[: axis]))), -1] + return [-1, int(np.prod(int64_array(src_shape[len(src_shape) + axis: ])))] + + +def get_flatten_shape(src_shape, axis): + flatten_axis = axis if axis >= 0 else len(src_shape) + axis + if flatten_axis == 0: + fst_dim = 1 + snd_dim = int(np.prod(int64_array(src_shape))) + elif flatten_axis == 1: + fst_dim = src_shape[0] + snd_dim = int(np.prod(int64_array(src_shape[1: ]))) + else: + fst_dim = int(np.prod(int64_array(src_shape[: flatten_axis]))) + snd_dim = int(np.prod(int64_array(src_shape[flatten_axis: ]))) + return [fst_dim, snd_dim] + + +class TestSoftmax(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randn(*inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, softmax_axis, ir_version): + """ + ONNX net IR net + + Input->Softmax->Output => Input->Reshape->SoftMax->Reshape + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Softmax', + inputs=['input'], + outputs=['output'], + axis=softmax_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + converted_shape = shape if len(shape) != 1 else shape[0] + flatten_shape = get_flatten_shape(shape, softmax_axis) + reshape_data_val = second_input_data_of_reshape(shape, softmax_axis) + + if check_ir_version(10, None, ir_version): + if len(shape) == 2 and shape == flatten_shape: + ref_nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter', 'shape': converted_shape}, + 'input_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'flatten_shape_val': {'shape': int64_array(reshape_data_val).shape, + 'kind': 'data', + 'value': int64_array(reshape_data_val)}, + 'flatten_shape': {'type': 'Const', 'kind': 'op', 'shape': 2}, + 'flatten_shape_data': {'shape': int64_array([2]), 'kind': 'data', 'value': None}, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'kind': 'data', 'shape': flatten_shape, 'value': None}, + 'softmax': {'type': 'SoftMax', 'kind': 'op', 'axis': 1}, + 'softmax_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_edges = 
[ + ('input', 'input_data'), + ('flatten_shape_val', 'flatten_shape'), + ('flatten_shape', 'flatten_shape_data'), + ('flatten_shape_data', 'reshape', {'in': 1}), + ('input_data', 'reshape', {'in': 0}), + ('reshape', 'reshape_data'), + ('reshape_data', 'softmax'), + ('softmax', 'softmax_data'), + ('softmax_data', 'result'), + ] + else: + ref_nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter', 'shape': converted_shape}, + 'input_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'flatten_shape_val': {'shape': int64_array(reshape_data_val).shape, + 'kind': 'data', + 'value': int64_array(reshape_data_val)}, + 'flatten_shape': {'type': 'Const', 'kind': 'op', 'shape': 2}, + 'flatten_shape_data': {'shape': int64_array([2]), 'kind': 'data', 'value': None}, + 'reshape': {'kind': 'op', 'type': 'Reshape'}, + 'reshape_data': {'kind': 'data', 'shape': flatten_shape, 'value': None}, + 'softmax': {'type': 'SoftMax', 'kind': 'op', 'axis': 1}, + 'softmax_data': {'shape': flatten_shape, 'kind': 'data', 'value': None}, + 'last_shape_val': {'shape': int64_array(shape).shape, 'kind': 'data', 'value': int64_array(shape)}, + 'last_shape': {'type': 'Const', 'kind': 'op', 'shape': len(shape)}, + 'last_shape_data': {'shape': int64_array([len(shape)]), 'kind': 'data', 'value': None}, + 'last_reshape': {'kind': 'op', 'type': 'Reshape'}, + 'last_reshape_data': {'kind': 'data', 'shape': shape, 'value': None}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_edges = [ + ('input', 'input_data'), + ('flatten_shape_val', 'flatten_shape'), + ('flatten_shape', 'flatten_shape_data'), + ('flatten_shape_data', 'reshape', {'in': 1}), + ('input_data', 'reshape', {'in': 0}), + ('reshape', 'reshape_data'), + ('reshape_data', 'softmax'), + ('softmax', 'softmax_data'), + ('last_shape_val', 'last_shape'), + ('last_shape', 'last_shape_data'), + ('last_shape_data', 'last_reshape', {'in': 1}), + ('softmax_data', 'last_reshape', {'in': 0}), + ('last_reshape', 'last_reshape_data'), + ('last_reshape_data', 'result'), + ] + + ref_net = build_graph(ref_nodes_attributes, ref_edges) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12], softmax_axis=0), + dict(shape=[10, 12], softmax_axis=1)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_softmax(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_softplus.py b/tests/layer_tests/onnx_tests/test_softplus.py new file mode 100644 index 00000000000..ea5b56071e0 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_softplus.py @@ -0,0 +1,182 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import Caffe2OnnxLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSoftplus(Caffe2OnnxLayerTest): + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Softplus->Output => Input->Softplus + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Softplus', + inputs=['input'], + outputs=['output'], + ) + + # Create the graph 
(GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'SoftPlus'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result') + ]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+Softplus const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.rand(*shape).astype(np.float32) * 255 + 0.5 + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Softplus', + inputs=['const1'], + outputs=['Softplus1'], + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'Softplus1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.log(np.exp(constant) + 1.0) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result') + ]) + + return onnx_net, ref_net + + test_data = [dict(shape=[12]), + dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_softplus(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def 
test_softplus_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_split_concat.py b/tests/layer_tests/onnx_tests/test_split_concat.py new file mode 100644 index 00000000000..b9bac5aaf25 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_split_concat.py @@ -0,0 +1,307 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +test_data_3D = [ + dict(input_shape=[1, 50, 50], output_shapes=[[1, 50, 25], [1, 50, 25]], axis=2), + dict(input_shape=[2, 50, 50], output_shapes=[[2, 20, 50], [2, 15, 50], [2, 15, 50]], axis=1), + dict(input_shape=[4, 50, 50], output_shapes=[[1, 50, 50], [1, 50, 50], [1, 50, 50], [1, 50, 50]], axis=0)] + +test_data_4D = [ + dict(input_shape=[1, 32, 800, 800], output_shapes=[[1, 16, 800, 800], [1, 16, 800, 800]], axis=1), + dict(input_shape=[4, 32, 80, 80], output_shapes=[[4, 8, 80, 80], [4, 8, 80, 80], [4, 8, 80, 80], [4, 8, 80, 80]], + axis=1), + dict(input_shape=[2, 21, 80, 80], output_shapes=[[2, 7, 80, 80], [2, 7, 80, 80], [2, 7, 80, 80]], axis=1), + dict(input_shape=[3, 21, 80, 80], output_shapes=[[3, 14, 80, 80], [3, 5, 80, 80], [3, 2, 80, 80]], axis=1), + dict(input_shape=[3, 21, 80, 80], output_shapes=[[1, 21, 80, 80], [1, 21, 80, 80], [1, 21, 80, 80]], axis=0), + dict(input_shape=[3, 21, 80, 80], output_shapes=[[3, 21, 20, 80], [3, 21, 35, 80], [3, 21, 25, 80]], axis=2), + dict(input_shape=[3, 21, 80, 80], output_shapes=[[3, 21, 80, 40], [3, 21, 80, 10], [3, 21, 80, 30]], axis=3)] + +test_data_5D = [ + dict(input_shape=[1, 50, 50, 80, 60], + output_shapes=[[1, 50, 10, 80, 60], + [1, 50, 10, 80, 60], + [1, 50, 10, 80, 60], + [1, 50, 10, 80, 60], + [1, 50, 10, 80, 60]], axis=2), + dict(input_shape=[1, 50, 50, 80, 60], output_shapes=[[1, 25, 50, 80, 60], [1, 25, 50, 80, 60]], axis=1)] + + +class TestSplitConcat(Caffe2OnnxLayerTest): + # TODO Add test with default values (axis=0) + def create_split_concat_net(self, input_shape, output_shapes, axis, ir_version): + """ + ONNX net IR net + + Input->Split->Concat->Output => Input->Split->Concat + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + outputs, split = [], [] + for id, output_shape in enumerate(output_shapes): + helper.make_tensor_value_info('output_{}'.format(id), TensorProto.FLOAT, output_shape) + outputs.append('output_{}'.format(id)) + split.append(output_shape[axis]) + + # Output for concat + output_concat = helper.make_tensor_value_info('output_concat', TensorProto.FLOAT, input_shape) + + node_split_def = onnx.helper.make_node( + 'Split', + inputs=['input'], + outputs=outputs, + axis=axis, + split=split + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=outputs, + outputs=['output_concat'], + axis=axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_split_def, node_concat_def], + 'test_split_model', + [input], + [output_concat], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_split_model') + + # + # Create reference IR net + # Please, spesify 'type': 'Input' for inpit node + # Moreover, do not forget to validate ALL layer attributes!!! 
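+        # Rough numpy equivalent of the graph above (illustration only, assuming default
+        # ONNX Split/Concat semantics): splitting and re-concatenating along the same axis
+        # reproduces the input, which is why output_concat reuses input_shape:
+        #   pieces = np.split(data, np.cumsum(split)[:-1], axis=axis)
+        #   restored = np.concatenate(pieces, axis=axis)   # == data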
+ # + + ref_net = None + + return onnx_net, ref_net + + # TODO Add test with default values (axis=0) + def create_split_concat_net_const(self, input_shape, output_shapes, axis, ir_version): + """ + ONNX net IR net + + Input(const)->Split->Concat--->Concat->Output => Input--->Concat + Input-' Const-' + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + concat_output_shape = input_shape.copy() + concat_output_shape[concat_axis] *= 2 + + const_number = np.prod(input_shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + outputs, split = [], [] + for id, output_shape in enumerate(output_shapes): + helper.make_tensor_value_info('output_{}'.format(id), TensorProto.FLOAT, output_shape) + outputs.append('output_{}'.format(id)) + split.append(output_shape[axis]) + + # Output for concat + output_concat = helper.make_tensor_value_info('output_dyn_concat', TensorProto.FLOAT, concat_output_shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=input_shape, + vals=constant, + ), + ) + + node_split_def = onnx.helper.make_node( + 'Split', + inputs=['const1'], + outputs=outputs, + axis=axis, + split=split + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=outputs, + outputs=['output_concat'], + axis=axis + ) + + node_dyn_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'output_concat'], + outputs=['output_dyn_concat'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_split_def, node_concat_def, node_dyn_concat_def], + 'test_split_model', + [input], + [output_concat], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_split_model') + + # + # Create reference IR net + # Please, spesify 'type': 'Input' for inpit node + # Moreover, do not forget to validate ALL layer attributes!!! 
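+        # Expected behaviour sketched with numpy (assumption; the IR itself is not
+        # validated here): the constant Split->Concat chain reproduces the original
+        # constant, so the runtime output should match
+        #   np.concatenate([input_data, constant.reshape(input_shape)], axis=concat_axis)
+        # giving the doubled first dimension in concat_output_shape.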
+ # + + ref_net = None + + return onnx_net, ref_net + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_split_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_concat_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_split_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_concat_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_split_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_concat_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_split_3D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_split_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_split_4D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_split_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_split_5D_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_split_concat_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + +class TestSplit(Caffe2OnnxLayerTest): + # TODO Add test with default values (axis=0) + def create_split_net(self, input_shape, output_shapes, axis, ir_version): + """ + ONNX net IR net + + Input->Split->Output => Input->Split + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + outputs, split = [], [] + for id, output_shape in enumerate(output_shapes): + out = helper.make_tensor_value_info('output_{}'.format(id), TensorProto.FLOAT, output_shape) + outputs.append((out, 'output_{}'.format(id))) + split.append(output_shape[axis]) + + node_split_def = onnx.helper.make_node( + 'Split', + inputs=['input'], + outputs=['node_{}'.format(x[1]) for x in outputs], + axis=axis, + split=split + ) + nodes = [node_split_def] + + for x in outputs: + nodes.append(onnx.helper.make_node( + 'Elu', + inputs=['node_{}'.format(x[1])], + outputs=[x[1]] + )) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_split_model', + [input], + [x[0] for x in outputs], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_split_model') + + # + # Create reference IR net + # Please, spesify 'type': 'Input' for inpit node + # Moreover, do not forget to validate ALL layer attributes!!! 
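+        # Illustrative numpy sketch of the expected outputs (assumption only, Elu with
+        # its default alpha=1.0):
+        #   pieces = np.split(x, np.cumsum(split)[:-1], axis=axis)
+        #   outputs = [np.where(p > 0, p, np.exp(p) - 1) for p in pieces]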
+ # + + ref_net = None + + return onnx_net, ref_net + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_split_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_split_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_split_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_split_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_sqrt.py b/tests/layer_tests/onnx_tests/test_sqrt.py new file mode 100644 index 00000000000..09b6df3b481 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_sqrt.py @@ -0,0 +1,187 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSqrt(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 255, inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_net(self, shape, ir_version): + """ + ONNX net IR net + + Input->Sqrt->Output => Input->Power + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + 'Sqrt', + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'const_indata': {'shape': None, 'kind': 'data'}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': np.ones(len(shape)), 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'Power'}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('const_indata', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape, precision, ir_version): + """ + ONNX net IR net + + Input->Concat(+sqrt const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = 
helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.rand(*shape).astype(np.float) * 255 + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + 'Sqrt', + inputs=['const'], + outputs=['sqrt'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'sqrt'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + constant = np.sqrt(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sqrt(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sqrt_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_squeeze.py b/tests/layer_tests/onnx_tests/test_squeeze.py new file mode 100644 index 00000000000..413d3d86ec9 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_squeeze.py @@ -0,0 +1,210 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestSqueeze(Caffe2OnnxLayerTest): + def create_squeeze_net(self, axes, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Squeeze(axes=0)->Output => Input->Reshape + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_squeeze_def = onnx.helper.make_node( + 'Squeeze', + inputs=['input'], + outputs=['output'], + axes=axes + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + 
[node_squeeze_def], + 'test_squeeze_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_squeeze_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + + return onnx_net, ref_net + + def create_squeeze_net_const(self, axes, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Concat(+squeezed const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 0 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + const_number = np.prod(input_shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, input_shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_squeeze_def = onnx.helper.make_node( + 'Squeeze', + inputs=['const1'], + outputs=['squeeze1'], + axes=axes + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'squeeze1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_squeeze_def, node_concat_def], + 'test_squeeze_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_squeeze_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
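+        # Illustrative only (assumption): ONNX Squeeze with explicit axes behaves like
+        # np.squeeze(x, axis=tuple(axes)), so output_shape in the test data is simply
+        # input_shape with the listed unit dimensions removed.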
+ # + + ref_net = None + + return onnx_net, ref_net + + test_data_5D = [ + dict(axes=[0], input_shape=[1, 2, 3, 10, 10], output_shape=[2, 3, 10, 10]), + dict(axes=[1], input_shape=[2, 1, 3, 10, 10], output_shape=[2, 3, 10, 10]), + dict(axes=[2], input_shape=[2, 3, 1, 10, 10], output_shape=[2, 3, 10, 10]), + dict(axes=[3], input_shape=[2, 3, 10, 1, 10], output_shape=[2, 3, 10, 10]), + dict(axes=[4], input_shape=[2, 3, 10, 10, 1], output_shape=[2, 3, 10, 10]), + dict(axes=[0, 1], input_shape=[1, 1, 3, 10, 10], output_shape=[3, 10, 10]), + dict(axes=[0, 2], input_shape=[1, 3, 1, 10, 10], output_shape=[3, 10, 10]), + dict(axes=[0, 3], input_shape=[1, 3, 10, 1, 10], output_shape=[3, 10, 10]), + dict(axes=[0, 4], input_shape=[1, 3, 10, 10, 1], output_shape=[3, 10, 10]), + dict(axes=[1, 2], input_shape=[3, 1, 1, 10, 10], output_shape=[3, 10, 10]), + dict(axes=[1, 3], input_shape=[3, 1, 10, 1, 10], output_shape=[3, 10, 10]), + dict(axes=[1, 4], input_shape=[3, 1, 10, 10, 1], output_shape=[3, 10, 10]), + dict(axes=[2, 3], input_shape=[3, 10, 1, 1, 10], output_shape=[3, 10, 10]), + dict(axes=[2, 4], input_shape=[3, 10, 1, 10, 1], output_shape=[3, 10, 10]), + dict(axes=[3, 4], input_shape=[3, 10, 10, 1, 1], output_shape=[3, 10, 10]), + dict(axes=[0, 1, 2], input_shape=[1, 1, 1, 10, 10], output_shape=[10, 10]), + dict(axes=[0, 1, 3], input_shape=[1, 1, 10, 1, 10], output_shape=[10, 10]), + dict(axes=[0, 1, 4], input_shape=[1, 1, 10, 10, 1], output_shape=[10, 10]), + dict(axes=[0, 2, 3], input_shape=[1, 10, 1, 1, 10], output_shape=[10, 10]), + dict(axes=[0, 2, 4], input_shape=[1, 10, 1, 10, 1], output_shape=[10, 10]), + dict(axes=[0, 3, 4], input_shape=[1, 10, 10, 1, 1], output_shape=[10, 10]), + dict(axes=[1, 2, 3], input_shape=[10, 1, 1, 1, 10], output_shape=[10, 10]), + dict(axes=[1, 2, 4], input_shape=[10, 1, 1, 10, 1], output_shape=[10, 10]), + dict(axes=[1, 3, 4], input_shape=[10, 1, 10, 1, 1], output_shape=[10, 10]), + dict(axes=[2, 3, 4], input_shape=[10, 10, 1, 1, 1], output_shape=[10, 10])] + + test_data_4D = [ + dict(axes=[0], input_shape=[1, 3, 10, 10], output_shape=[3, 10, 10]), + dict(axes=[1], input_shape=[3, 1, 10, 10], output_shape=[3, 10, 10]), + dict(axes=[2], input_shape=[3, 10, 1, 10], output_shape=[3, 10, 10]), + dict(axes=[3], input_shape=[3, 10, 10, 1], output_shape=[3, 10, 10]), + dict(axes=[0, 1], input_shape=[1, 1, 10, 10], output_shape=[10, 10]), + dict(axes=[0, 2], input_shape=[1, 10, 1, 10], output_shape=[10, 10]), + dict(axes=[0, 3], input_shape=[1, 10, 10, 1], output_shape=[10, 10]), + dict(axes=[1, 2], input_shape=[10, 1, 1, 10], output_shape=[10, 10]), + dict(axes=[1, 3], input_shape=[10, 1, 10, 1], output_shape=[10, 10]), + dict(axes=[2, 3], input_shape=[10, 10, 1, 1], output_shape=[10, 10])] + + test_data_3D = [ + dict(axes=[0], input_shape=[1, 10, 10], output_shape=[10, 10]), + dict(axes=[1], input_shape=[10, 1, 10], output_shape=[10, 10]), + dict(axes=[2], input_shape=[10, 10, 1], output_shape=[10, 10])] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_squeeze_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_squeeze_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + 
@pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_squeeze_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_squeeze_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_squeeze_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_squeeze_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_sum.py b/tests/layer_tests/onnx_tests/test_sum.py new file mode 100644 index 00000000000..bd4896144c0 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_sum.py @@ -0,0 +1,322 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestSum(OnnxRuntimeLayerTest): + def create_net(self, dyn_shapes, const_shapes, precision, ir_version, opset=None): + """ + ONNX net IR net + + Inputs->Sum with consts->Output => Input->Eltwise + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + inputs = list() + input_names = list() + out_shape_len = 0 + for i, shape in enumerate(dyn_shapes): + input_name = 'input{}'.format(i + 1) + inputs.append(helper.make_tensor_value_info(input_name, TensorProto.FLOAT, shape)) + input_names.append(input_name) + if len(shape) > out_shape_len: + out_shape_len = len(shape) + output_shape = shape + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + nodes = list() + consts = list() + for i, shape in enumerate(const_shapes): + const = np.random.randint(-127, 127, shape).astype(np.float) + const_name = 'const{}'.format(i + 1) + nodes.append(helper.make_node( + 'Constant', + inputs=[], + outputs=[const_name], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + )) + input_names.append(const_name) + consts.append(const) + + nodes.append(helper.make_node( + 'Sum', + inputs=input_names, + outputs=['output'] + )) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + inputs, + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # Create reference IR net + + ref_net = None + # Too complicated IR to generate by hand + + return onnx_net, ref_net + + def create_const_net(self, const_shapes, ir_version, opset=None): + """ + ONNX net IR net + + Inputs->Concat with Sum of consts->Output => Input->Concat with consts + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx 
import TensorProto + + shape_len = 0 + for shape in const_shapes: + if len(shape) > shape_len: + shape_len = len(shape) + input_shape = shape + + concat_axis = 0 + output_shape = input_shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + nodes = list() + input_names = list() + consts = list() + for i, shape in enumerate(const_shapes): + const = np.random.randint(-127, 127, shape).astype(np.float) + const_name = 'const{}'.format(i + 1) + nodes.append(helper.make_node( + 'Constant', + inputs=[], + outputs=[const_name], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=const.shape, + vals=const.flatten(), + ), + )) + input_names.append(const_name) + consts.append(const) + + nodes.append(helper.make_node( + 'Sum', + inputs=input_names, + outputs=['sum'] + )) + + nodes.append(helper.make_node( + 'Concat', + inputs=['input', 'sum'], + outputs=['output'], + axis=concat_axis + )) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # Create reference IR net + + ref_net = None + + return onnx_net, ref_net + + test_data_precommit = [ + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]], + const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12]], + const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], + const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]])] + + test_data = [ + # TODO: Add broadcasting tests. 
Note: Sum-6 doesn't support broadcasting + dict(dyn_shapes=[[4, 6]], const_shapes=[[4, 6]]), + dict(dyn_shapes=[[4, 6]], const_shapes=[[4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6]], const_shapes=[[4, 6], [4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6]], const_shapes=[]), + dict(dyn_shapes=[[4, 6], [4, 6]], const_shapes=[[4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6]], const_shapes=[[4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6]], const_shapes=[[4, 6], [4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6], [4, 6]], const_shapes=[]), + dict(dyn_shapes=[[4, 6], [4, 6], [4, 6]], const_shapes=[[4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6], [4, 6]], const_shapes=[[4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6], [4, 6], [4, 6]], const_shapes=[[4, 6], [4, 6], [4, 6]]), + dict(dyn_shapes=[[4, 6, 8]], const_shapes=[[4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]], const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(dyn_shapes=[[4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]], const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]], + const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], + const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], const_shapes=[]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], const_shapes=[[4, 6, 8, 10, 12]]), + 
dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], + const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(dyn_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]], + const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]])] + + const_test_data_precommit = [ + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]) + ] + + const_test_data = [ + dict(const_shapes=[[4, 6], [4, 6]]), + dict(const_shapes=[[4, 6], [4, 6], [4, 6]]), + dict(const_shapes=[[4, 6], [4, 6], [4, 6], [4, 6]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8], [4, 6, 8]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12]]) + ] + + const_test_data_broadcasting_precommit = [ + dict(const_shapes=[[4, 6, 8, 10], [10], [10], [10]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [12]]) + ] + + const_test_data_broadcasting = [ + dict(const_shapes=[[4, 6], [6]]), + dict(const_shapes=[[4, 6], [6], [6]]), + dict(const_shapes=[[4, 6], [4, 6], [6]]), + dict(const_shapes=[[4, 6], [6], [6], [6]]), + dict(const_shapes=[[4, 6], [4, 6], [6], [6]]), + dict(const_shapes=[[4, 6], [4, 6], [4, 6], [6]]), + dict(const_shapes=[[4, 6, 8], [8]]), + dict(const_shapes=[[4, 6, 8], [8], [8]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8], [8]]), + dict(const_shapes=[[4, 6, 8], [8], [8], [8]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8], [8], [8]]), + dict(const_shapes=[[4, 6, 8], [4, 6, 8], [4, 6, 8], [8]]), + dict(const_shapes=[[4, 6, 8, 10], [10]]), + dict(const_shapes=[[4, 6, 8, 10], [10], [10]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [10]]), + dict(const_shapes=[[4, 6, 8, 10], [10], [10], [10]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [10], [10]]), + dict(const_shapes=[[4, 6, 8, 10], [4, 6, 8, 10], [4, 6, 8, 10], [10]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [12], [12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [12], [12]]), + dict(const_shapes=[[4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [4, 6, 8, 10, 12], [12]]) + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sum_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, opset=6, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_sum_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, precision=precision, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + 
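+    # Unlike test_sum_opset6 above, this variant builds the ONNX model without pinning an
+    # explicit opset, so helper.make_model falls back to the installed onnx package's default.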
def test_sum(self, params, ie_device, precision, ir_version, temp_dir): + self._test( + *self.create_net(**params, precision=precision, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", const_test_data) + @pytest.mark.nightly + def test_sum_const_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_const_net(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", const_test_data_precommit) + @pytest.mark.precommit + def test_sum_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_const_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", const_test_data) + @pytest.mark.nightly + def test_sum_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_const_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", const_test_data_broadcasting_precommit) + @pytest.mark.precommit + def test_sum_const_broadcasting_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_const_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", const_test_data_broadcasting) + @pytest.mark.nightly + def test_sum_const_broadcasting(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_const_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_topk.py b/tests/layer_tests/onnx_tests/test_topk.py new file mode 100644 index 00000000000..863bf95352e --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_topk.py @@ -0,0 +1,167 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestTopK(OnnxRuntimeLayerTest): + def create_net(self, shape, k, axis, ir_version, largest=None, sorted=None, opset=None): + """ + ONNX net IR net + + Input->TopK->Output => Input->TopK + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + output_shape = shape.copy() + if axis is not None: + output_shape[axis] = k + else: + output_shape[-1] = k + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + values = helper.make_tensor_value_info('cvalues', TensorProto.FLOAT, output_shape) + indices = helper.make_tensor_value_info('cindices', TensorProto.INT64, output_shape) + + const1 = np.ones(output_shape).astype(np.int64) + const2 = np.ones(output_shape).astype(np.float) + + nodes = list() + inputs = ['input'] + if opset > 9: + node_k_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['k'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.INT64, + dims=[1], + vals=[k], + ), + ) + nodes.append(node_k_def) + inputs.append('k') + + args = dict() + if opset < 10: + args['k'] = k + if axis is not None: + args['axis'] = axis + if sorted is not None: + args['sorted'] = sorted + if largest is not None: + args['largest'] = largest + + node_def = onnx.helper.make_node( + 'TopK', + inputs=inputs, + outputs=['values', 'indices'], + **args + ) + + 
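+        # The two Add nodes below shift the raw TopK outputs by a constant 1 and expose the
+        # results as the graph outputs 'cvalues' and 'cindices', so both the values and the
+        # indices produced by TopK are consumed and included in the output comparison.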
node_const1_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor2', + data_type=TensorProto.INT64, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_add1_def = onnx.helper.make_node( + 'Add', + inputs=['indices', 'const1'], + outputs=['cindices'] + ) + + node_const2_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + value=helper.make_tensor( + name='const_tensor3', + data_type=TensorProto.FLOAT, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + node_add2_def = onnx.helper.make_node( + 'Add', + inputs=['values', 'const2'], + outputs=['cvalues'] + ) + + nodes.extend([node_def, node_const1_def, node_add1_def, node_const2_def, node_add2_def]) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [values, indices], + ) + + # Create the model (ModelProto) + args = dict(producer_name='test_model') + if opset: + args['opset_imports'] = [helper.make_opsetid("", opset)] + onnx_net = helper.make_model(graph_def, **args) + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + test_data = [dict(shape=[10, 12], k=3, axis=0), + dict(shape=[10, 12], k=5, axis=1), + dict(shape=[8, 10, 12], k=3, axis=0), + dict(shape=[8, 10, 12], k=4, axis=1), + dict(shape=[8, 10, 12], k=5, axis=2), + dict(shape=[6, 8, 10, 12], k=3, axis=0), + dict(shape=[6, 8, 10, 12], k=4, axis=1), + dict(shape=[6, 8, 10, 12], k=5, axis=2), + dict(shape=[6, 8, 10, 12], k=6, axis=3), + dict(shape=[4, 6, 8, 10, 12], k=3, axis=0), + dict(shape=[4, 6, 8, 10, 12], k=4, axis=1), + dict(shape=[4, 6, 8, 10, 12], k=5, axis=2), + dict(shape=[4, 6, 8, 10, 12], k=6, axis=3), + dict(shape=[4, 6, 8, 10, 12], k=7, axis=4)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_topk_opset6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, opset=6, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_topk_opset10(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, opset=10, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("largest", [1, 0, None]) + @pytest.mark.parametrize("sorted", [1, 0, None]) + @pytest.mark.nightly + def test_topk_opset11(self, params, ie_device, precision, ir_version, largest, sorted, temp_dir): + self._test(*self.create_net(**params, largest=largest, sorted=sorted, + opset=11, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_transpose.py b/tests/layer_tests/onnx_tests/test_transpose.py new file mode 100644 index 00000000000..13e26b626c4 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_transpose.py @@ -0,0 +1,178 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import itertools + +import numpy as np +import pytest +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestTranspose(Caffe2OnnxLayerTest): + def create_net(self, shape, perm, ir_version): + """ + ONNX net IR net + + Input->Transpose->Sigmoid->Output => Input->Permute->sigmoid + + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + 
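+        # Derive the expected output shape by transposing a dummy ones-array with the requested
+        # permutation; np.transpose with perm=None simply reverses the axis order.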
output_shape = np.transpose(np.ones(shape), perm).shape + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + args = dict() + if perm: + args['perm'] = perm + node_def = helper.make_node( + 'Transpose', + inputs=['input'], + outputs=['transpose'], + **args + ) + + sigmoid_def = helper.make_node( + 'Sigmoid', + inputs=['transpose'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def, sigmoid_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + if not perm: + perm = list(reversed(range(len(shape)))) + + return onnx_net, ref_net + + def create_net_const(self, shape, perm, ir_version): + """ + ONNX net IR net + + Input->Concat(+transposed const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + constant = np.random.randint(-127, 127, shape).astype(np.float) + constant_transposed = np.transpose(constant, perm) + + concat_axis = 0 + input_shape = list(constant_transposed.shape) + output_shape = input_shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + args = dict() + if perm: + args['perm'] = perm + node_def = helper.make_node( + 'Transpose', + inputs=['const1'], + outputs=['transpose'], + **args + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'transpose'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + + ref_net = None + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[4, 6, 8, 10, 12], perm=None), + dict(shape=[8, 10, 12], perm=[2, 1, 0]), + dict(shape=[6, 8, 10, 12], perm=[0, 3, 1, 2]), + dict(shape=[4, 6, 8, 10, 12], perm=[1, 0, 4, 3, 2])] + + test_data = [dict(shape=[10, 12], perm=None), + dict(shape=[8, 10, 12], perm=None), + dict(shape=[6, 8, 10, 12], perm=None), + dict(shape=[4, 6, 8, 10, 12], perm=None)] + + for shape in [[10, 12], [8, 10, 12], [6, 8, 10, 12], [4, 6, 8, 10, 12]]: + for perm in itertools.permutations(np.arange(len(shape))): + test_data.append(dict(shape=shape, perm=list(perm))) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_transpose_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_transpose(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + 
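+    # The *_const variants below run Transpose on a Constant that feeds a Concat, so the
+    # permutation is expected to be folded into the constant when the IR is generated.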
@pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.nightly + def test_transpose_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_transpose_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_trigonometry.py b/tests/layer_tests/onnx_tests/test_trigonometry.py new file mode 100644 index 00000000000..6499f007a33 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_trigonometry.py @@ -0,0 +1,298 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestTrigonomery(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.rand(*(inputs_dict[input])).astype(np.float32) + return inputs_dict + + def create_net(self, shape, op, ir_version): + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + assert op in ['Sin', 'Sinh', 'Asin', 'Cos', 'Cosh', 'Acos', 'Tan', 'Tanh', 'Atan'] + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, shape) + + node_def = onnx.helper.make_node( + op, + inputs=['input'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': op}, + 'node_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape, op, precision, ir_version): + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + assert op in ['Sin', 'Sinh', 'Asin', 'Cos', 'Cosh', 'Acos', 'Tan', 'Tanh', 'Atan'] + + concat_axis = 0 + output_shape = shape.copy() + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + constant = np.random.rand(*shape).astype(np.float) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_def = onnx.helper.make_node( + op, + inputs=['const'], + outputs=['res'] + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 
'res'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # + # Create reference IR net + # + if op == 'Sin': + constant = np.sin(constant) + elif op == 'Sinh': + constant = np.sinh(constant) + elif op == 'Asin': + constant = np.arcsin(constant) + elif op == 'Cos': + constant = np.cos(constant) + elif op == 'Cosh': + constant = np.cosh(constant) + elif op == 'Acos': + constant = np.arccos(constant) + elif op == 'Tan': + constant = np.tan(constant) + elif op == 'Tanh': + constant = np.tanh(constant) + elif op == 'Atan': + constant = np.arctan(constant) + if precision == 'FP16': + constant = constant.astype(np.float16) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data_precommit = [dict(shape=[2, 4, 6, 8])] + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sin(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Sin'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sinh(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Sinh'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_asin(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Asin'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_cos_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Cos'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_cos(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Cos'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_cosh(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Cosh'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + 
@pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_acos(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Acos'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_tan(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Tan'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_tanh(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Tanh'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_atan(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version, op='Atan'), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sin_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Sin'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_sinh_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Sinh'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_asin_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Asin'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_cos_const_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Cos'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_cos_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Cos'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_cosh_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Cosh'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_acos_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Acos'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_tan_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Tan'), + 
ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_tanh_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Tanh'), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_atan_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version, precision=precision, op='Atan'), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_unsqueeze.py b/tests/layer_tests/onnx_tests/test_unsqueeze.py new file mode 100644 index 00000000000..4042a2cda63 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_unsqueeze.py @@ -0,0 +1,210 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.onnx_layer_test_class import Caffe2OnnxLayerTest + + +class TestUnsqueeze(Caffe2OnnxLayerTest): + def create_unsqueeze_net(self, axes, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Unsqueeze(axes=0)->Output => Input->Reshape + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, input_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + node_squeeze_def = onnx.helper.make_node( + 'Unsqueeze', + inputs=['input'], + outputs=['output'], + axes=axes + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_squeeze_def], + 'test_squeeze_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_squeeze_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
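+        # ref_net is kept as None below, i.e. no reference IR graph is supplied for this case;
+        # the test presumably validates inference results only.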
+ # + + ref_net = None + + return onnx_net, ref_net + + def create_unsqueeze_net_const(self, axes, input_shape, output_shape, ir_version): + """ + ONNX net IR net + + Input->Concat(+unsqueezed const)->Output => Input->Concat(+const) + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + import numpy as np + + concat_axis = 1 + concat_output_shape = output_shape.copy() + concat_output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, output_shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, concat_output_shape) + + const_number = np.prod(input_shape) + constant = np.random.randint(-127, 127, const_number).astype(np.float) + constant = np.reshape(constant, input_shape) + + node_const_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=constant.shape, + vals=constant.flatten(), + ), + ) + + node_squeeze_def = onnx.helper.make_node( + 'Unsqueeze', + inputs=['const1'], + outputs=['unsqueeze1'], + axes=axes + ) + + node_concat_def = onnx.helper.make_node( + 'Concat', + inputs=['input', 'unsqueeze1'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_squeeze_def, node_concat_def], + 'test_unsqueeze_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_unsqueeze_model') + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + ref_net = None + + return onnx_net, ref_net + + test_data_5D = [ + dict(axes=[0], input_shape=[2, 3, 10, 10], output_shape=[1, 2, 3, 10, 10]), + dict(axes=[1], input_shape=[2, 3, 10, 10], output_shape=[2, 1, 3, 10, 10]), + dict(axes=[2], input_shape=[2, 3, 10, 10], output_shape=[2, 3, 1, 10, 10]), + dict(axes=[3], input_shape=[2, 3, 10, 10], output_shape=[2, 3, 10, 1, 10]), + dict(axes=[4], input_shape=[2, 3, 10, 10], output_shape=[2, 3, 10, 10, 1]), + dict(axes=[0, 1], input_shape=[3, 10, 10], output_shape=[1, 1, 3, 10, 10]), + dict(axes=[0, 2], input_shape=[3, 10, 10], output_shape=[1, 3, 1, 10, 10]), + dict(axes=[0, 3], input_shape=[3, 10, 10], output_shape=[1, 3, 10, 1, 10]), + dict(axes=[0, 4], input_shape=[3, 10, 10], output_shape=[1, 3, 10, 10, 1]), + dict(axes=[1, 2], input_shape=[3, 10, 10], output_shape=[3, 1, 1, 10, 10]), + dict(axes=[1, 3], input_shape=[3, 10, 10], output_shape=[3, 1, 10, 1, 10]), + dict(axes=[1, 4], input_shape=[3, 10, 10], output_shape=[3, 1, 10, 10, 1]), + dict(axes=[2, 3], input_shape=[3, 10, 10], output_shape=[3, 10, 1, 1, 10]), + dict(axes=[2, 4], input_shape=[3, 10, 10], output_shape=[3, 10, 1, 10, 1]), + dict(axes=[3, 4], input_shape=[3, 10, 10], output_shape=[3, 10, 10, 1, 1]), + dict(axes=[0, 1, 2], input_shape=[10, 10], output_shape=[1, 1, 1, 10, 10]), + dict(axes=[0, 1, 3], input_shape=[10, 10], output_shape=[1, 1, 10, 1, 10]), + dict(axes=[0, 1, 4], input_shape=[10, 10], output_shape=[1, 1, 10, 10, 1]), + dict(axes=[0, 2, 3], input_shape=[10, 10], output_shape=[1, 10, 1, 1, 10]), + dict(axes=[0, 2, 4], input_shape=[10, 10], output_shape=[1, 10, 1, 10, 1]), + dict(axes=[0, 3, 4], input_shape=[10, 10], output_shape=[1, 10, 10, 1, 1]), + dict(axes=[1, 2, 3], input_shape=[10, 10], output_shape=[10, 1, 1, 1, 10]), + dict(axes=[1, 2, 4], 
input_shape=[10, 10], output_shape=[10, 1, 1, 10, 1]), + dict(axes=[1, 3, 4], input_shape=[10, 10], output_shape=[10, 1, 10, 1, 1]), + dict(axes=[2, 3, 4], input_shape=[10, 10], output_shape=[10, 10, 1, 1, 1])] + + test_data_4D = [ + dict(axes=[0], input_shape=[3, 10, 10], output_shape=[1, 3, 10, 10]), + dict(axes=[1], input_shape=[3, 10, 10], output_shape=[3, 1, 10, 10]), + dict(axes=[2], input_shape=[3, 10, 10], output_shape=[3, 10, 1, 10]), + dict(axes=[3], input_shape=[3, 10, 10], output_shape=[3, 10, 10, 1]), + dict(axes=[3], input_shape=[3, 10, 10], output_shape=[3, 10, 10, 1]), + dict(axes=[0, 1], input_shape=[10, 10], output_shape=[1, 1, 10, 10]), + dict(axes=[0, 2], input_shape=[10, 10], output_shape=[1, 10, 1, 10]), + dict(axes=[0, 3], input_shape=[10, 10], output_shape=[1, 10, 10, 1]), + dict(axes=[1, 2], input_shape=[10, 10], output_shape=[10, 1, 1, 10]), + dict(axes=[1, 3], input_shape=[10, 10], output_shape=[10, 1, 10, 1]), + dict(axes=[2, 3], input_shape=[10, 10], output_shape=[10, 10, 1, 1])] + + test_data_3D = [ + dict(axes=[0], input_shape=[10, 10], output_shape=[1, 10, 10]), + dict(axes=[1], input_shape=[10, 10], output_shape=[10, 1, 10]), + dict(axes=[2], input_shape=[10, 10], output_shape=[10, 10, 1])] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_unsqueeze_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_unsqueeze_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_unsqueeze_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_unsqueeze_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_unsqueeze_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_unsqueeze_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_unsqueeze_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_upsample.py b/tests/layer_tests/onnx_tests/test_upsample.py new file mode 100644 index 00000000000..0a796dbbeab --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_upsample.py @@ -0,0 +1,188 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import math +import os + +import pytest +import torch +from common.layer_test_class import CommonLayerTest +from common.onnx_layer_test_class import OnnxRuntimeLayerTest + + +class TestUpsample(OnnxRuntimeLayerTest): 
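+    # Upsample-7 takes 'scales' as a node attribute, while Upsample-9 expects it as a second
+    # input; create_net below builds the matching graph for each opset:
+    #   opset 7: Upsample(input)           with attribute scales=[...]
+    #   opset 9: Upsample(input, scales)   where 'scales' comes from a Constant node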
+ def create_net(self, shape, mode, scales, opset, ir_version): + """ + ONNX net IR net + + Input->Upsample->Output => Input->Resample + + """ + + # + # Create ONNX model + # + + import onnx + from onnx import helper + from onnx import TensorProto + + assert opset in [7, 9] + + output_shape = shape.copy() + output_shape[-1] = math.floor(scales[-1] * shape[-1]) + output_shape[-2] = math.floor(scales[-2] * shape[-2]) + input = helper.make_tensor_value_info('input', TensorProto.FLOAT, shape) + output = helper.make_tensor_value_info('output', TensorProto.FLOAT, output_shape) + + args = dict() + nodes = [] + if opset == 7: + args['scales'] = scales + else: + node_scales_def = onnx.helper.make_node( + 'Constant', + inputs=[], + outputs=['scales'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.FLOAT, + dims=[len(scales)], + vals=scales, + ), + ) + nodes.append(node_scales_def) + + if mode: + args['mode'] = mode + node_def = helper.make_node( + 'Upsample', + inputs=['input'] if opset == 7 else ['input', 'scales'], + outputs=['output'], + **args + ) + nodes.append(node_def) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + nodes, + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, + producer_name='test_model', + opset_imports=[helper.make_opsetid("", opset)]) + + # Create reference IR net + mode_to_resample_type = {None: 'caffe.ResampleParameter.NEAREST', + 'nearest': 'caffe.ResampleParameter.NEAREST', + 'linear': 'caffe.ResampleParameter.LINEAR'} + assert mode in mode_to_resample_type + + ref_net = None + + return onnx_net, ref_net + + test_data = [dict(shape=[1, 3, 10, 12], scales=[1., 1., 2., 2.]), + dict(shape=[1, 3, 10, 12], scales=[1., 1., 2.5, 2.5]), + dict(shape=[1, 3, 10, 12], scales=[1., 1., 2.5, 2.])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("mode", [None, 'nearest']) + @pytest.mark.parametrize("opset", [7, 9]) + @pytest.mark.nightly + def test_upsample_nearest(self, params, mode, opset, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, mode=mode, opset=opset, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("opset", [7, 9]) + @pytest.mark.nightly + @pytest.mark.xfail(reason='Both onnxruntime and caffe2 calculate linear upsampling differently from IE') + def test_upsample_linear(self, params, opset, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, mode='linear', opset=opset, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + +class PytorchLayerTest(CommonLayerTest): + def produce_model_path(self, framework_model, save_path): + path = os.path.join(save_path, 'model.onnx') + self.torch_model = framework_model['model'] + torch.onnx.export(self.torch_model, framework_model['var'], path, output_names=['output']) + assert os.path.isfile(path), "model.onnx haven't been saved here: {}".format(save_path) + return path + + def get_framework_results(self, inputs_dict, model_path): + x = torch.tensor(inputs_dict['input'], dtype=torch.float32) + return {'output': self.torch_model(x).numpy()} + + +class UpsampleModel(torch.nn.Module): + def __init__(self, mode, size, scale_factor): + super(UpsampleModel, self).__init__() + args = dict() + if mode: + args['mode'] = mode + if scale_factor: + args['scale_factor'] = scale_factor + elif size: + 
args['size'] = size + self.upsample = torch.nn.modules.upsampling.Upsample(**args) + + +class TestPytorchUpsample(PytorchLayerTest): + def create_net(self, shape, mode, size, scale_factor, ir_version): + """ + Pytorch net IR net + + Input->Upsample->Output => Input->Resample + + """ + + output_shape = shape.copy() + if size: + output_shape[2] = size[0] + output_shape[3] = size[1] + elif scale_factor: + output_shape[2] = scale_factor * output_shape[2] + output_shape[3] = scale_factor * output_shape[3] + + # Create Pytorch model + model = UpsampleModel(mode, size, scale_factor) + + # Create reference IR net + mode_to_resample_type = {None: 'caffe.ResampleParameter.NEAREST', + 'nearest': 'caffe.ResampleParameter.NEAREST', + 'bilinear': 'caffe.ResampleParameter.LINEAR'} + assert mode in mode_to_resample_type + + ref_net = None + + return {'model': model, 'var': torch.randn(shape)}, ref_net + + test_data_precommit = [dict(shape=[1, 3, 10, 10], size=(25, 25), scale_factor=None), + dict(shape=[1, 3, 10, 10], size=None, scale_factor=2)] + + test_data = [dict(shape=[1, 3, 10, 10], size=(20, 20), scale_factor=None), + dict(shape=[1, 3, 10, 10], size=(25, 25), scale_factor=None), + dict(shape=[1, 3, 10, 10], size=None, scale_factor=2)] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.parametrize("mode", [None, 'nearest']) + def test_pytorch_upsample_precommit(self, params, mode, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU': + pytest.skip('Linear upsampling not supported on GPU') + self._test(*self.create_net(**params, mode=mode, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.parametrize("mode", [None, 'nearest', 'bilinear']) + @pytest.mark.nightly + def test_pytorch_upsample(self, params, mode, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU' and mode == 'bilinear': + pytest.skip('Linear upsampling not supported on GPU') + self._test(*self.create_net(**params, mode=mode, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_where.py b/tests/layer_tests/onnx_tests/test_where.py new file mode 100644 index 00000000000..577685d65fa --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_where.py @@ -0,0 +1,97 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestWhere(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 2, inputs_dict[input]).astype(np.bool) + return inputs_dict + + def create_net(self, condition_shape, shape_than, else_shape, ir_version): + """ + ONNX net IR net + + Input->Where->Output => Input->Select + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input_cond = helper.make_tensor_value_info('input_cond', TensorProto.BOOL, condition_shape) + input_than = helper.make_tensor_value_info('input_than', TensorProto.BOOL, shape_than) + input_else = helper.make_tensor_value_info('input_else', TensorProto.BOOL, else_shape) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, condition_shape) + + node_def = helper.make_node( + 'Where', + inputs=['input_cond', 
'input_than', 'input_else'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input_cond, input_than, input_else], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input_cond': {'kind': 'op', 'type': 'Parameter'}, + 'input_cond_data': {'shape': condition_shape, 'kind': 'data'}, + + 'input_than': {'kind': 'op', 'type': 'Parameter'}, + 'input_than_data': {'shape': shape_than, 'kind': 'data'}, + + 'input_else': {'kind': 'op', 'type': 'Parameter'}, + 'input_else_data': {'shape': else_shape, 'kind': 'data'}, + + 'node': {'kind': 'op', 'type': 'Select'}, + 'node_data': {'shape': condition_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input_cond', 'input_cond_data'), + ('input_than', 'input_than_data'), + ('input_else', 'input_else_data'), + ('input_cond_data', 'node'), + ('input_than_data', 'node'), + ('input_else_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + test_data = [dict(condition_shape=[4, 6], shape_than=[4, 6], else_shape=[4, 6]), + dict(condition_shape=[4, 6], shape_than=[4, 6], else_shape=[1, 6]), + dict(condition_shape=[15, 3, 5], shape_than=[15, 1, 5], else_shape=[15, 3, 5]), + dict(condition_shape=[2, 3, 4, 5], shape_than=[], else_shape=[2, 3, 4, 5]), + dict(condition_shape=[2, 3, 4, 5], shape_than=[5], else_shape=[2, 3, 4, 5]), + dict(condition_shape=[2, 3, 4, 5], shape_than=[2, 1, 1, 5], else_shape=[2, 3, 4, 5]), + dict(condition_shape=[2, 3, 4, 5], shape_than=[2, 3, 4, 5], else_shape=[1, 3, 1, 5]), + ] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_where(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/onnx_tests/test_xor.py b/tests/layer_tests/onnx_tests/test_xor.py new file mode 100644 index 00000000000..8b6626d5241 --- /dev/null +++ b/tests/layer_tests/onnx_tests/test_xor.py @@ -0,0 +1,268 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.onnx_layer_test_class import OnnxRuntimeLayerTest +from unit_tests.utils.graph import build_graph + + +class TestXor(OnnxRuntimeLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(0, 2, inputs_dict[input]).astype(np.bool) + return inputs_dict + + def create_net(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Xor with 2nd input->Output => Input->LogicalXor + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input1 = helper.make_tensor_value_info('input1', TensorProto.BOOL, shape1) + input2 = helper.make_tensor_value_info('input2', TensorProto.BOOL, shape2) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + node_def = helper.make_node( + 'Xor', + inputs=['input1', 'input2'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_def], + 'test_model', + [input1, input2], + [output], + ) + + # 
Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input1': {'kind': 'op', 'type': 'Parameter'}, + 'input1_data': {'shape': shape1, 'kind': 'data'}, + 'input2': {'kind': 'op', 'type': 'Parameter'}, + 'input2_data': {'shape': shape2, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalXor'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input1', 'input1_data'), + ('input2', 'input2_data'), + ('input1_data', 'node'), + ('input2_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_one_const(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Xor with const->Output => Input->LogicalXor + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, shape1) + + const = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const.shape, + vals=const.flatten(), + ), + ) + + node_def = helper.make_node( + 'Xor', + inputs=['input', 'const'], + outputs=['output'] + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const_def, node_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape1, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': const.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const.shape, 'kind': 'data'}, + 'node': {'kind': 'op', 'type': 'LogicalXor'}, + 'node_data': {'shape': shape1, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'node'), + ('const_data', 'node'), + ('node', 'node_data'), + ('node_data', 'result')]) + + return onnx_net, ref_net + + def create_net_const(self, shape1, shape2, ir_version): + """ + ONNX net IR net + + Input->Concat with const xor const->Output => Input->Concat + """ + + # + # Create ONNX model + # + + from onnx import helper + from onnx import TensorProto + + concat_axis = 0 + output_shape = list(shape1) + output_shape[concat_axis] *= 2 + + input = helper.make_tensor_value_info('input', TensorProto.BOOL, shape1) + output = helper.make_tensor_value_info('output', TensorProto.BOOL, output_shape) + + const1 = np.random.randint(0, 2, shape1).astype(np.bool) + const2 = np.random.randint(0, 2, shape2).astype(np.bool) + + node_const1_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const1'], + value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const1.shape, + vals=const1.flatten(), + ), + ) + + node_const2_def = helper.make_node( + 'Constant', + inputs=[], + outputs=['const2'], + 
value=helper.make_tensor( + name='const_tensor', + data_type=TensorProto.BOOL, + dims=const2.shape, + vals=const2.flatten(), + ), + ) + + node_def = helper.make_node( + 'Xor', + inputs=['const1', 'const2'], + outputs=['node_out'] + ) + + node_concat_def = helper.make_node( + 'Concat', + inputs=['input', 'node_out'], + outputs=['output'], + axis=concat_axis + ) + + # Create the graph (GraphProto) + graph_def = helper.make_graph( + [node_const1_def, node_const2_def, node_def, node_concat_def], + 'test_model', + [input], + [output], + ) + + # Create the model (ModelProto) + onnx_net = helper.make_model(graph_def, producer_name='test_model') + + # Create reference IR net + constant_calculated = np.logical_xor(const1, const2) + + ref_net = None + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': const1.shape, 'kind': 'data'}, + 'input_const_data': {'kind': 'data', 'value': constant_calculated.flatten()}, + 'const': {'kind': 'op', 'type': 'Const'}, + 'const_data': {'shape': const1.shape, 'kind': 'data'}, + 'concat': {'kind': 'op', 'type': 'Concat', 'axis': concat_axis}, + 'concat_data': {'shape': output_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_const_data', 'const'), + ('const', 'const_data'), + ('input_data', 'concat'), + ('const_data', 'concat'), + ('concat', 'concat_data'), + ('concat_data', 'result')]) + + return onnx_net, ref_net + + test_data = [dict(shape1=[4, 6], shape2=[4, 6]), + dict(shape1=[4, 6, 8], shape2=[4, 6, 8]), + dict(shape1=[4, 6, 8, 10], shape2=[4, 6, 8, 10]), + dict(shape1=[4, 6, 8, 10, 12], shape2=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_xor(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_xor_one_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_one_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_xor_const(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_net_const(**params, ir_version=ir_version), ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/requirements.txt b/tests/layer_tests/requirements.txt new file mode 100644 index 00000000000..717c816c904 --- /dev/null +++ b/tests/layer_tests/requirements.txt @@ -0,0 +1,2 @@ +requests>=2.25.1 +numpy~=1.19.5 diff --git a/tests/layer_tests/tensorflow_tests/conftest.py b/tests/layer_tests/tensorflow_tests/conftest.py new file mode 100644 index 00000000000..bbf635f5680 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/conftest.py @@ -0,0 +1,12 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import inspect + +from common.layer_test_class import get_params + + +def pytest_generate_tests(metafunc): + test_gen_attrs_names = list(inspect.signature(get_params).parameters) + params = get_params() + metafunc.parametrize(test_gen_attrs_names, params, scope="function") diff --git a/tests/layer_tests/tensorflow_tests/permutation_utils.py 
b/tests/layer_tests/tensorflow_tests/permutation_utils.py new file mode 100644 index 00000000000..c26c7b690f7 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/permutation_utils.py @@ -0,0 +1,22 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + +from mo.ops.op import PermuteAttrs + + +def permute_nhwc_to_nchw(shape): + perm = PermuteAttrs.get_nhwc_to_nchw_permutation(len(shape)).perm + new_shape = np.array(shape)[perm] + return new_shape + + +def permute_nchw_to_nhwc(shape): + perm = PermuteAttrs.get_nchw_to_nhwc_permutation(len(shape)).perm + new_shape = np.array(shape)[perm] + return new_shape + + +def permute_axis(axis, permutation_inv): + return permutation_inv[axis] diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Add.py b/tests/layer_tests/tensorflow_tests/test_tf_Add.py new file mode 100644 index 00000000000..46508f610d7 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Add.py @@ -0,0 +1,281 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestAdd(CommonTFLayerTest): + def create_add_placeholder_const_net(self, x_shape, y_shape, ir_version): + """ + Tensorflow net IR net + + Placeholder->Add => Placeholder->Eltwise or Power or ScaleShift + / / + Const-------/ Const-------/ + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_x_shape = x_shape.copy() + tf_y_shape = y_shape.copy() + # reshaping + if len(tf_x_shape) >= 3: + tf_x_shape.append(tf_x_shape.pop(1)) + if len(tf_y_shape) >= 3: + tf_y_shape.append(tf_y_shape.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input') + constant_value = np.random.randint(-256, 256, tf_y_shape).astype(np.float32) + if (constant_value == 0).all(): + # Avoid elimination of the layer from IR + constant_value = constant_value + 1 + y = tf.constant(constant_value) + + add = tf.add(x, y, name="Operation") + add_shape = add.shape.as_list() + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + if len(add_shape) >= 3: + # Permute add_shape to (N,C,...) format + order = [0, len(add_shape) - 1] + list(range(1, len(add_shape) - 1)) + add_shape = [add_shape[i] for i in order] + + y_shape_to_compare = tf_y_shape.copy() + while len(y_shape_to_compare) < len(x_shape): + # Expand shape of constant with 1 + y_shape_to_compare = [1] + y_shape_to_compare + constant_value = np.expand_dims(constant_value, axis=0) + + if len(y_shape_to_compare) >= 3: + # Permute constant_value to (N,C,...) 
format for correct further reshape + order = [0, len(y_shape_to_compare) - 1] + list(range(1, len(y_shape_to_compare) - 1)) + y_shape_to_compare = [y_shape_to_compare[i] for i in order] + constant_value = np.transpose(constant_value, order) + + ref_net = None + + return tf_net, ref_net + + # TODO: implement tests for 2 Consts + Add + + test_data_1D = [ + # Power + dict(x_shape=[1], y_shape=[1]), + # Eltwise + pytest.param(dict(x_shape=[3], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19180")) + ] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_add_placeholder_const_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[1, 3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[2, 3], y_shape=[2, 3]) + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_add_placeholder_const_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1, 1, 1]), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1, 3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 3], y_shape=[1, 1, 3]), + marks=[pytest.mark.xfail(reason="*-19053"), pytest.mark.xfail(reason="*-18830")]), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")) + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_add_placeholder_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[1, 3, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 3], y_shape=[1, 1, 1, 3]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 222, 224], y_shape=[1, 3, 222, 224]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_add_placeholder_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1, 1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 3, 1, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 1, 3], y_shape=[1, 1, 1, 1, 3]), + marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 3, 50, 100, 224]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_add_placeholder_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + 
self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version=ir_version, temp_dir=temp_dir) + + ############################################################################################### + # # + # Broadcast cases # + # # + ############################################################################################### + + test_data_broadcast_1D = [ + # Power + dict(x_shape=[3], y_shape=[1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_1D) + @pytest.mark.nightly + def test_add_placeholder_const_broadcast_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version=ir_version, temp_dir=temp_dir) + + test_data_broadcast_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[3, 1], y_shape=[3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[1, 3, 1, 1]), marks=pytest.mark.xfail(reason="*-19051")) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_2D) + @pytest.mark.nightly + def test_add_placeholder_const_broadcast_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version=ir_version, temp_dir=temp_dir) + + test_data_broadcast_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1]), + # Power + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1]), marks=pytest.mark.xfail(reason="*-19053")), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[3, 1, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[2, 3, 1], y_shape=[1, 3, 2]), marks=pytest.mark.xfail(reason="*-19053")), + ] + + @pytest.mark.parametrize("params", test_data_broadcast_3D) + @pytest.mark.nightly + def test_add_placeholder_const_broadcast_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version=ir_version, temp_dir=temp_dir) + + test_data_broadcast_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3, 1, 1], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[3]), + # ScaleShift + dict(x_shape=[1, 3, 100, 224], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[3, 1, 2]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + dict(x_shape=[1, 3, 100, 224], y_shape=[1, 1, 1, 224]), + # Eltwise + dict(x_shape=[2, 3, 1, 2], y_shape=[1, 3, 2, 1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_add_placeholder_const_broadcast_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, 
ir_version=ir_version, temp_dir=temp_dir) + + test_data_broadcast_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + dict(x_shape=[1, 3, 5, 1, 2], y_shape=[5, 3, 2, 1]), + # Eltwise + dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 1, 1, 1, 224]), + # Eltwise + dict(x_shape=[2, 3, 1, 2, 1], y_shape=[1, 3, 2, 1, 1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_5D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_add_placeholder_const_broadcast_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_add_placeholder_const_net(**params, ir_version=ir_version), ie_device, precision, + ir_version=ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BatchToSpace.py b/tests/layer_tests/tensorflow_tests/test_tf_BatchToSpace.py new file mode 100644 index 00000000000..f7d46dfcef4 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_BatchToSpace.py @@ -0,0 +1,80 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestBatchToSpace(CommonTFLayerTest): + def create_batch_to_space_net(self, in_shape, crops_value, block_shape_value, out_shape, ir_version): + """ + Tensorflow net IR net + + Input->BatchToSpace => Input->BatchToSpace + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, in_shape, 'Input') + crops = tf.constant(crops_value) + block_shape = tf.constant(block_shape_value) + tf.batch_to_space(x, block_shape, crops, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
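+        # Note: for tf.batch_to_space the output batch equals in_batch / prod(block_shape) and each
+        # spatial dim i becomes in_dim[i] * block_shape[i] - crops[i][0] - crops[i][1]; e.g.
+        # in_shape=[4, 1, 1, 3], block_shape=[2, 2], crops=[[0, 0], [0, 0]] gives out_shape=[1, 2, 2, 3].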
+ # + + ref_net = None + + return tf_net, ref_net + + test_data_4D = [ + dict(in_shape=[4, 1, 1, 3], block_shape_value=[1], crops_value=[[0, 0]], + out_shape=[4, 1, 1, 3]), + dict(in_shape=[4, 1, 1, 3], block_shape_value=[2, 2], crops_value=[[0, 0], [0, 0]], + out_shape=[1, 2, 2, 3]), + dict(in_shape=[60, 100, 30, 30], block_shape_value=[3, 2], crops_value=[[1, 5], [4, 1]], + out_shape=[2, 2, 1, 1]), + # todo: enable these tests after supporting the general case on CPU + # dict(in_shape=[4, 1, 1, 1], block_shape_value=[2, 1, 2], crops_value=[[0, 0], [0, 0], [0, 0]], + # out_shape=[]), + # dict(in_shape=[12, 1, 1, 3], block_shape_value=[3, 2, 2], crops_value=[[1, 0], [0, 1], [1, 1]], + # out_shape=[1, 2, 1, 4]), + # dict(in_shape=[36, 2, 2, 3], block_shape_value=[2, 3, 3], crops_value=[[1, 0], [0, 0], [2, 2]], + # out_shape=[2, 3, 6, 5]) + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_batch_to_space_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_batch_to_space_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(in_shape=[72, 2, 1, 4, 2], block_shape_value=[3, 4, 2], crops_value=[[1, 2], [0, 0], [3, 0]], + out_shape=[3, 3, 4, 5, 2]), + # todo: enable these tests after supporting the general case on CPU + # dict(in_shape=[144, 2, 1, 4, 1], block_shape_value=[3, 4, 2, 2], + # crops_value=[[1, 2], [0, 0], [3, 0], [0, 0]], out_shape=[3, 3, 4, 5, 2]), + ] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_batch_to_space_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_batch_to_space_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py new file mode 100644 index 00000000000..b7e997195f5 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_BiasAdd.py @@ -0,0 +1,194 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestBiasAdd(CommonTFLayerTest): + def create_bias_add_placeholder_const_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Placeholder->BiasAdd => Placeholder->Power or ScaleShift + / / + Const-------/ Const-------/ + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + import numpy as np + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_x_shape = shape.copy() + # reshaping + if len(tf_x_shape) >= 3: + tf_x_shape.append(tf_x_shape.pop(1)) + tf_y_shape = tf_x_shape[-1:] + + x = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input') + constant_value = np.random.randint(0, 1, tf_y_shape).astype(np.float32) + if (constant_value == 0).all(): + # Avoid elimination of the layer from IR + constant_value = constant_value + 1 + y = tf.constant(constant_value) + + tf.nn.bias_add(x, y, name="Operation") + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
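+        # Note: tf.nn.bias_add expects a 1-D bias whose length matches the channel dimension
+        # (the last axis for the default NHWC data format), hence tf_y_shape = tf_x_shape[-1:] above.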
+ # + + ref_net = None + + return tf_net, ref_net + + def create_bias_add_2_consts_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Const->BiasAdd-->Concat => Const---->Concat + / / / + Const--/ / Placeholder-/ + / + Placeholder---/ + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + import numpy as np + + tf.compat.v1.reset_default_graph() + + tf_concat_axis = -1 + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_x_shape = shape.copy() + # reshaping + if len(tf_x_shape) >= 3: + tf_x_shape.append(tf_x_shape.pop(1)) + tf_y_shape = tf_x_shape[-1:] + + constant_value_x = np.random.randint(-256, 256, tf_x_shape).astype(np.float32) + x = tf.constant(constant_value_x) + constant_value_y = np.random.randint(-256, 256, tf_y_shape).astype(np.float32) + y = tf.constant(constant_value_y) + + add = tf.nn.bias_add(x, y, name="Operation") + add_shape = add.shape.as_list() + add_value = add.eval() + + placeholder = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input') # Input_1 in graph_def + + concat = tf.concat([placeholder, add], axis=tf_concat_axis, name='Operation') + concat_shape = concat.shape.as_list() + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + # Format axis to positive value + concat_ax = tf_concat_axis if tf_concat_axis >= 0 else tf_concat_axis + len(shape) + if len(shape) >= 3: + # Permute shapes to (N,C,...) format + order = [0, len(concat_shape) - 1] + list(range(1, len(concat_shape) - 1)) + concat_shape = [concat_shape[i] for i in order] + concat_ax = order.index(concat_ax) + add_value = np.transpose(add_value, order) + + ref_net = None + + return tf_net, ref_net + + test_data_2D = [ + dict(shape=[1, 1]), + dict(shape=[1, 224]) + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_bias_add_placeholder_const_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_bias_add_2_consts_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + pytest.param(dict(shape=[1, 1, 224]), marks=pytest.mark.xfail(reason="*-19053")), + pytest.param(dict(shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")) + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_bias_add_placeholder_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_bias_add_2_consts_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + dict(shape=[1, 1, 100, 224]), + dict(shape=[1, 3, 100, 224]) + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def 
test_bias_add_placeholder_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_bias_add_2_consts_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(shape=[1, 1, 50, 100, 224]), + dict(shape=[1, 3, 220, 222, 224]) + ] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_bias_add_placeholder_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_bias_add_2_consts_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bias_add_2_consts_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py new file mode 100644 index 00000000000..10d0bc6de74 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Bucketize.py @@ -0,0 +1,83 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +import tensorflow as tf +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestBucketize(CommonTFLayerTest): + def create_bucketize_net(self, input_shape, input_type, boundaries_size, ir_version): + """ + Tensorflow net: IR net: + Input => Input Boundaries + | \ / + Bucketize Bucketize + {attrs: boundaries} + """ + + # create Tensorflow model + tf.compat.v1.reset_default_graph() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'Input') + constant_value = np.arange(-boundaries_size * 5, boundaries_size * 5, 10, dtype=np.float32) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # create reference IR net + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': input_shape, 'kind': 'data'}, + 'boundaries_input_data': {'shape': constant_value.shape, 'kind': 'data'}, + 'boundaries': {'type': 'Const', 'kind': 'op'}, + 'boundaries_data': {'kind': 'data', 'shape': constant_value.shape}, + 'bucketize': {'kind': 'op', 'type': 'Bucketize'}, + 'bucketize_data': {'shape': input_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'bucketize', {'in': 0}), + ('boundaries_input_data', 'boundaries'), + ('boundaries', 'boundaries_data'), + ('boundaries_data', 'bucketize', {'in': 1}), + ('bucketize', 'bucketize_data'), + ('bucketize_data', 'result') + ]) + + return tf_net, ref_net + + test_data_float32 = [ + dict(input_shape=[5], input_type=tf.float32, boundaries_size=1), + dict(input_shape=[5], input_type=tf.float32, boundaries_size=3), + 
dict(input_shape=[4, 8], input_type=tf.float32, boundaries_size=5), + dict(input_shape=[2, 4, 7], input_type=tf.float32, boundaries_size=10), + dict(input_shape=[2, 4, 7, 8], input_type=tf.float32, boundaries_size=12), + dict(input_shape=[2, 4, 7, 8, 10], input_type=tf.float32, boundaries_size=14)] + + @pytest.mark.parametrize("params", test_data_float32) + @pytest.mark.nightly + def test_bucketize_float32(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bucketize_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_int32 = [ + dict(input_shape=[5], input_type=tf.int32, boundaries_size=1), + dict(input_shape=[5], input_type=tf.int32, boundaries_size=3), + dict(input_shape=[4, 8], input_type=tf.int32, boundaries_size=5), + dict(input_shape=[2, 4, 7], input_type=tf.int32, boundaries_size=10), + dict(input_shape=[2, 4, 7, 8], input_type=tf.float32, boundaries_size=12), + dict(input_shape=[2, 4, 7, 8, 10], input_type=tf.float32, boundaries_size=14)] + + @pytest.mark.parametrize("params", test_data_int32) + @pytest.mark.nightly + def test_bucketize_int32(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_bucketize_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Ceil.py b/tests/layer_tests/tensorflow_tests/test_tf_Ceil.py new file mode 100644 index 00000000000..c516a598b88 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Ceil.py @@ -0,0 +1,78 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestCeil(CommonTFLayerTest): + def create_ceil_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Ceil => Input->Ceil + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.math.ceil(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'Ceiling': {'kind': 'op', 'type': 'Ceiling'}, + 'Ceiling_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'Ceiling'), + ('Ceiling', 'Ceiling_data'), + ('Ceiling_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[3, 2, 3, 7, 6])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_ceil_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_ceil_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[2, 5]), + dict(shape=[5, 3, 7, 4]), + dict(shape=[3, 2, 3, 7, 6])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def 
test_ceil(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_ceil_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Concat.py b/tests/layer_tests/tensorflow_tests/test_tf_Concat.py new file mode 100644 index 00000000000..cfa6ca0e958 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Concat.py @@ -0,0 +1,114 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestConcat(CommonTFLayerTest): + def create_concat_net(self, shape, axis, ir_version): + """ + Tensorflow net IR net + + Input->Concat => Input->Concat + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + + ax = axis + + input_shape_x = shape.copy() + # reshaping + if len(input_shape_x) >= 3: + input_shape_x.append(input_shape_x.pop(1)) + + # TODO: add concat with const inputs to check fusing (as in ONNX) + + x = tf.compat.v1.placeholder(tf.float32, input_shape_x, 'Input') + y = tf.compat.v1.placeholder(tf.float32, input_shape_x, 'Input') # Input_1 in graph_def + + concat = tf.concat([x, y], axis=ax, name='Operation') + concat_shape = concat.shape.as_list() + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + # Format axis to positive value + concat_ax = axis if axis >= 0 else axis + len(shape) + if len(shape) >= 3: + # Permute shape to (N,C,...) 
format and compute correct axis value + order = [0, len(concat_shape) - 1] + list(range(1, len(concat_shape) - 1)) + concat_shape = [concat_shape[i] for i in order] + concat_ax = order.index(concat_ax) + + ref_net = None + + return tf_net, ref_net + + # TODO: create tests for concat with 1 input and multiple inputs + + test_data_1D = [dict(shape=[1], axis=0), + dict(shape=[1], axis=-1)] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_concat_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [dict(shape=[1, 224], axis=0), + dict(shape=[1, 224], axis=-1)] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_concat_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [pytest.param(dict(shape=[1, 3, 224], axis=0), marks=pytest.mark.xfail(reason="*-19053")), + pytest.param(dict(shape=[1, 3, 224], axis=-1), marks=pytest.mark.xfail(reason="*-19053")), + pytest.param(dict(shape=[1, 3, 224], axis=2), marks=pytest.mark.xfail(reason="*-19053"))] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_concat_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [dict(shape=[1, 3, 100, 224], axis=0), + dict(shape=[1, 3, 100, 224], axis=-1), + dict(shape=[1, 3, 100, 224], axis=2)] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_concat_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [dict(shape=[1, 3, 50, 100, 224], axis=0), + dict(shape=[1, 3, 50, 100, 224], axis=-1), + dict(shape=[1, 3, 50, 100, 224], axis=2)] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_concat_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_concat_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ELU.py b/tests/layer_tests/tensorflow_tests/test_tf_ELU.py new file mode 100644 index 00000000000..7e183cf9c34 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ELU.py @@ -0,0 +1,88 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestELU(CommonTFLayerTest): + def create_elu_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->ELU => Input->ELU + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + + shapes = shape.copy() + # reshaping + if len(shapes) >= 4: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.nn.elu(input, name='Operation') + + 
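+            # Note: tf.nn.elu computes x for x > 0 and alpha * (exp(x) - 1) for x <= 0 (alpha = 1),
+            # which should correspond to the single 'Elu' layer in the reference IR below.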
tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'ELU': {'kind': 'op', 'type': 'Elu'}, + 'ELU_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'ELU'), + ('ELU', 'ELU_data'), + ('ELU_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_elu_precommit(self, params, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU': + pytest.skip("5D tensors is not supported on GPU") + self._test(*self.create_elu_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[10, 12]), + dict(shape=[8, 10, 12]), + dict(shape=[6, 8, 10, 12]), + dict(shape=[4, 6, 8, 10, 12])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_elu(self, params, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU': + pytest.skip("5D tensors is not supported on GPU") + self._test(*self.create_elu_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Eltwise.py b/tests/layer_tests/tensorflow_tests/test_tf_Eltwise.py new file mode 100644 index 00000000000..d6880f3e0a1 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Eltwise.py @@ -0,0 +1,79 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestEltwise(CommonTFLayerTest): + def create_eltwise_net(self, shape, operation, ir_version): + """ + Tensorflow net IR net + + Inputs->Eltwise => Inputs->Eltwise + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + + shapes = shape.copy() + # reshaping + if len(shapes) >= 4: + shapes.append(shapes.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + y = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') # Input_1 in graph_def + + if operation == 'sum': + tf.add(x, y, name='Operation') + elif operation == 'max': + tf.maximum(x, y, name='Operation') + elif operation == 'mul': + tf.multiply(x, y, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
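+        # Note: 'sum', 'max' and 'mul' above use tf.add, tf.maximum and tf.multiply on two inputs of
+        # identical shape, so these cases check plain element-wise ops without broadcasting.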
+ # + + ref_net = None + + return tf_net, ref_net + + test_data = [] + for operation in ['sum', 'max', 'mul']: + test_data.extend([dict(shape=[1, 224], operation=operation), + dict(shape=[1, 224, 224], operation=operation), + dict(shape=[1, 3, 224, 224], operation=operation)]) + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_eltwise(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_eltwise_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [] + for operation in ['sum', 'max', 'mul']: + test_data_5D.extend([dict(shape=[1, 3, 224, 224, 224], operation=operation)]) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.precommit + def test_eltwise_5D_precommit(self, params, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU': + pytest.skip("5D tensors is not supported on GPU") + self._test(*self.create_eltwise_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantize.py b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantize.py new file mode 100644 index 00000000000..d4d8fe91b84 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_FakeQuantize.py @@ -0,0 +1,125 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from mo.front.common.partial_infer.utils import int64_array +from unit_tests.utils.graph import build_graph, regular_op_with_shaped_data, connect, \ + shaped_data, connect_front, regular_op + + +class TestFakeQuantize(CommonTFLayerTest): + def _prepare_input(self, inputs_dict, kwargs): + assert len(inputs_dict) == 1 + assert np.array(list(inputs_dict.values())[0]) == np.array([11]) + assert 'nudged_il' in kwargs and kwargs['nudged_il'] is not None + assert 'nudged_ih' in kwargs and kwargs['nudged_ih'] is not None + assert 'expected_step' in kwargs and kwargs['expected_step'] is not None + + expected_nudged_input_min = kwargs['nudged_il'] + expected_nudged_input_max = kwargs['nudged_ih'] + expected_step = kwargs['expected_step'] + + return {list(inputs_dict.keys())[0]: np.array([ + expected_nudged_input_min - expected_step, + expected_nudged_input_min - 0.01, expected_nudged_input_min, + expected_nudged_input_min + 0.01, + expected_nudged_input_min + expected_step - 0.01, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step + 0.01, + expected_nudged_input_max - 0.01, expected_nudged_input_max, + expected_nudged_input_max + 0.01, + expected_nudged_input_max + expected_step + ])} + + def create_fake_quantize_net(self, il, ih, num_bits, narrow_range, nudged_il, nudged_ih, expected_step, ir_version): + # original tf model + import tensorflow as tf + tf.compat.v1.reset_default_graph() + with tf.compat.v1.Session() as sess: + data = tf.compat.v1.placeholder(tf.float32, [11], 'parameter') + input_min = tf.constant(il, name='input_min') + input_max = tf.constant(ih, name='input_max') + tf.quantization.fake_quant_with_min_max_vars(data, input_min, input_max, num_bits, narrow_range, 'fq') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # reference graph to compare with IR + ref_net = None + if check_ir_version(10, None, ir_version): + levels = 2 ** num_bits - 
int(narrow_range) + + # data (shape, value) -> const (shape, vale) -> data (shape, no value) + const_for_layer_tests = lambda name, value: { + **{name + '_dd': {'kind': 'data', 'value': value, 'shape': value.shape}}, + **{name: {'kind': 'op', 'type': 'Const'}}, + **shaped_data(name + '_d', int64_array(value.shape))} + + connect_const_for_layer_tests = lambda first_tensor_name, second_tensor_name: [ + *connect_front(first_tensor_name + '_dd', first_tensor_name), + *connect(first_tensor_name, second_tensor_name)] + + nodes = { + **regular_op_with_shaped_data('parameter', [11], {'type': 'Parameter'}), + **const_for_layer_tests('il', np.array([nudged_il], dtype=np.float32)), + **const_for_layer_tests('ih', np.array([nudged_ih], dtype=np.float32)), + **const_for_layer_tests('ol', np.array([nudged_il], dtype=np.float32)), + **const_for_layer_tests('oh', np.array([nudged_ih], dtype=np.float32)), + **regular_op_with_shaped_data('fq', [11], {'type': 'FakeQuantize', 'levels': levels}), + **regular_op('result', {'type': 'Result'}), + } + edges = [ + *connect('parameter', '0:fq'), + *connect_const_for_layer_tests('il', '1:fq'), + *connect_const_for_layer_tests('ih', '2:fq'), + *connect_const_for_layer_tests('ol', '3:fq'), + *connect_const_for_layer_tests('oh', '4:fq'), + *connect('fq', 'result'), + ] + ref_net = build_graph(nodes, edges) + + return tf_net, ref_net + + test_data = [ + # with8BitsNoScalingNoNudging + dict(il=0.0, ih=255.0, num_bits=8, narrow_range=False, nudged_il=0.0, nudged_ih=255.0, expected_step=1.0), + # with8BitsScalingAndNudgingDown + dict(il=0.5, ih=128.0, num_bits=8, narrow_range=False, nudged_il=0.0, nudged_ih=127.5, expected_step=0.5), + # with8BitsScalingAndNudgingUp + dict(il=-128.0, ih=-0.5, num_bits=8, narrow_range=False, nudged_il=-127.5, nudged_ih=0.0, expected_step=0.5), + # with8BitsScalingAndNudgingBetween + dict(il=-0.1, ih=127.4, num_bits=8, narrow_range=False, nudged_il=0.0, nudged_ih=127.5, expected_step=0.5), + # with8BitsNarrowRangeNoScalingNoNudging + dict(il=0.0, ih=254.0, num_bits=8, narrow_range=True, nudged_il=0.0, nudged_ih=254.0, expected_step=1.0), + # with8BitsNarrowRangeScalingAndNudgingDown + dict(il=0.1, ih=127.1, num_bits=8, narrow_range=True, nudged_il=0.0, nudged_ih=127.0, expected_step=0.5), + # with8BitsNarrowRangeScalingAndNudgingUp + dict(il=-127.1, ih=-0.1, num_bits=8, narrow_range=True, nudged_il=-127.0, nudged_ih=0.0, expected_step=0.5), + # with8BitsNarrowRangeScalingAndNudgingBetween + dict(il=-0.1, ih=126.9, num_bits=8, narrow_range=True, nudged_il=0.0, nudged_ih=127.0, expected_step=0.5), + # with7BitsNoScalingNoNudging + dict(il=0.0, ih=127.0, num_bits=7, narrow_range=False, nudged_il=0.0, nudged_ih=127.0, expected_step=1.0), + # with7BitsScalingAndNudgingDown + dict(il=0.5, ih=64.0, num_bits=7, narrow_range=False, nudged_il=0.0, nudged_ih=63.5, expected_step=0.5), + # with7BitsScalingAndNudgingUp + dict(il=-64.0, ih=-0.5, num_bits=7, narrow_range=False, nudged_il=-63.5, nudged_ih=0.0, expected_step=0.5), + # with7BitsScalingAndNudgingBetween + dict(il=-0.1, ih=63.4, num_bits=7, narrow_range=False, nudged_il=0.0, nudged_ih=63.5, expected_step=0.5), + # with7BitsNarrowRangeNoScalingNoNudging + dict(il=0.0, ih=126.0, num_bits=7, narrow_range=True, nudged_il=0.0, nudged_ih=126.0, expected_step=1.0), + # with7BitsNarrowRangeScalingAndNudgingDown + dict(il=0.1, ih=63.1, num_bits=7, narrow_range=True, nudged_il=0.0, nudged_ih=63.0, expected_step=0.5), + # with7BitsNarrowRangeScalingAndNudgingUp + dict(il=-63.1, ih=-0.1, num_bits=7, 
narrow_range=True, nudged_il=-63.0, nudged_ih=0.0, expected_step=0.5), + # with7BitsNarrowRangeScalingAndNudgingBetween + dict(il=-0.1, ih=62.9, num_bits=7, narrow_range=True, nudged_il=0.0, nudged_ih=63.0, expected_step=0.5)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_fake_quantize(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_fake_quantize_net(**params, ir_version=ir_version), ie_device, precision, ir_version, + kwargs_to_prepare_input=params, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Floor.py b/tests/layer_tests/tensorflow_tests/test_tf_Floor.py new file mode 100644 index 00000000000..9c47f1f5910 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Floor.py @@ -0,0 +1,78 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestFloor(CommonTFLayerTest): + def create_floor_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Floor => Input->Floor + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.floor(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'Floor': {'kind': 'op', 'type': 'Floor'}, + 'Floor_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'Floor'), + ('Floor', 'Floor_data'), + ('Floor_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[3, 2, 3, 7, 6])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_floor_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_floor_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[2, 5]), + dict(shape=[5, 3, 7, 4]), + dict(shape=[3, 2, 3, 7, 6])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_floor(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_floor_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Identity.py b/tests/layer_tests/tensorflow_tests/test_tf_Identity.py new file mode 100644 index 00000000000..6e963e85c4e --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Identity.py @@ -0,0 +1,85 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestIdentity(CommonTFLayerTest): + def create_identity_net(self, shape, 
ir_version): + """ + Tensorflow net IR net + + Input->Identity->ReLU => Input->ReLU + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x_shape = shape.copy() + # reshaping + if len(x_shape) >= 3: + x_shape.append(x_shape.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, x_shape, 'Input') + id = tf.identity(x, name="Operation") + tf.nn.relu(id, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'inputX': {'kind': 'op', 'type': 'Parameter'}, + 'inputX_data': {'shape': shape, 'kind': 'data'}, + 'ReLU': {'kind': 'op', 'type': 'ReLU'}, + 'ReLU_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + ref_net = build_graph(nodes_attributes, + [('inputX', 'inputX_data'), + ('inputX_data', 'ReLU'), + ('ReLU', 'ReLU_data'), + ('ReLU_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_identity_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_identity_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + pytest.param(dict(shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_identity(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_identity_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Log.py b/tests/layer_tests/tensorflow_tests/test_tf_Log.py new file mode 100644 index 00000000000..f9790713951 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Log.py @@ -0,0 +1,79 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestLog(CommonTFLayerTest): + def create_log_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Log => Input->Log + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.math.log(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'log': {'kind': 'op', 'type': 'Log'}, + 'log_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = 
build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'log'), + ('log', 'log_data'), + ('log_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [ + pytest.param(dict(shape=[3, 2, 3, 7, 6]), marks=pytest.mark.skip(reason="Skipped until fixed"))] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_log_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_log_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[2, 5]), + dict(shape=[5, 3, 7, 4]), + dict(shape=[3, 2, 3, 7, 6])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_log(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_log_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py new file mode 100644 index 00000000000..8cb3087cc3e --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_LogSoftmax.py @@ -0,0 +1,138 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from distutils.version import LooseVersion + +import numpy as np +import pytest +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from mo.front.common.partial_infer.utils import int64_array +from unit_tests.utils.graph import build_graph + + +class TestLogSoftmax(CommonTFLayerTest): + def create_log_softmax_net(self, shape, reduction_axis, ir_version): + """ + Tensorflow net IR net + + Input->LogSoftmax => Input->Softmax->Log + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + if LooseVersion(tf.__version__) < LooseVersion('2.0.0'): + tf.nn.log_softmax(input, name='Operation', axis=reduction_axis) + else: + tf.nn.log_softmax(input, axis=reduction_axis, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + ref_net = None + + reduce_sum_shape = np.copy(shape) + rank = len(shape) + if rank in {4, 5}: + reduction_axis = reduction_axis if reduction_axis >= 0 else rank + reduction_axis + if rank == 4: + reduction_axis = {0: 0, 1: 2, 2: 3, 3: 1}[reduction_axis] + else: + reduction_axis = {0: 0, 1: 2, 2: 3, 3: 4, 4: 1}[reduction_axis] + + reduce_sum_shape[reduction_axis] = 1 + + converted_shape = shape if rank != 1 else shape[0] + if check_ir_version(10, None, ir_version): + ref_nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter', 'shape': converted_shape}, + 'input_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'reduce_max_axis_val': {'shape': int64_array([reduction_axis]).shape, + 'kind': 'data', + 'value': int64_array([reduction_axis])}, + 'reduce_max_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_max_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_max': {'type': 'ReduceMax', 'kind': 'op', 'keep_dims': True}, + 'reduce_max_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_first': {'type': 'Subtract', 
'kind': 'op'}, + 'sub_first_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'reduce_sum_axis_val': {'shape': int64_array([reduction_axis]).shape, + 'kind': 'data', + 'value': int64_array([reduction_axis])}, + 'reduce_sum_axis': {'type': 'Const', 'kind': 'op', 'shape': 1}, + 'reduce_sum_axis_data': {'shape': int64_array([1]), 'kind': 'data', 'value': None}, + 'reduce_sum': {'type': 'ReduceSum', 'kind': 'op', 'keep_dims': True}, + 'reduce_sum_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'exp': {'type': 'Exp', 'kind': 'op'}, + 'exp_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'log': {'type': 'Log', 'kind': 'op'}, + 'log_data': {'shape': reduce_sum_shape, 'kind': 'data', 'value': None}, + 'sub_second': {'type': 'Subtract', 'kind': 'op'}, + 'sub_second_data': {'shape': shape, 'kind': 'data', 'value': None}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_edges = [ + ('input', 'input_data'), + ('reduce_max_axis_val', 'reduce_max_axis'), + ('reduce_max_axis', 'reduce_max_axis_data'), + ('reduce_max_axis_data', 'reduce_max', {'in': 1}), + ('reduce_max', 'reduce_max_data'), + ('input_data', 'reduce_max', {'out': 0, 'in': 0}), + ('input_data', 'sub_first', {'out': 0, 'in': 0}), + ('reduce_max_data', 'sub_first', {'in': 1}), + ('sub_first', 'sub_first_data'), + ('reduce_sum_axis_val', 'reduce_sum_axis'), + ('reduce_sum_axis', 'reduce_sum_axis_data'), + ('reduce_sum_axis_data', 'reduce_sum', {'in': 1}), + ('reduce_sum', 'reduce_sum_data'), + ('sub_first_data', 'exp'), + ('exp', 'exp_data'), + ('exp_data', 'reduce_sum', {'in': 0}), + ('reduce_sum_data', 'log'), + ('log', 'log_data'), + ('log_data', 'sub_second', {'in': 1}), + ('sub_second', 'sub_second_data'), + ('sub_first_data', 'sub_second', {'out': 0, 'in': 0}), + ('sub_second_data', 'result'), + ] + + ref_net = build_graph(ref_nodes_attributes, ref_edges) + + return tf_net, ref_net + + test_data_precommit = [ + pytest.param(dict(shape=[3, 2, 3, 7, 6], reduction_axis=-1), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_log_softmax_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_log_softmax_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1], reduction_axis=-1), + dict(shape=[2, 5], reduction_axis=-1), + dict(shape=[5, 3, 7, 4], reduction_axis=-1), + dict(shape=[3, 2, 3, 7, 6], reduction_axis=-1)] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_log_softmax(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_log_softmax_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Mul.py b/tests/layer_tests/tensorflow_tests/test_tf_Mul.py new file mode 100644 index 00000000000..a877e639d65 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Mul.py @@ -0,0 +1,279 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestMul(CommonTFLayerTest): + def create_mul_placeholder_const_net(self, x_shape, y_shape, ir_version): + """ + Tensorflow net IR net + + Placeholder->Mul => Placeholder->Eltwise or Power or ScaleShift + / / + Const-------/ Const-------/ + + """ + + # + # 
Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_x_shape = x_shape.copy() + tf_y_shape = y_shape.copy() + # reshaping + if len(tf_x_shape) >= 3: + tf_x_shape.append(tf_x_shape.pop(1)) + if len(tf_y_shape) >= 3: + tf_y_shape.append(tf_y_shape.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input') + constant_value = np.random.randint(-255, 255, tf_y_shape).astype(np.float32) + if (constant_value == 1).all(): + # Avoid elimination of the layer from IR + constant_value = constant_value + 1 + y = tf.constant(constant_value) + + mul = tf.multiply(x, y, name="Operation") + mul_shape = mul.shape.as_list() + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + if len(mul_shape) >= 3: + # Permute mul_shape to (N,C,...) format + order = [0, len(mul_shape) - 1] + list(range(1, len(mul_shape) - 1)) + mul_shape = [mul_shape[i] for i in order] + + y_shape_to_compare = tf_y_shape.copy() + while len(y_shape_to_compare) < len(x_shape): + # Expand shape of constant with 1 + y_shape_to_compare = [1] + y_shape_to_compare + constant_value = np.expand_dims(constant_value, axis=0) + + if len(y_shape_to_compare) >= 3: + # Permute constant_value to (N,C,...) format for correct further reshape + order = [0, len(y_shape_to_compare) - 1] + list(range(1, len(y_shape_to_compare) - 1)) + y_shape_to_compare = [y_shape_to_compare[i] for i in order] + constant_value = np.transpose(constant_value, order) + + ref_net = None + + return tf_net, ref_net + + # TODO: implement tests for 2 Consts + Mul + + test_data_1D = [ + # Power + dict(x_shape=[1], y_shape=[1]), + # Eltwise + pytest.param(dict(x_shape=[3], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19180")) + ] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_mul_placeholder_const_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[1, 3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[2, 3], y_shape=[2, 3]) + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_mul_placeholder_const_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1, 1, 1]), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1, 3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 3], y_shape=[1, 1, 3]), + marks=[pytest.mark.xfail(reason="*-19053"), pytest.mark.xfail(reason="*-18830")]), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")) + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_mul_placeholder_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + 
self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[1, 3, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 3], y_shape=[1, 1, 1, 3]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 222, 224], y_shape=[1, 3, 222, 224]) + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_mul_placeholder_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1, 1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 3, 1, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 1, 3], y_shape=[1, 1, 1, 1, 3]), + marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 3, 50, 100, 224]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_mul_placeholder_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + ############################################################################################### + # # + # Broadcast cases # + # # + ############################################################################################### + + test_data_broadcast_1D = [ # Power + dict(x_shape=[3], y_shape=[1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_1D) + @pytest.mark.nightly + def test_mul_placeholder_const_broadcast_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_broadcast_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[3, 1], y_shape=[3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[1, 3, 1, 1]), marks=pytest.mark.xfail(reason="*-19051")) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_2D) + @pytest.mark.nightly + def test_mul_placeholder_const_broadcast_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_broadcast_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1]), + # Power + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1]), marks=pytest.mark.xfail(reason="*-19053")), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[3, 1, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[2, 
3, 1], y_shape=[1, 3, 2]), marks=pytest.mark.xfail(reason="*-19053")), + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_broadcast_3D) + @pytest.mark.nightly + def test_mul_placeholder_const_broadcast_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_broadcast_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3, 1, 1], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[3]), + # ScaleShift + dict(x_shape=[1, 3, 100, 224], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[3, 1, 2]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + dict(x_shape=[1, 3, 100, 224], y_shape=[1, 1, 1, 224]), + # Eltwise + dict(x_shape=[2, 3, 1, 2], y_shape=[1, 3, 2, 1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_mul_placeholder_const_broadcast_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_broadcast_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + dict(x_shape=[1, 3, 5, 1, 2], y_shape=[5, 3, 2, 1]), + # Eltwise + dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 1, 1, 1, 224]), + # Eltwise + dict(x_shape=[2, 3, 1, 2, 1], y_shape=[1, 3, 2, 1, 1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_5D) + @pytest.mark.nightly + def test_mul_placeholder_const_broadcast_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_mul_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py new file mode 100644 index 00000000000..1dc4b914172 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_NormalizeL2.py @@ -0,0 +1,207 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest +from mo.front.common.partial_infer.utils import int64_array +from unit_tests.utils.graph import build_graph +from tensorflow_tests.permutation_utils import permute_nchw_to_nhwc, permute_nhwc_to_nchw + + +class TestNormalizeL2(CommonTFLayerTest): + @staticmethod + def build_tf_graph(shape, axes): + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + # Permute NCHW -> NHWC for TF network creation + net_shape = permute_nchw_to_nhwc(shape) + + data = tf.compat.v1.placeholder(tf.float32, shape=net_shape, name='data') + + result = tf.math.l2_normalize(data, + axes, + name='Operation') + + 
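+            # Note: tf.math.l2_normalize(x, axes) computes x * max(reduce_sum(x ** 2, axes), eps) ** -0.5
+            # with eps = 1e-12; the "non fusable" reference graph below rebuilds this exact chain
+            # (Power 2 -> ReduceSum -> Maximum 1e-12 -> Power -0.5 -> Multiply), while the fusable case
+            # is expected to become a single NormalizeL2 layer.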
tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + return tf_net + + @staticmethod + def create_normalize_l2_net_fusable(shape, axes, output_axes, ir_version): + tf_net = TestNormalizeL2.build_tf_graph(shape, axes) + + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'axes_input_data': {'shape': int64_array([len(axes)]), 'kind': 'data', 'value': int64_array(output_axes)}, + 'axes': {'kind': 'op', 'type': 'Const'}, + 'axes_data': {'shape': int64_array([len(axes)]), 'kind': 'data'}, + 'normalize_l2': {'kind': 'op', 'type': 'NormalizeL2'}, + 'normalize_l2_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'normalize_l2', {'out': 0, 'in': 0}), + ('axes_input_data', 'axes'), + ('axes', 'axes_data'), + ('axes_data', 'normalize_l2', {'in': 1, 'out': 0}), + ('normalize_l2', 'normalize_l2_data'), + ('normalize_l2_data', 'result'), + ]) + + return tf_net, ref_net + + @staticmethod + def create_normalize_l2_net_non_fusable(shape, axes, output_axes, ir_version): + tf_net = TestNormalizeL2.build_tf_graph(shape, axes) + + reduced_shape = permute_nchw_to_nhwc(shape).copy() + for axis in axes: + reduced_shape[axis] = 1 + reduced_shape = permute_nhwc_to_nchw(reduced_shape) + + eltwise_shapes = int64_array(np.ones(len(shape))) + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + + 'power_const_input_data': {'shape': int64_array([1]), 'kind': 'data', 'value': np.array([2.0])}, + 'power_const': {'kind': 'op', 'type': 'Const'}, + 'power_const_data': {'shape': eltwise_shapes, 'kind': 'data'}, + 'power': {'kind': 'op', 'type': 'Power'}, + 'power_data': {'shape': shape, 'kind': 'data'}, + + 'reduce': {'kind': 'op', 'type': 'ReduceSum', 'keep_dims': True}, + 'reduce_data': {'shape': reduced_shape, 'kind': 'data'}, + 'reduce_axes_input_data': {'shape': int64_array([len(axes)]), 'kind': 'data', + 'value': int64_array(output_axes)}, + 'reduce_axes': {'kind': 'op', 'type': 'Const'}, + 'reduce_axes_data': {'shape': int64_array([len(axes)]), 'kind': 'data'}, + + 'maximum_const_input_data': {'shape': int64_array([1]), 'kind': 'data', 'value': np.array([1e-12])}, + 'maximum_const': {'kind': 'op', 'type': 'Const'}, + 'maximum_const_data': {'shape': eltwise_shapes, 'kind': 'data'}, + 'maximum': {'kind': 'op', 'type': 'Maximum'}, + 'maximum_data': {'shape': reduced_shape, 'kind': 'data'}, + + 'power2_const_input_data': {'shape': int64_array([1]), 'kind': 'data', 'value': np.array([-0.5])}, + 'power2_const': {'kind': 'op', 'type': 'Const'}, + 'power2_const_data': {'shape': eltwise_shapes, 'kind': 'data'}, + 'power2': {'kind': 'op', 'type': 'Power'}, + 'power2_data': {'shape': reduced_shape, 'kind': 'data'}, + + 'multiply': {'kind': 'op', 'type': 'Multiply'}, + 'multiply_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'}, + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + + ('input_data', 'power', {'out': 0, 'in': 0}), + ('power_const_input_data', 'power_const'), + ('power_const', 'power_const_data'), + ('power_const_data', 'power', {'out': 0, 'in': 1}), + ('power', 'power_data'), + + ('power_data', 'reduce', {'out': 0, 'in': 0}), + ('reduce_axes_input_data', 'reduce_axes'), + ('reduce_axes', 'reduce_axes_data'), + ('reduce_axes_data', 'reduce', {'out': 0, 'in': 1}), + ('reduce', 
'reduce_data'), + + ('reduce_data', 'maximum', {'out': 0, 'in': 0}), + ('maximum_const_input_data', 'maximum_const'), + ('maximum_const', 'maximum_const_data'), + ('maximum_const_data', 'maximum', {'out': 0, 'in': 1}), + ('maximum', 'maximum_data'), + + ('maximum_data', 'power2', {'out': 0, 'in': 0}), + ('power2_const_input_data', 'power2_const'), + ('power2_const', 'power2_const_data'), + ('power2_const_data', 'power2', {'out': 0, 'in': 1}), + ('power2', 'power2_data'), + + ('input_data', 'multiply', {'out': 0, 'in': 0}), + ('power2_data', 'multiply', {'out': 0, 'in': 1}), + ('multiply', 'multiply_data'), + ('multiply_data', 'result'), + ]) + + return tf_net, ref_net + + test_data_fusable_precommit = [ + pytest.param(dict(shape=[2, 3, 5], axes=[1, -1], output_axes=[1, 2]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(shape=[2, 3, 5, 7], axes=[1, 2, 3], output_axes=[2, 3, 1]), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + @pytest.mark.parametrize("params", test_data_fusable_precommit) + @pytest.mark.precommit + def test_NormalizeL2_fusable_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_normalize_l2_net_fusable(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_non_fusable_precommit = [ + pytest.param(dict(shape=[2, 3, 5], axes=[0, 1, 2], output_axes=[0, 1, 2]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(shape=[2, 3, 5, 7, 9], axes=[-1], output_axes=[1]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(shape=[2, 3, 5, 7, 9], axes=[1, 2, 3, 4], output_axes=[2, 3, 4, 1]), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + @pytest.mark.parametrize("params", test_data_non_fusable_precommit) + @pytest.mark.precommit + def test_NormalizeL2_non_fusable_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_normalize_l2_net_non_fusable(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_fusable = [ + dict(shape=[5, 6], axes=[1], output_axes=[1]), + dict(shape=[2, 3, 5], axes=[1], output_axes=[1]), + dict(shape=[2, 3, 5], axes=[-2], output_axes=[1]), + dict(shape=[2, 3, 5], axes=[1, -1], output_axes=[1, 2]), + dict(shape=[2, 3, 5, 7], axes=[-1], output_axes=[1]), + dict(shape=[2, 3, 5, 7], axes=[1, 2, 3], output_axes=[2, 3, 1]), + ] + + @pytest.mark.parametrize("params", test_data_fusable) + @pytest.mark.nightly + def test_NormalizeL2_fusable(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_normalize_l2_net_fusable(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_non_fusable = [ + dict(shape=[5], axes=[0], output_axes=[0]), + dict(shape=[5, 6], axes=[0], output_axes=[0]), + dict(shape=[5, 6], axes=[0, 1], output_axes=[0, 1]), + dict(shape=[2, 3, 5], axes=[0], output_axes=[0]), + dict(shape=[2, 3, 5], axes=[2], output_axes=[2]), + dict(shape=[2, 3, 5], axes=[0, 1, 2], output_axes=[0, 1, 2]), + dict(shape=[2, 3, 5, 7], axes=[0], output_axes=[0]), + dict(shape=[2, 3, 5, 7], axes=[1], output_axes=[2]), + dict(shape=[2, 3, 5, 7], axes=[2], output_axes=[3]), + dict(shape=[2, 3, 5, 7], axes=[1, 2], output_axes=[2, 3]), + dict(shape=[2, 3, 5, 7], axes=[1, 3], output_axes=[2, 1]), + dict(shape=[2, 3, 5, 7], axes=[0, 1, 2], output_axes=[0, 2, 3]), + dict(shape=[2, 3, 5, 7, 9], axes=[-1], output_axes=[1]), + 
dict(shape=[2, 3, 5, 7, 9], axes=[1, 2, 3, 4], output_axes=[2, 3, 4, 1]), + ] + + @pytest.mark.parametrize("params", test_data_non_fusable) + @pytest.mark.nightly + def test_NormalizeL2_non_fusable(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_normalize_l2_net_non_fusable(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_OneHot.py b/tests/layer_tests/tensorflow_tests/test_tf_OneHot.py new file mode 100644 index 00000000000..5e2d1d9131b --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_OneHot.py @@ -0,0 +1,159 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from common.tf_layer_test_class import CommonTFLayerTest +from tensorflow_tests.permutation_utils import permute_nchw_to_nhwc + + +class TestOneHot(CommonTFLayerTest): + @staticmethod + def create_one_hot_net(shape, depth, on_value, off_value, axis, ir_version): + """ + Tensorflow net + + Input -> OneHot + + IR net (can contain Permutes for input/output of OneHot, depending on shapes), all cases are: + + Input (< 3D) -> OneHot + + Input (3D) -> OneHot -> Permute (NHWC -> NCHW) + + Input (> 3D) -> Permute (NCHW -> NHWC) -> OneHot -> Permute (NHWC -> NCHW) + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + # Permute NCHW -> NHWC for TF network creation + net_shape = permute_nchw_to_nhwc(shape) + + indices = tf.compat.v1.placeholder(tf.int32, shape=net_shape, name='input_indices') + + result = tf.one_hot(indices, + depth, + on_value, + off_value, + axis, + name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # + + if on_value is None: + on_value = 1.0 + if off_value is None: + off_value = 0.0 + + axis = len(shape) if axis is None else axis + + ref_net = None + + return tf_net, ref_net + + test_data_1D = [ + # check for default on/off value, axis params + dict(shape=[5], depth=7, on_value=None, off_value=None, axis=None), + dict(shape=[5], depth=7, on_value=2.0, off_value=-1.0, axis=0)] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_OneHot_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_one_hot_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + dict(shape=[5, 6], depth=7, on_value=None, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6], depth=7, on_value=5.0, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6], depth=7, on_value=None, off_value=-1.0, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6], depth=7, on_value=None, off_value=None, axis=1), + # check for default on/off value, axis params + dict(shape=[5, 6], depth=7, on_value=2.0, off_value=-3.0, axis=0), + dict(shape=[5, 6], depth=7, on_value=2.0, off_value=-3.0, axis=1), + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_OneHot_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_one_hot_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + dict(shape=[5, 6, 
7], depth=8, on_value=None, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7], depth=8, on_value=6.0, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7], depth=8, on_value=None, off_value=4.0, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7], depth=8, on_value=None, off_value=None, axis=1), + # check for default on/off value, axis params + dict(shape=[5, 6, 7], depth=8, on_value=None, off_value=None, axis=0), + dict(shape=[5, 6, 7], depth=8, on_value=None, off_value=None, axis=1), + dict(shape=[5, 6, 7], depth=8, on_value=None, off_value=None, axis=2), + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_OneHot_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_one_hot_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=2), + # check for default on/off value, axis params + dict(shape=[5, 6, 7, 8], depth=9, on_value=5.0, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=6.0, axis=None), + # check for default on/off value, axis params + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=0), + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=1), + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=2), + dict(shape=[5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=3), + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_OneHot_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_one_hot_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=2.0, off_value=None, axis=None), + # check for default on/off value, axis params + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=4.0, axis=None), + # check for default on/off value, axis params + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=1), + # check for default on/off value, axis params + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=0), + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=1), + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=2), + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=3), + dict(shape=[4, 5, 6, 7, 8], depth=9, on_value=None, off_value=None, axis=4), + ] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_OneHot_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_one_hot_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py new file mode 100644 index 00000000000..c0dd7f96aae --- /dev/null +++ 
b/tests/layer_tests/tensorflow_tests/test_tf_Pooling.py @@ -0,0 +1,230 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestPooling(CommonTFLayerTest): + def create_pooling_net(self, kernel_size, strides, pads, in_shape, out_shape, method, ir_version): + """ + Tensorflow net IR net + + Input->Pooling => Input->Pooling + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + + pads_begin, pads_end, padding = pads + + # 4D tensors + if len(in_shape) == 4: + input_shape = [in_shape[0], in_shape[2], in_shape[3], in_shape[1]] + input = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') + + stride = [1, strides[0], strides[1], 1] + kernel = [1, kernel_size[0], kernel_size[1], 1] + + if method == 'max': + tf.nn.max_pool2d(input=input, ksize=kernel, strides=stride, padding=padding, name='Operation') + elif method == 'avg': + tf.nn.avg_pool2d(input=input, ksize=kernel, strides=stride, padding=padding, name='Operation') + + # 5D tensors + elif len(in_shape) == 5: + input_shape = [in_shape[0], in_shape[2], in_shape[3], in_shape[4], in_shape[1]] + input = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') + + stride = [1, strides[0], strides[1], strides[2], 1] + kernel = [1, kernel_size[0], kernel_size[1], kernel_size[2], 1] + + if method == 'max': + tf.nn.max_pool3d(input, kernel, stride, padding, name='Operation') # , data_format='NCHW') + elif method == 'avg': + tf.nn.avg_pool3d(input, kernel, stride, padding, name='Operation') # , data_format='NCHW') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
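+ # Note: with 'SAME' padding TF produces out = ceil(in / stride) per spatial dim (the asymmetric + # split of the padding goes to pads_begin/pads_end), and with 'VALID' out = floor((in - kernel) / stride) + 1; + # the out_shape values in the test data below follow these formulas.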
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': in_shape, 'kind': 'data'}, + 'pooling': {'kernel': kernel_size, 'pads_begin': pads_begin, 'pads_end': pads_end, + 'strides': strides, 'kind': 'op', 'type': None}, + 'pooling_data': {'shape': out_shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + if method == 'avg': + nodes_attributes['pooling']['type'] = 'AvgPool' + elif method == 'max': + nodes_attributes['pooling']['type'] = 'MaxPool' + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'pooling'), + ('pooling', 'pooling_data'), + ('pooling_data', 'result') + ]) + + return tf_net, ref_net + + test_data_4D = [] + for method in ['max', 'avg']: + test_data_4D.extend([dict(kernel_size=[1, 1], strides=[1, 1], pads=[[0, 0], [0, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 224, 224], method=method), + dict(kernel_size=[2, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], method=method), + dict(kernel_size=[2, 4], strides=[2, 4], pads=[[0, 0], [0, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 56], method=method), + dict(kernel_size=[4, 2], strides=[4, 2], pads=[[0, 0], [0, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 56, 112], method=method), + dict(kernel_size=[2, 3], strides=[2, 3], pads=[[0, 0], [0, 1], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 75], method=method), + dict(kernel_size=[3, 2], strides=[3, 2], pads=[[0, 0], [1, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 75, 112], method=method), + dict(kernel_size=[3, 3], strides=[2, 2], pads=[[0, 0], [1, 1], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], method=method), + dict(kernel_size=[3, 2], strides=[2, 2], pads=[[0, 0], [1, 0], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], method=method), + dict(kernel_size=[2, 3], strides=[2, 3], pads=[[0, 0], [0, 1], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 75], method=method), + dict(kernel_size=[111, 111], strides=[111, 111], pads=[[54, 54], [55, 55], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 3], method=method), + dict(kernel_size=[111, 113], strides=[111, 113], pads=[[54, 1], [55, 1], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 2], method=method), + dict(kernel_size=[113, 113], strides=[113, 113], pads=[[1, 1], [1, 1], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 2, 2], method=method), + dict(kernel_size=[113, 113], strides=[111, 111], pads=[[55, 55], [56, 56], 'SAME'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 3, 3], method=method)]) + + test_data_4D.extend([dict(kernel_size=[1, 1], strides=[1, 1], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 224, 224], method=method), + dict(kernel_size=[2, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 112], method=method), + dict(kernel_size=[2, 4], strides=[2, 4], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 56], method=method), + dict(kernel_size=[4, 2], strides=[4, 2], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 56, 112], method=method), + dict(kernel_size=[2, 3], strides=[2, 3], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 74], method=method), + 
dict(kernel_size=[3, 2], strides=[3, 2], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 74, 112], method=method), + dict(kernel_size=[3, 3], strides=[2, 2], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 111, 111], method=method), + dict(kernel_size=[3, 2], strides=[2, 2], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 111, 112], method=method), + dict(kernel_size=[2, 3], strides=[2, 3], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 112, 74], method=method), + dict(kernel_size=[111, 111], strides=[111, 111], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 2, 2], method=method), + dict(kernel_size=[111, 113], strides=[111, 113], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 2, 1], method=method), + dict(kernel_size=[113, 113], strides=[113, 113], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 1, 1], method=method), + dict(kernel_size=[113, 113], strides=[111, 111], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 2, 2], method=method), + dict(kernel_size=[224, 224], strides=[1, 1], pads=[[0, 0], [0, 0], 'VALID'], + in_shape=[1, 3, 224, 224], out_shape=[1, 3, 1, 1], method=method)]) + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_pool_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_pooling_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [] + for method in ['max', 'avg']: + test_data_5D.extend([dict(kernel_size=[1, 1, 1], strides=[1, 1, 1], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 224, 224, 224], method=method), + dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + dict(kernel_size=[2, 2, 4], strides=[2, 2, 4], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 56], method=method), + dict(kernel_size=[4, 2, 2], strides=[4, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 56, 112, 112], method=method), + dict(kernel_size=[2, 2, 3], strides=[2, 2, 3], pads=[[0, 0, 0], [0, 0, 1], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 75], method=method), + dict(kernel_size=[3, 2, 2], strides=[3, 2, 2], pads=[[0, 0, 0], [1, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 75, 112, 112], method=method), + dict(kernel_size=[3, 3, 3], strides=[2, 2, 2], pads=[[0, 0, 0], [1, 1, 1], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + dict(kernel_size=[3, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [1, 0, 0], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + dict(kernel_size=[2, 2, 3], strides=[2, 2, 3], pads=[[0, 0, 0], [0, 0, 1], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 75], method=method), + dict(kernel_size=[111, 111, 111], strides=[111, 111, 111], + pads=[[54, 54, 54], [55, 55, 55], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 3, 3, 3], method=method), + dict(kernel_size=[111, 111, 113], strides=[111, 111, 113], + pads=[[54, 54, 1], [55, 55, 1], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 
3, 3, 2], method=method), + dict(kernel_size=[113, 113, 113], strides=[113, 113, 113], + pads=[[1, 1, 1], [1, 1, 1], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 2, 2, 2], method=method), + dict(kernel_size=[113, 113, 113], strides=[111, 111, 111], + pads=[[55, 55, 55], [56, 56, 56], 'SAME'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 3, 3, 3], method=method)]) + + test_data_5D.extend([dict(kernel_size=[1, 1, 1], strides=[1, 1, 1], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 224, 224, 224], method=method), + dict(kernel_size=[2, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 112], method=method), + dict(kernel_size=[2, 2, 4], strides=[2, 2, 4], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 56], method=method), + dict(kernel_size=[4, 2, 2], strides=[4, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 56, 112, 112], method=method), + dict(kernel_size=[2, 2, 3], strides=[2, 2, 3], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 74], method=method), + dict(kernel_size=[3, 2, 2], strides=[3, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 74, 112, 112], method=method), + dict(kernel_size=[3, 3, 3], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 111, 111, 111], method=method), + dict(kernel_size=[3, 2, 2], strides=[2, 2, 2], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 111, 112, 112], method=method), + dict(kernel_size=[2, 2, 3], strides=[2, 2, 3], pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 112, 112, 74], method=method), + dict(kernel_size=[111, 111, 111], strides=[111, 111, 111], + pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 2, 2, 2], method=method), + dict(kernel_size=[111, 111, 113], strides=[111, 111, 113], + pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 2, 2, 1], method=method), + dict(kernel_size=[113, 113, 113], strides=[113, 113, 113], + pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 1, 1, 1], method=method), + dict(kernel_size=[113, 113, 113], strides=[111, 111, 111], + pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 2, 2, 2], method=method), + dict(kernel_size=[224, 224, 224], strides=[1, 1, 1], + pads=[[0, 0, 0], [0, 0, 0], 'VALID'], + in_shape=[1, 3, 224, 224, 224], out_shape=[1, 3, 1, 1, 1], method=method)]) + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_pool_5D(self, params, ie_device, precision, ir_version, temp_dir): + if ie_device == 'GPU': + pytest.skip("5D tensors is not supported on GPU") + self._test(*self.create_pooling_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ReLU.py b/tests/layer_tests/tensorflow_tests/test_tf_ReLU.py new file mode 100644 index 00000000000..c178e6f8b55 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ReLU.py @@ -0,0 +1,85 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from 
common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestReLU(CommonTFLayerTest): + def create_relu_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->ReLU => Input->ReLU + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.nn.relu(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'ReLU': {'kind': 'op', 'type': 'ReLU'}, + 'ReLU_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'ReLU'), + ('ReLU', 'ReLU_data'), + ('ReLU_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_relu_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_relu_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + pytest.param(dict(shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_relu(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_relu_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py b/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py new file mode 100644 index 00000000000..fd846a82358 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_ReLU6.py @@ -0,0 +1,85 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestReLU6(CommonTFLayerTest): + def create_relu6_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->ReLU6 => Input->Clamp + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.nn.relu6(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
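+ # Note: TF relu6(x) = min(max(x, 0), 6), so the expected IR operation is Clamp with min=0 and max=6.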
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'ReLU6': {'kind': 'op', 'type': 'Clamp', "max": 6, "min": 0}, + 'ReLU6_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'ReLU6'), + ('ReLU6', 'ReLU6_data'), + ('ReLU6_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_relu6_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_relu6_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + pytest.param(dict(shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_relu6(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_relu6_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py b/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py new file mode 100644 index 00000000000..e3afdda6972 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Resample_pattern_new.py @@ -0,0 +1,81 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestResamplePattern(CommonTFLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(1, 256, inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_resample_net(self, shape, factor): + """ + The sub-graph in TF that could be expressed as a single Resample operation. + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_shape = shape.copy() + tf_shape = np.array(tf_shape)[[0, 2, 3, 1]] + + input = tf.compat.v1.placeholder(tf.float32, tf_shape, 'Input') + + transpose_1 = tf.transpose(a=input, perm=[1, 2, 3, 0]) + expand_dims = tf.expand_dims(transpose_1, 0) + tile = tf.tile(expand_dims, [factor * factor, 1, 1, 1, 1]) + bts = tf.batch_to_space(tile, [factor, factor], [[0, 0], [0, 0]]) + strided_slice = bts[0, ...] + tf.transpose(a=strided_slice, perm=[3, 0, 1, 2]) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
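+ # Note: the transpose/expand_dims/tile/batch_to_space/strided_slice chain above repeats every pixel + # factor x factor times, i.e. a nearest-neighbour upscale, which is expected to be fused into the + # single Resample layer described below.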
+ # + + new_shape = shape.copy() + new_shape[2] *= factor + new_shape[3] *= factor + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Input'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'resample': {'kind': 'op', 'type': 'caffe.ResampleParameter.NEAREST', "factor": factor, + "height": 0, "width": 0, "antialias": 0}, + 'resample_data': {'shape': new_shape, 'kind': 'data'}, + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'resample'), + ('resample', 'resample_data') + ]) + + return tf_net, ref_net + + test_data = [dict(shape=[1, 1, 100, 200], factor=2), + dict(shape=[1, 1, 200, 300], factor=3)] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + @pytest.mark.xfail(reason="*-22273") + def test_resample(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_resample_net(params['shape'], params['factor']), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Rsqrt.py b/tests/layer_tests/tensorflow_tests/test_tf_Rsqrt.py new file mode 100644 index 00000000000..953d73f6809 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Rsqrt.py @@ -0,0 +1,73 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestRsqrt(CommonTFLayerTest): + def _prepare_input(self, inputs_dict): + for input in inputs_dict.keys(): + inputs_dict[input] = np.random.randint(1, 256, inputs_dict[input]).astype(np.float32) + return inputs_dict + + def create_rsqrt_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Rsqrt => Input->Power + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) >= 3: + shapes.append(shapes.pop(1)) + input = tf.compat.v1.placeholder(tf.float32, shapes, 'Input') + + tf.math.rsqrt(input, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
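+ # Note: rsqrt(x) = x ** (-0.5), so the Rsqrt node is expected to become a Power layer with + # exponent -0.5 in the IR (the Input->Power mapping from the docstring above).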
+ # + + ref_net = None + + return tf_net, ref_net + + test_data_precommit = [dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_rsqrt_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_rsqrt_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + pytest.param(dict(shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_rsqrt(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_rsqrt_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Select.py b/tests/layer_tests/tensorflow_tests/test_tf_Select.py new file mode 100644 index 00000000000..ee24964589b --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Select.py @@ -0,0 +1,107 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest +from tensorflow_tests.permutation_utils import permute_nchw_to_nhwc + + +class TestSelect(CommonTFLayerTest): + def create_select_net(self, shape_condition, shape_input, ir_version): + """ + Tensorflow net IR net + + Condition --| Condition --| + v v + Input_1-> Select Input_1-> Select + ^ ^ + Input_2-----| Input_2-----| + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + # Permute shapes NCHW -> NHWC for TF network creation + shape_condition_net = permute_nchw_to_nhwc(shape_condition) + shape_input_net = permute_nchw_to_nhwc(shape_input) + + condition = tf.compat.v1.placeholder(tf.bool, shape_condition_net, 'Input_condition') + input_1 = tf.compat.v1.placeholder(tf.float32, shape_input_net, 'Input_1') + input_2 = tf.compat.v1.placeholder(tf.float32, shape_input_net, 'Input_2') + + tf.compat.v1.where(condition, input_1, input_2, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
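+ # Note: with a rank-1 condition tf.compat.v1.where selects whole slices along the first dimension + # of Input_1/Input_2, and with a full-shape condition it selects element-wise; both map to Select.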
+ # + + ref_net = None + + return tf_net, ref_net + + test_data_1D = [dict(shape_condition=[2], shape_input=[2])] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_select_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_select_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + dict(shape_condition=[2], shape_input=[2, 3]), + dict(shape_condition=[3, 5], shape_input=[3, 5]), + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_select_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_select_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + dict(shape_condition=[3], shape_input=[3, 4, 5]), + dict(shape_condition=[3, 4, 5], shape_input=[3, 4, 5]), + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_select_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_select_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + dict(shape_condition=[3], shape_input=[3, 4, 5, 6]), + dict(shape_condition=[3, 4, 5, 6], shape_input=[3, 4, 5, 6]), + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + @pytest.mark.precommit + def test_select_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_select_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(shape_condition=[3], shape_input=[3, 4, 5, 6, 7]), + dict(shape_condition=[3, 4, 5, 6, 7], shape_input=[3, 4, 5, 6, 7]), + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_select_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_select_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Softplus.py b/tests/layer_tests/tensorflow_tests/test_tf_Softplus.py new file mode 100644 index 00000000000..a295e4660e9 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Softplus.py @@ -0,0 +1,90 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSoftplus(CommonTFLayerTest): + def create_softplus_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Softplus => Input->Softplus + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.reset_default_graph() + + # Create the graph and model + with tf.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) > 3: + shapes.append(shapes.pop(1)) + input = tf.placeholder(tf.float32, shapes, 'Input') + + tf.math.softplus(input, name='Operation') + + tf.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
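+ # Note: softplus(x) = ln(1 + exp(x)); the expected IR operation is SoftPlus with no extra attributes.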
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'Softplus': {'kind': 'op', 'type': 'SoftPlus'}, + 'Softplus_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'Softplus'), + ('Softplus', 'Softplus_data'), + ('Softplus_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [ + pytest.param(dict(shape=[1, 3, 100, 224]), + marks=pytest.mark.skip(reason="Skipped until fixed")), + pytest.param(dict(shape=[1, 3, 50, 100, 224]), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_softplus_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_softplus_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + dict(shape=[1, 3, 224]), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_softplus(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_softplus_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py new file mode 100644 index 00000000000..f86ab7dc2ac --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_SpaceToBatch.py @@ -0,0 +1,80 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestSpaceToBatch(CommonTFLayerTest): + def create_space_to_batch_net(self, in_shape, pads_value, block_shape_value, out_shape, ir_version): + """ + Tensorflow net IR net + + Input->SpaceToBatch => Input->SpaceToBatch + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, in_shape, 'Input') + pads = tf.constant(pads_value) + block_shape = tf.constant(block_shape_value) + tf.space_to_batch(x, block_shape, pads, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
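+ # Note: space_to_batch zero-pads the spatial dims by 'pads_value' and moves blocks of size + # 'block_shape_value' into the batch dimension, so out_batch = in_batch * prod(block_shape_value) + # while each padded spatial dim is divided by its block size.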
+ # + + ref_net = None + + return tf_net, ref_net + + test_data_4D = [ + dict(in_shape=[4, 1, 1, 3], block_shape_value=[1], pads_value=[[0, 0]], + out_shape=[4, 1, 1, 3]), + dict(in_shape=[1, 2, 2, 1], block_shape_value=[2, 2], pads_value=[[0, 0], [0, 0]], + out_shape=[4, 1, 1, 3]), + dict(in_shape=[1, 2, 2, 3], block_shape_value=[2, 2], pads_value=[[0, 0], [0, 0]], + out_shape=[4, 1, 1, 3]), + dict(in_shape=[1, 2, 9, 1], block_shape_value=[4, 3], pads_value=[[1, 1], [2, 4]], + out_shape=[12, 1, 1, 3]), + # todo: enable these tests after supporting the general case on CPU + # dict(in_shape=[1, 2, 1, 4], block_shape_value=[3, 2, 2], pads_value=[[1, 0], [0, 1], [1, 1]], + # out_shape=[12, 1, 1, 3]), + # dict(in_shape=[2, 3, 6, 5], block_shape_value=[2, 3, 3], pads_value=[[1, 0], [0, 0], [2, 2]], + # out_shape=[36, 2, 2, 3]) + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_space_to_batch_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_space_to_batch_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(in_shape=[3, 3, 4, 5, 2], block_shape_value=[3, 4, 2], pads_value=[[1, 2], [0, 0], [3, 0]], + out_shape=[72, 2, 1, 4, 2]), + # todo: enable these tests after supporting the general case on CPU + # dict(in_shape=[3, 3, 4, 5, 2], block_shape_value=[3, 4, 2, 2], + # pads_value=[[1, 2], [0, 0], [3, 0], [0, 0]], out_shape=[144, 2, 1, 4, 1]), + ] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_space_to_batch_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_space_to_batch_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Squeeze.py b/tests/layer_tests/tensorflow_tests/test_tf_Squeeze.py new file mode 100644 index 00000000000..0ef13751b34 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Squeeze.py @@ -0,0 +1,124 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestSqueeze(CommonTFLayerTest): + def create_squeeze_net(self, shape, axis, ir_version): + """ + Tensorflow net IR net + + Input->Squeeze => Input->[Permute]->Reshape + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x_shape = shape.copy() + # reshaping + if len(x_shape) >= 3: + x_shape.append(x_shape.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, x_shape, 'Input') + squeeze = tf.squeeze(x, axis=axis, name="Operation") + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
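+ # Note: tf.squeeze removes the listed size-1 dimensions (all size-1 dimensions when 'axis' is + # empty); negative axes are normalized to non-negative indices below.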
+ # + + unsigned_axis = [ax if ax > -1 else len(x_shape) + ax for ax in axis] + if not unsigned_axis: + unsigned_axis = [i for i, dim in enumerate(shape) if dim == 1] + + ref_net = None + + return tf_net, ref_net + + test_data_1D = [ + pytest.param(dict(shape=[1], axis=[]), marks=pytest.mark.xfail(reason="*-18807")), + pytest.param(dict(shape=[1], axis=[0]), marks=pytest.mark.xfail(reason="*-18859")), + pytest.param(dict(shape=[1], axis=[-1]), marks=pytest.mark.xfail(reason="*-18859")) + ] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_squeeze_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + pytest.param(dict(shape=[1, 1], axis=[]), marks=pytest.mark.xfail(reason="*-18807")), + dict(shape=[1, 1], axis=[0]), + dict(shape=[1, 1], axis=[-1]) + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_squeeze_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + pytest.param(dict(shape=[1, 1, 3], axis=[]), + marks=[pytest.mark.xfail(reason="*-18807"), pytest.mark.xfail(reason="*-19053")]), + pytest.param(dict(shape=[1, 1, 3], axis=[0]), marks=pytest.mark.xfail(reason="*-19053")), + pytest.param(dict(shape=[1, 1, 3], axis=[-1]), marks=pytest.mark.xfail(reason="*-19053")) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_squeeze_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + pytest.param(dict(shape=[1, 1, 50, 100], axis=[]), marks=pytest.mark.xfail(reason="*-18807")), + dict(shape=[1, 1, 50, 100], axis=[0]), + dict(shape=[1, 1, 50, 100], axis=[-1]), + dict(shape=[1, 100, 50, 1], axis=[0, 2]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_squeeze_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_squeeze_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + pytest.param(dict(shape=[1, 1, 50, 100, 224], axis=[]), marks=pytest.mark.xfail(reason="*-18807")), + pytest.param(dict(shape=[1, 1, 50, 100, 224], axis=[0]), marks=pytest.mark.xfail(reason="*-18879")), + pytest.param(dict(shape=[1, 1, 50, 100, 224], axis=[-1]), marks=pytest.mark.xfail(reason="*-18879")), + dict(shape=[1, 224, 1, 100, 1], axis=[0, 3]), + dict(shape=[1, 224, 1, 100, 1], axis=[0, 1, 3]), + dict(shape=[1, 224, 1, 1, 100], axis=[0, 1, 2]), + dict(shape=[1, 224, 1, 1, 1], axis=[0, 1, 2, 3]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.special_xfail(args={'ie_device': 'GPU', 'precision': 'FP16', 'params': {'axis': [0, 3]}}, + reason="*-19394") + @pytest.mark.special_xfail(args={'ie_device': 'GPU', 'precision': 'FP16', 'params': {'axis': [0, 1, 3]}}, + reason="*-19394") + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_squeeze_5D(self, params, ie_device, precision, ir_version, temp_dir): + 
self._test(*self.create_squeeze_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Sub.py b/tests/layer_tests/tensorflow_tests/test_tf_Sub.py new file mode 100644 index 00000000000..00f63494255 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Sub.py @@ -0,0 +1,294 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest + +from common.tf_layer_test_class import CommonTFLayerTest + + +class TestSub(CommonTFLayerTest): + def create_sub_placeholder_const_net(self, x_shape, y_shape, ir_version): + """ + Tensorflow net IR net + + Placeholder->Sub => Placeholder->Eltwise or Power or ScaleShift + / / + Const-------/ Const-------/ + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + tf_x_shape = x_shape.copy() + tf_y_shape = y_shape.copy() + # reshaping + if len(tf_x_shape) >= 3: + tf_x_shape.append(tf_x_shape.pop(1)) + if len(tf_y_shape) >= 3: + tf_y_shape.append(tf_y_shape.pop(1)) + + x = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input') + constant_value = np.random.randint(-256, 256, tf_y_shape).astype(np.float32) + if (constant_value == 0).all(): + # Avoid elimination of the layer from IR + constant_value = constant_value + 1 + y = tf.constant(constant_value) + + sub = tf.subtract(x, y, name="Operation") + sub_shape = sub.shape.as_list() + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! + # + + if len(sub_shape) >= 3: + # Permute sub_shape to (N,C,...) format + order = [0, len(sub_shape) - 1] + list(range(1, len(sub_shape) - 1)) + sub_shape = [sub_shape[i] for i in order] + + y_shape_to_compare = tf_y_shape.copy() + while len(y_shape_to_compare) < len(x_shape): + # Expand shape of constant with 1 + y_shape_to_compare = [1] + y_shape_to_compare + constant_value = np.expand_dims(constant_value, axis=0) + + if len(y_shape_to_compare) >= 3: + # Permute constant_value to (N,C,...) 
format for correct further reshape + order = [0, len(y_shape_to_compare) - 1] + list(range(1, len(y_shape_to_compare) - 1)) + y_shape_to_compare = [y_shape_to_compare[i] for i in order] + constant_value = np.transpose(constant_value, order) + + ref_net = None + + return tf_net, ref_net + + # TODO: implement tests for 2 Consts + Sub + + test_data_1D = [ + # Power + dict(x_shape=[1], y_shape=[1]), + # Eltwise + pytest.param(dict(x_shape=[3], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19180")) + ] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_sub_placeholder_const_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[1, 3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[2, 3], y_shape=[2, 3]) + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_sub_placeholder_const_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1, 1, 1]), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1, 3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 3], y_shape=[1, 1, 3]), + marks=[pytest.mark.xfail(reason="*-19053"), pytest.mark.xfail(reason="*-18830")]), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_sub_placeholder_const_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[1, 3, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 3], y_shape=[1, 1, 1, 3]), marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 222, 224], y_shape=[1, 3, 222, 224]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_sub_placeholder_const_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1, 1, 1, 1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 3, 1, 1, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1, 1, 3], y_shape=[1, 1, 1, 1, 3]), + marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 3, 50, 100, 224]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def 
test_sub_placeholder_const_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + ############################################################################################### + # # + # Broadcast cases # + # # + ############################################################################################### + + test_data_broadcast_1D = [ # Power + dict(x_shape=[3], y_shape=[1]) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_1D) + @pytest.mark.nightly + def test_sub_placeholder_const_broadcast_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_broadcast_2D = [ + # Power + dict(x_shape=[1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[3, 1], y_shape=[3]), + # Eltwise + pytest.param(dict(x_shape=[3, 1], y_shape=[1, 3, 1, 1]), marks=pytest.mark.xfail(reason="*-19051")) + ] + + @pytest.mark.parametrize("params", test_data_broadcast_2D) + @pytest.mark.nightly + def test_sub_placeholder_const_broadcast_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_broadcast_3D = [ + # Power + dict(x_shape=[1, 1, 1], y_shape=[1]), + # Power + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[1]), marks=pytest.mark.xfail(reason="*-19053")), + # ScaleShift + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[1, 1, 1], y_shape=[3, 1]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[3, 1, 224], y_shape=[1, 3, 224]), marks=pytest.mark.xfail(reason="*-19053")), + # Eltwise + pytest.param(dict(x_shape=[2, 3, 1], y_shape=[1, 3, 2]), marks=pytest.mark.xfail(reason="*-19053")), + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_broadcast_3D) + @pytest.mark.nightly + def test_sub_placeholder_const_broadcast_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_broadcast_4D = [ + # Power + dict(x_shape=[1, 1, 1, 1], y_shape=[1]), + # Power + dict(x_shape=[1, 3, 1, 1], y_shape=[1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 100, 224], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[3, 1, 2]), + # Eltwise + dict(x_shape=[1, 3, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 100, 224], y_shape=[1, 1, 1, 224]), + marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[2, 3, 1, 2], y_shape=[1, 3, 2, 1]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_broadcast_4D) + @pytest.mark.nightly + 
def test_sub_placeholder_const_broadcast_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) + + test_data_broadcast_5D = [ + # Power + dict(x_shape=[1, 1, 1, 1, 1], y_shape=[1, 1, 1, 1, 1]), + # Power + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[1, 1]), + # ScaleShift + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 1, 1, 1, 3], y_shape=[3]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 1], y_shape=[3, 1]), + # Eltwise + dict(x_shape=[1, 3, 1, 1, 2], y_shape=[1, 3, 2]), + # Eltwise + dict(x_shape=[1, 3, 5, 1, 2], y_shape=[5, 3, 2, 1]), + # Eltwise + pytest.param(dict(x_shape=[1, 3, 50, 100, 224], y_shape=[1, 1, 1, 1, 224]), + marks=pytest.mark.xfail(reason="*-19180")), + # Eltwise + dict(x_shape=[2, 3, 1, 2, 1], y_shape=[1, 3, 2, 1, 1]) + ] + + # TODO mark as precommit (after successfully passing in nightly) + @pytest.mark.parametrize("params", test_data_broadcast_5D) + @pytest.mark.nightly + def test_sub_placeholder_const_broadcast_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_sub_placeholder_const_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, + temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_Swish.py b/tests/layer_tests/tensorflow_tests/test_tf_Swish.py new file mode 100644 index 00000000000..158a43353ac --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_Swish.py @@ -0,0 +1,88 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from unit_tests.utils.graph import build_graph + + +class TestSwish(CommonTFLayerTest): + def create_swish_net(self, shape, ir_version): + """ + Tensorflow net IR net + + Input->Swish => Input->Swish + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.reset_default_graph() + + # Create the graph and model + with tf.Session() as sess: + shapes = shape.copy() + # reshaping + if len(shapes) > 3: + shapes.append(shapes.pop(1)) + input = tf.placeholder(tf.float32, shapes, 'Input') + + tf.nn.swish(input) + + tf.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # Please, specify 'type': 'Input' for input node + # Moreover, do not forget to validate ALL layer attributes!!! 
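# tf.nn.swish(x) computes x * sigmoid(x), which is also what the Swish layer in
# the produced IR evaluates with beta = 1; a minimal NumPy sketch of that
# reference behaviour (swish_reference is an illustrative helper, not something
# the test itself calls):
import numpy as np

def swish_reference(x):
    # swish(x) = x * sigmoid(x) = x / (1 + exp(-x))
    return x / (1.0 + np.exp(-x))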
+ # + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'Swish': {'kind': 'op', 'type': 'Swish'}, + 'Swish_data': {'shape': shape, 'kind': 'data'}, + 'result': {'kind': 'op', 'type': 'Result'} + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'Swish'), + ('Swish', 'Swish_data'), + ('Swish_data', 'result') + ]) + + return tf_net, ref_net + + test_data_precommit = [ + pytest.param(dict(shape=[1, 3, 50, 100, 224]), + marks=pytest.mark.skip(reason="Skipped until fixed")) + ] + + @pytest.mark.parametrize("params", test_data_precommit) + @pytest.mark.precommit + def test_swish_precommit(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_swish_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data = [dict(shape=[1]), + dict(shape=[1, 224]), + dict(shape=[1, 3, 224]), + dict(shape=[1, 3, 100, 224]), + dict(shape=[1, 3, 50, 100, 224])] + + @pytest.mark.parametrize("params", test_data) + @pytest.mark.nightly + def test_swish(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_swish_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TopK.py b/tests/layer_tests/tensorflow_tests/test_tf_TopK.py new file mode 100644 index 00000000000..c2d2fe54c07 --- /dev/null +++ b/tests/layer_tests/tensorflow_tests/test_tf_TopK.py @@ -0,0 +1,143 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from common.layer_test_class import check_ir_version +from common.tf_layer_test_class import CommonTFLayerTest +from mo.ops.op import PermuteAttrs +from unit_tests.utils.graph import build_graph +from tensorflow_tests.permutation_utils import permute_nchw_to_nhwc, permute_axis + + +class Test_TopK(CommonTFLayerTest): + @staticmethod + def create_topK_net(shape, k, ir_version): + """ + Tensorflow net: + + |-> Values + Input -> TopK | + |-> Indices + + + IR net: + + |-> Values + Input -> TopK | + |-> Indices + + """ + + # + # Create Tensorflow model + # + + import tensorflow as tf + + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + shape_net = permute_nchw_to_nhwc(shape) + + input_tensor = tf.compat.v1.placeholder(tf.int32, shape=shape_net, name='Input') + values, indices = tf.nn.top_k(input_tensor, k=k, sorted=True, name='Operation') + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + # + # Create reference IR net + # + topk_output_shape = shape.copy() + inverse_nhwc_nchw = PermuteAttrs.get_nhwc_to_nchw_permutation(len(topk_output_shape)).inv + topk_axis = permute_axis(len(topk_output_shape) - 1, inverse_nhwc_nchw) # we need to permute axis attribute + topk_output_shape[topk_axis] = k + + ref_net = None + + if check_ir_version(10, None, ir_version): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': shape, 'kind': 'data'}, + 'Const_k_input_data': {'shape': [], 'kind': 'data'}, + 'Const_k': {'kind': 'op', 'type': 'Const'}, + 'Const_k_data': {'shape': [], 'kind': 'data'}, + 'TopK': {'kind': 'op', 'type': 'TopK', 'axis': topk_axis, 'mode': 'max', 'sort': 'value'}, + 'TopK_data_1': {'shape': topk_output_shape, 'kind': 'data'}, + 'TopK_data_2': {'shape': 
topk_output_shape, 'kind': 'data'}, + 'result_1': {'kind': 'op', 'type': 'Result'}, + 'result_2': {'kind': 'op', 'type': 'Result'}, + } + + ref_net = build_graph(nodes_attributes, + [('input', 'input_data'), + ('input_data', 'TopK', {'in': 0}), + + ('Const_k_input_data', 'Const_k'), + ('Const_k', 'Const_k_data'), + ('Const_k_data', 'TopK', {'in': 1}), + + ('TopK', 'TopK_data_1', {'out': 0}), + ('TopK', 'TopK_data_2', {'out': 1}), + ('TopK_data_1', 'result_1'), + ('TopK_data_2', 'result_2'), + ]) + + return tf_net, ref_net + + test_data_1D = [ + dict(shape=[15], k=10), + dict(shape=[15], k=5), + ] + + @pytest.mark.parametrize("params", test_data_1D) + @pytest.mark.nightly + def test_TopK_1D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_topK_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_2D = [ + dict(shape=[14, 15], k=10), + dict(shape=[14, 15], k=5), + ] + + @pytest.mark.parametrize("params", test_data_2D) + @pytest.mark.nightly + def test_TopK_2D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_topK_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_3D = [ + dict(shape=[13, 14, 15], k=10), + dict(shape=[13, 14, 15], k=5), + ] + + @pytest.mark.parametrize("params", test_data_3D) + @pytest.mark.nightly + def test_TopK_3D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_topK_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_4D = [ + dict(shape=[12, 13, 14, 15], k=10), + dict(shape=[12, 13, 14, 15], k=5), + ] + + @pytest.mark.parametrize("params", test_data_4D) + @pytest.mark.nightly + def test_TopK_4D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_topK_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) + + test_data_5D = [ + dict(shape=[11, 12, 13, 14, 15], k=10), + dict(shape=[11, 12, 13, 14, 15], k=5), + ] + + @pytest.mark.parametrize("params", test_data_5D) + @pytest.mark.nightly + def test_TopK_5D(self, params, ie_device, precision, ir_version, temp_dir): + self._test(*self.create_topK_net(**params, ir_version=ir_version), + ie_device, precision, ir_version, temp_dir=temp_dir) From 7b2779c406bcf00c42102a1faedd459ea70a7b45 Mon Sep 17 00:00:00 2001 From: Rafal Blaczkowski Date: Wed, 16 Jun 2021 12:25:06 +0200 Subject: [PATCH 33/43] cleanup improvement (#6186) --- .ci/openvino-onnx/Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.ci/openvino-onnx/Jenkinsfile b/.ci/openvino-onnx/Jenkinsfile index 5fe24928798..ff2e8a45125 100644 --- a/.ci/openvino-onnx/Jenkinsfile +++ b/.ci/openvino-onnx/Jenkinsfile @@ -155,10 +155,9 @@ def getConfigurationsMap() { CONFIGURATION_WORKFLOW = { configuration -> node("OpenVINO") { + String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}" try { PROJECT_NAME = "openvino" - String workdir = "${HOME}/workspace/${BUILD_NUMBER}_${env.CHANGE_ID}_${configuration.name}" - stage("Clone repository") { prepare_repository(workdir) } @@ -185,10 +184,10 @@ CONFIGURATION_WORKFLOW = { configuration -> } finally { stage("Cleanup") { - deleteDir() String docker_container_name = get_docker_container_name(configuration) sh """ docker rm -f ${docker_container_name} + rm -rf ${workdir} """ } } From dc0d482c23432ba72367a84e4639faeb18c32ee5 Mon Sep 17 00:00:00 
2001 From: Mateusz Tabaka Date: Wed, 16 Jun 2021 12:45:36 +0200 Subject: [PATCH 34/43] Add support for fp64 in Sqrt's evaluate method (#5913) It's required for t2t-vit models. --- ngraph/core/src/op/sqrt.cpp | 4 +++- ngraph/test/constant_folding.cpp | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ngraph/core/src/op/sqrt.cpp b/ngraph/core/src/op/sqrt.cpp index e706e4ae7c2..339a8b74706 100644 --- a/ngraph/core/src/op/sqrt.cpp +++ b/ngraph/core/src/op/sqrt.cpp @@ -57,6 +57,7 @@ namespace sqrtop NGRAPH_TYPE_CASE(evaluate_sqrt, u64, arg0, out, count); NGRAPH_TYPE_CASE(evaluate_sqrt, f16, arg0, out, count); NGRAPH_TYPE_CASE(evaluate_sqrt, f32, arg0, out, count); + NGRAPH_TYPE_CASE(evaluate_sqrt, f64, arg0, out, count); default: rc = false; break; } return rc; @@ -79,7 +80,8 @@ bool op::Sqrt::has_evaluate() const case ngraph::element::u32: case ngraph::element::u64: case ngraph::element::f16: - case ngraph::element::f32: return true; + case ngraph::element::f32: + case ngraph::element::f64: return true; default: break; } return false; diff --git a/ngraph/test/constant_folding.cpp b/ngraph/test/constant_folding.cpp index c34dcb12c6e..d6b2d98ee9f 100644 --- a/ngraph/test/constant_folding.cpp +++ b/ngraph/test/constant_folding.cpp @@ -300,6 +300,7 @@ TEST(constant_folding, constant_unary_binary) auto g = make_shared(element::i32, Shape{2}, values_g); auto h = make_shared(element::boolean, Shape{2, 2}, values_h); auto i = make_shared(element::boolean, Shape{2}, values_i); + auto doubles = make_shared(element::f64, Shape{2}, std::vector{4.0, 9.0}); auto add = make_shared(a, b); auto sub = make_shared(a, b); @@ -328,6 +329,7 @@ TEST(constant_folding, constant_unary_binary) auto logical_or_autob_numpy = make_shared(h, i, op::AutoBroadcastType::NUMPY); auto logical_xor_autob_numpy = make_shared(h, i, op::AutoBroadcastType::NUMPY); + auto doubles_sqrt = make_shared(doubles); auto neg_sqrt = make_shared(c); @@ -355,7 +357,8 @@ TEST(constant_folding, constant_unary_binary) less_autob_numpy, less_eq_autob_numpy, logical_or_autob_numpy, - logical_xor_autob_numpy}, + logical_xor_autob_numpy, + doubles_sqrt}, ParameterVector{}); auto func_error = make_shared(NodeVector{neg_sqrt}, ParameterVector{}); @@ -388,6 +391,7 @@ TEST(constant_folding, constant_unary_binary) vector less_eq_autob_numpy_expected{1, 1, 0, 1}; vector logical_or_autob_numpy_expected{0, 1, 1, 1}; vector logical_xor_autob_numpy_expected{0, 1, 1, 0}; + vector doubles_sqrt_expected{2.0, 3.0}; ASSERT_EQ(get_result_constant(func, 0), add_expected); ASSERT_EQ(get_result_constant(func, 1), sub_expected); @@ -414,6 +418,7 @@ TEST(constant_folding, constant_unary_binary) ASSERT_EQ(get_result_constant(func, 22), less_eq_autob_numpy_expected); ASSERT_EQ(get_result_constant(func, 23), logical_or_autob_numpy_expected); ASSERT_EQ(get_result_constant(func, 24), logical_xor_autob_numpy_expected); + ASSERT_EQ(get_result_constant(func, 25), doubles_sqrt_expected); ASSERT_NO_THROW(pass_manager.run_passes(func_error)); } From d76d2674782d3c3bc83be7c06cfa7db42b804326 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Wed, 16 Jun 2021 12:46:33 +0200 Subject: [PATCH 35/43] Add support for fp64 in Convert's evaluate (#5911) It's required for t2t-vit models. 
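Both changes only widen existing type switches: Sqrt now folds float64 constants the same way it folds float32 ones, and Convert gains float64 as both a source and a destination type, truncating toward zero when the target is an integer type. A minimal NumPy sketch of the values the new constant-folding tests check (illustrative only, not part of either patch):

import numpy as np

doubles = np.array([4.0, 9.0], dtype=np.float64)
assert np.array_equal(np.sqrt(doubles), [2.0, 3.0])           # Sqrt on f64 constants

src = np.array([1.2, 2.1, 3.3, 4.45, 5.02], dtype=np.float64)
assert np.array_equal(src.astype(np.int64), [1, 2, 3, 4, 5])  # f64 -> i64 truncates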
--- ngraph/core/src/op/convert.cpp | 4 ++++ ngraph/test/constant_folding.cpp | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/ngraph/core/src/op/convert.cpp b/ngraph/core/src/op/convert.cpp index 4230c3a9afb..a920fec7729 100644 --- a/ngraph/core/src/op/convert.cpp +++ b/ngraph/core/src/op/convert.cpp @@ -104,6 +104,7 @@ namespace convert TYPE_OUT_CASE(bf16, arg, out); TYPE_OUT_CASE(f16, arg, out); TYPE_OUT_CASE(f32, arg, out); + TYPE_OUT_CASE(f64, arg, out); TYPE_OUT_CASE(boolean, arg, out); default: rc = false; break; } @@ -129,6 +130,7 @@ namespace convert NGRAPH_TYPE_CASE(evaluate_convert, bf16, arg, out); NGRAPH_TYPE_CASE(evaluate_convert, f16, arg, out); NGRAPH_TYPE_CASE(evaluate_convert, f32, arg, out); + NGRAPH_TYPE_CASE(evaluate_convert, f64, arg, out); NGRAPH_TYPE_CASE(evaluate_convert, boolean, arg, out); default: rc = false; break; } @@ -200,6 +202,7 @@ bool op::v0::Convert::has_evaluate() const case ngraph::element::bf16: case ngraph::element::f16: case ngraph::element::f32: + case ngraph::element::f64: case ngraph::element::boolean: break; default: return false; } @@ -219,6 +222,7 @@ bool op::v0::Convert::has_evaluate() const case ngraph::element::bf16: case ngraph::element::f16: case ngraph::element::f32: + case ngraph::element::f64: case ngraph::element::boolean: break; default: return false; } diff --git a/ngraph/test/constant_folding.cpp b/ngraph/test/constant_folding.cpp index d6b2d98ee9f..0f5ce320531 100644 --- a/ngraph/test/constant_folding.cpp +++ b/ngraph/test/constant_folding.cpp @@ -464,6 +464,16 @@ TEST(constant_folding, const_convert) vector expected{true, false, true, false, true, false, true}; test_const_convert(in, expected); } + { + vector in{1, 2, 3, 4, 5}; + vector expected{1.0, 2.0, 3.0, 4.0, 5.0}; + test_const_convert(in, expected); + } + { + vector in{1.2, 2.1, 3.3, 4.45, 5.02}; + vector expected{1, 2, 3, 4, 5}; + test_const_convert(in, expected); + } } TEST(constant_folding, shape_of_v0) From b05977a536bff6f618aabe8008bb43db23b3ea71 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Wed, 16 Jun 2021 14:14:50 +0300 Subject: [PATCH 36/43] [CPU][IE TESTS] Added more input shapes for Pooling tests (#6083) --- .../plugin/cpu/single_layer_tests/pooling.cpp | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp index 0df9c464c4f..6fb1f7d1169 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp @@ -105,6 +105,22 @@ const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}; const std::vector vecCpuConfigs = {ref, sse42, avx, avx512}; const std::vector inpOutPrecision = {Precision::FP32, Precision::BF16}; +const std::vector> inputShapes4D = { + std::vector{3, 4, 64, 64}, + std::vector{2, 8, 8, 12}, + std::vector{1, 16, 16, 12}, + std::vector{1, 21, 8, 4}, + std::vector{1, 32, 8, 8}, +}; + +const std::vector> inputShapes5D = { + std::vector{1, 4, 16, 16, 16}, + std::vector{2, 8, 8, 8, 8}, + std::vector{2, 16, 12, 16, 20}, + std::vector{1, 19, 16, 20, 8}, + std::vector{1, 32, 16, 8, 12}, +}; + const std::vector paramsMax4D = { poolSpecificParams{ ngraph::helpers::PoolingTypes::MAX, {2, 2}, {2, 2}, {0, 0}, {0, 0}, ngraph::op::RoundingType::CEIL, ngraph::op::PadType::EXPLICIT, false }, @@ -137,7 +153,7 @@ 
INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 64, 64})), + ::testing::ValuesIn(inputShapes4D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), ::testing::Values(emptyFusingSpec)), @@ -152,7 +168,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 64, 64})), + ::testing::ValuesIn(inputShapes4D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), ::testing::Values(emptyFusingSpec)), @@ -167,7 +183,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 64, 64})), + ::testing::ValuesIn(inputShapes4D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(ref), ::testing::Values(emptyFusingSpec)), @@ -207,7 +223,7 @@ INSTANTIATE_TEST_CASE_P(smoke_MaxPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 3, 16, 32, 32})), + ::testing::ValuesIn(inputShapes5D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), ::testing::Values(emptyFusingSpec)), @@ -222,7 +238,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 32, 32, 32})), + ::testing::ValuesIn(inputShapes5D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)), ::testing::Values(emptyFusingSpec)), @@ -237,7 +253,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_NotOptimized, PoolingLayerCPUTest, ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 16, 16, 16})), + ::testing::ValuesIn(inputShapes5D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::Values(ref), ::testing::Values(emptyFusingSpec)), @@ -272,7 +288,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_4D_I8, PoolingLayerCPUTest, ::testing::Values(Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 64, 64})), + ::testing::ValuesIn(inputShapes4D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_4D)), ::testing::ValuesIn(fusingParamsSet)), @@ -287,7 +303,7 @@ INSTANTIATE_TEST_CASE_P(smoke_AvgPool_CPU_5D_I8, PoolingLayerCPUTest, ::testing::Values(Precision::FP32), ::testing::Values(InferenceEngine::Layout::ANY), ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({1, 4, 
16, 16, 16})), + ::testing::ValuesIn(inputShapes5D), ::testing::Values(CommonTestUtils::DEVICE_CPU)), ::testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigsFusing_5D)), ::testing::ValuesIn(fusingParamsSet)), From 5b847fabe842a9124efbfccb39655e73f4dac19b Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Wed, 16 Jun 2021 14:42:26 +0300 Subject: [PATCH 37/43] [CPU] Refactor load_emitter_context semantic. Update store emitter (#5824) * [CPU] Refactor load_emitter_context semantic. Update store emitter New load_emitter_context constructor arguments order seems to be more convenient. Store emitter now emits bf16 emu. --- .../mkldnn_plugin/emitters/jit_load_store_emitters.cpp | 5 +++++ .../mkldnn_plugin/emitters/jit_load_store_emitters.hpp | 6 ++++-- .../src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 7 +++---- .../src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp | 9 ++++----- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp index 83bc04c530d..4d1e3819394 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp @@ -510,6 +510,11 @@ size_t jit_store_emitter::aux_vecs_count() const { size_t jit_store_emitter::get_inputs_num() const { return 1; } +void jit_store_emitter::emit_data() const { + if (emu_vcvtneps2bf16) + emu_vcvtneps2bf16->emit_data(); +} + void jit_store_emitter::emit_impl(const std::vector &in_idxs, const std::vector &out_idxs, const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, const emitter_context *emit_context) const { diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.hpp b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.hpp index 00c2e49262d..ec863d0c69e 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.hpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.hpp @@ -18,8 +18,8 @@ struct load_emitter_context : public emitter_context { load_emitter_context() : src_prc_(Precision::FP32), dst_prc_(Precision::FP32), load_num_(8), offset_byte_(0), is_fill_(false), fill_value_("zero") {} - load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, bool is_fill = false, std::string fill_value = "zero", int offset_byte = 0): - src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), is_fill_(is_fill), fill_value_(fill_value), offset_byte_(offset_byte) {} + load_emitter_context(Precision src_prc, Precision dst_prc, int load_num, int offset_byte = 0, bool is_fill = false, std::string fill_value = "zero"): + src_prc_(src_prc), dst_prc_(dst_prc), load_num_(load_num), offset_byte_(offset_byte), is_fill_(is_fill), fill_value_(fill_value) {} int offset_byte_; int load_num_; @@ -124,6 +124,8 @@ public: size_t get_inputs_num() const override; + void emit_data() const override; + std::shared_ptr get_emu_vcvtneps2bf16() const { return emu_vcvtneps2bf16; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index f27a40e3bd2..baff79e5d75 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -306,8 +306,8 @@ private: inline void worker_tail_planar() { Precision dst_prc = isFloatCompatible(jcp_.src_prc) ? 
Precision::FP32 : Precision::I32; load_emitter->emit_code({static_cast(reg_src.getIdx())}, {static_cast(vmm_val.getIdx())}, - std::make_shared(jcp_.src_prc, dst_prc, tail_num, true, "zero"), - {}, {load_pool_gpr_idxs}); + std::make_shared(jcp_.src_prc, dst_prc, tail_num, 0, true), + {}, {load_pool_gpr_idxs}); if (jcp_.normalize_variance) { if (!isFloatCompatible(jcp_.src_prc)) @@ -477,8 +477,7 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator this->postamble(); load_emitter->emit_data(); - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) - store_emitter->get_emu_vcvtneps2bf16()->emit_data(); + store_emitter->emit_data(); for (auto& inj : eltwise_injectors) inj->prepare_table(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 77db7621692..a1a7f8329a5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -88,8 +88,7 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi this->postamble(); load_emitter->emit_data(); - if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core) && store_emitter != nullptr && store_emitter->get_emu_vcvtneps2bf16() != nullptr) - store_emitter->get_emu_vcvtneps2bf16()->emit_data(); + store_emitter->emit_data(); } private: @@ -155,7 +154,7 @@ private: Vmm vmm_max = get_acc_reg(i); load_emitter->emit_code({static_cast(reg_input.getIdx())}, {static_cast(vmm_max.getIdx())}, - std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), + std::make_shared(jpp_.src_prc, Precision::FP32, step, i * src_c_off), {}, load_pool_gpr_idxs); } @@ -169,7 +168,7 @@ private: Vmm vmm_src = get_src_reg(i); load_emitter->emit_code({static_cast(aux_reg_input1.getIdx())}, {static_cast(vmm_src.getIdx())}, - std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", i * src_c_off), + std::make_shared(jpp_.src_prc, Precision::FP32, step, i * src_c_off), {}, load_pool_gpr_idxs); if (isa == cpu::x64::sse41) { @@ -222,7 +221,7 @@ private: for (int i = 0; i < c_blocks; i++) { const int src_c_off = i * jpp_.ih * jpp_.iw * jpp_.c_block * jpp_.src_data_size; - const auto load_context = std::make_shared(jpp_.src_prc, Precision::FP32, step, false, "zero", src_c_off); + const auto load_context = std::make_shared(jpp_.src_prc, Precision::FP32, step, src_c_off); mov(aux_reg_input, reg_input); From 99ebda98f16f2290285f8c6673e039e3104c2687 Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Wed, 16 Jun 2021 15:54:40 +0300 Subject: [PATCH 38/43] [CPU] Memory leak in jit_uni_i8i8_pooling kernel (#6188) --- inference-engine/thirdparty/mkl-dnn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 87516e47dae..e0381c369fc 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 87516e47dae71fc9c326d0f3685c1572c740e127 +Subproject commit e0381c369fc3bed487b0dcfef7e9fcb2e0aea575 From b1db3448e7b72fa54dbf60d26c7155f03a0baa7c Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Wed, 16 Jun 2021 14:57:29 +0200 Subject: [PATCH 39/43] [ONNX] Extend ONNX Importer for operation "Einsum" (#6074) --- ngraph/frontend/onnx_import/src/op/einsum.cpp | 30 ++++++++++++ 
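The importer maps ONNX Einsum directly onto nGraph's Einsum operation, forwarding the node inputs and the "equation" attribute unchanged, so the semantics are the usual einsum notation. The new onnx_einsum_sum test added further below uses the equation "ij->i", which sums a 2-D input over its second axis; a minimal NumPy sketch of that expectation (illustrative, not taken from the patch):

import numpy as np

x = np.arange(12, dtype=np.float32).reshape(3, 4)
row_sums = np.einsum("ij->i", x)           # reduce over the "j" axis
assert np.array_equal(row_sums, x.sum(axis=1))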
ngraph/frontend/onnx_import/src/op/einsum.hpp | 25 ++++++++++ .../frontend/onnx_import/src/ops_bridge.cpp | 2 + ngraph/python/tests/__init__.py | 4 +- .../python/tests/test_ngraph/test_einsum.py | 6 +-- ngraph/python/tests/test_onnx/test_backend.py | 15 +++--- ngraph/test/models/onnx/einsum_sum.prototxt | 47 +++++++++++++++++++ ngraph/test/onnx/onnx_import.in.cpp | 33 +++++++++++-- 8 files changed, 142 insertions(+), 20 deletions(-) create mode 100644 ngraph/frontend/onnx_import/src/op/einsum.cpp create mode 100644 ngraph/frontend/onnx_import/src/op/einsum.hpp create mode 100644 ngraph/test/models/onnx/einsum_sum.prototxt diff --git a/ngraph/frontend/onnx_import/src/op/einsum.cpp b/ngraph/frontend/onnx_import/src/op/einsum.cpp new file mode 100644 index 00000000000..1d2a7a4edc9 --- /dev/null +++ b/ngraph/frontend/onnx_import/src/op/einsum.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op/einsum.hpp" +#include "default_opset.hpp" + +namespace ngraph +{ + namespace onnx_import + { + namespace op + { + namespace set_1 + { + OutputVector einsum(const Node& node) + { + const std::string& equation{node.get_attribute_value("equation")}; + + return OutputVector{ + std::make_shared(node.get_ng_inputs(), equation)}; + } + + } // namespace set_1 + + } // namespace op + + } // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/op/einsum.hpp b/ngraph/frontend/onnx_import/src/op/einsum.hpp new file mode 100644 index 00000000000..c9f87479ea0 --- /dev/null +++ b/ngraph/frontend/onnx_import/src/op/einsum.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/node.hpp" +#include "onnx_import/core/node.hpp" + +namespace ngraph +{ + namespace onnx_import + { + namespace op + { + namespace set_1 + { + OutputVector einsum(const Node& node); + + } // namespace set_1 + } // namespace op + + } // namespace onnx_import + +} // namespace ngraph diff --git a/ngraph/frontend/onnx_import/src/ops_bridge.cpp b/ngraph/frontend/onnx_import/src/ops_bridge.cpp index f7864360ba7..0055b8afc4a 100644 --- a/ngraph/frontend/onnx_import/src/ops_bridge.cpp +++ b/ngraph/frontend/onnx_import/src/ops_bridge.cpp @@ -42,6 +42,7 @@ #include "op/dequantize_linear.hpp" #include "op/div.hpp" #include "op/dropout.hpp" +#include "op/einsum.hpp" #include "op/elu.hpp" #include "op/equal.hpp" #include "op/erf.hpp" @@ -340,6 +341,7 @@ namespace ngraph REGISTER_OPERATOR("Dropout", 1, dropout); REGISTER_OPERATOR("Dropout", 7, dropout); REGISTER_OPERATOR("Dropout", 12, dropout); + REGISTER_OPERATOR("Einsum", 1, einsum); REGISTER_OPERATOR("Elu", 1, elu); REGISTER_OPERATOR("Equal", 1, equal); REGISTER_OPERATOR("Erf", 1, erf); diff --git a/ngraph/python/tests/__init__.py b/ngraph/python/tests/__init__.py index 817d4584534..3bcf0d961fb 100644 --- a/ngraph/python/tests/__init__.py +++ b/ngraph/python/tests/__init__.py @@ -25,8 +25,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): skip_segfault = pytest.mark.skip(reason="Segmentation fault error") xfail_issue_33488 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" "MaxUnpool") -xfail_issue_33512 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" - "Einsum") xfail_issue_33535 = xfail_test(reason="nGraph does not support the following ONNX operations:" "DynamicQuantizeLinear") xfail_issue_33538 
= xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:" @@ -145,5 +143,5 @@ xfail_issue_49753 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1 xfail_issue_49754 = xfail_test(reason="RuntimeError: Unsupported dynamic ops: v1::TopKIE") xfail_issue_52463 = xfail_test(reason="test_operator_add_size1_singleton_broadcast_cpu - " "Not equal to tolerance") -xfail_issue_45432 = xfail_test(reason="Einsum is not implemented in CPU plugin.") +xfail_issue_58033 = xfail_test(reason="Einsum operation misses support for complex ellipsis equations") xfail_issue_onnx_models_140 = xfail_test(reason="https://github.com/onnx/models/issues/140") diff --git a/ngraph/python/tests/test_ngraph/test_einsum.py b/ngraph/python/tests/test_ngraph/test_einsum.py index a89b6c3ff35..fb7581d9160 100644 --- a/ngraph/python/tests/test_ngraph/test_einsum.py +++ b/ngraph/python/tests/test_ngraph/test_einsum.py @@ -3,7 +3,7 @@ import numpy as np import pytest from ngraph.utils.types import get_element_type -from tests import xfail_issue_45432 +from tests import xfail_issue_58033 from tests.runtime import get_runtime @@ -86,13 +86,13 @@ def test_simple_ellipsis(data_type): einsum_op_exec([(5, 3, 4)], "a...->...", data_type) -@xfail_issue_45432 +@xfail_issue_58033 @pytest.mark.parametrize("data_type", [np.float32, np.int32]) def test_multiple_ellipsis(data_type): einsum_op_exec([(3, 5), 1], "a...,...->a...", data_type, with_value=True) -@xfail_issue_45432 +@xfail_issue_58033 @pytest.mark.parametrize("data_type", [np.float32, np.int32]) def test_broadcasting_ellipsis(data_type): einsum_op_exec([(9, 1, 4, 3), (3, 11, 7, 1)], "a...b,b...->a...", data_type, with_value=True) diff --git a/ngraph/python/tests/test_onnx/test_backend.py b/ngraph/python/tests/test_onnx/test_backend.py index a8ee1cbb065..f8a1d99c900 100644 --- a/ngraph/python/tests/test_onnx/test_backend.py +++ b/ngraph/python/tests/test_onnx/test_backend.py @@ -9,7 +9,6 @@ from tests.test_onnx.utils.onnx_backend import OpenVinoTestBackend from tests import (BACKEND_NAME, xfail_issue_33488, - xfail_issue_33512, xfail_issue_33535, xfail_issue_33538, xfail_issue_33581, @@ -59,7 +58,9 @@ from tests import (BACKEND_NAME, xfail_issue_49753, xfail_issue_49754, xfail_issue_52463, - xfail_issue_55760) + xfail_issue_55760, + xfail_issue_58033, + ) def expect_fail(test_case_path, xfail): # type: (str) -> None @@ -285,12 +286,6 @@ tests_expected_to_fail = [ "OnnxBackendNodeModelTest.test_qlinearconv_cpu"), (xfail_issue_38724, "OnnxBackendNodeModelTest.test_resize_tf_crop_and_resize_cpu"), - (xfail_issue_33512, - "OnnxBackendNodeModelTest.test_einsum_transpose_cpu", - "OnnxBackendNodeModelTest.test_einsum_batch_diagonal_cpu", - "OnnxBackendNodeModelTest.test_einsum_batch_matmul_cpu", - "OnnxBackendNodeModelTest.test_einsum_sum_cpu", - "OnnxBackendNodeModelTest.test_einsum_inner_prod_cpu"), (xfail_issue_33606, "OnnxBackendNodeModelTest.test_det_2d_cpu", "OnnxBackendNodeModelTest.test_det_nd_cpu"), @@ -368,7 +363,9 @@ tests_expected_to_fail = [ "OnnxBackendNodeModelTest.test_quantizelinear_cpu"), (xfail_issue_33593, "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_strides_cpu", - "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",) + "OnnxBackendNodeModelTest.test_maxpool_with_argmax_2d_precomputed_pads_cpu",), + (xfail_issue_58033, + "OnnxBackendNodeModelTest.test_einsum_batch_diagonal_cpu"), ] for test_group in tests_expected_to_fail: diff --git a/ngraph/test/models/onnx/einsum_sum.prototxt 
b/ngraph/test/models/onnx/einsum_sum.prototxt new file mode 100644 index 00000000000..0f56b47d3ab --- /dev/null +++ b/ngraph/test/models/onnx/einsum_sum.prototxt @@ -0,0 +1,47 @@ +ir_version: 7 +producer_name: "backend-test" +graph { + node { + input: "x" + output: "y" + op_type: "Einsum" + attribute { + name: "equation" + s: "ij->i" + type: STRING + } + } + name: "test_einsum_sum" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + } + } + } + } +} +opset_import { + version: 12 +} diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index 849a015c4c3..943d6b90ca1 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -4615,18 +4615,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_sparse_tensor_float_8x17) 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); test_case.run(); -} - +} + NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_sparse_tensor_float_2x3x4) { auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/constant_sparse_tensor_float_2x3x4.prototxt")); auto test_case = test::TestCase(function); - test_case.add_expected_output(Shape{2, 3, 4}, {1.f, 0.f, 0.f, 8.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.f, 0.f, + test_case.add_expected_output(Shape{2, 3, 4}, {1.f, 0.f, 0.f, 8.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 3.f, 0.f}); test_case.run(); -} +} NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_sparse_tensor_float_2x2x3x4) { @@ -4636,7 +4636,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_constant_sparse_tensor_float_2x2x3x4) auto test_case = test::TestCase(function); test_case.add_expected_output(Shape{2, 2, 3, 4}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 2.f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 8.f, 0.f, 1.f, 2.f, 0.f, 0.f, 0.f, 3.f, 0.f, + 0.f, 0.f, 0.f, 8.f, 0.f, 1.f, 2.f, 0.f, 0.f, 0.f, 3.f, 0.f, 1.f, 0.f, 0.f, 0.f, 0.f, 2.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); test_case.run(); } + +NGRAPH_TEST(${BACKEND_NAME}, onnx_einsum_sum) +{ + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/einsum_sum.prototxt")); + auto test_case = test::TestCase(function); + test_case.add_input(Shape{3, 4}, + {1.764052345967664, + 0.4001572083672233, + 0.9787379841057392, + 2.240893199201458, + 1.8675579901499675, + -0.977277879876411, + 0.9500884175255894, + -0.1513572082976979, + -0.10321885179355784, + 0.41059850193837233, + 0.144043571160878, + 1.454273506962975}); + test_case.add_expected_output( + Shape{3}, {5.3838407376420845, 1.689011319501448, 1.9056967282686674}); + test_case.run(); +} From 2c775d48b27e3148769ce9998d48e75642c019a4 Mon Sep 17 00:00:00 2001 From: Gleb Kazantaev Date: Wed, 16 Jun 2021 16:02:57 +0300 Subject: [PATCH 40/43] Remove Pruning from MO (#6191) --- .../src/offline_transformations/src/moc_transformations.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index 35f4a575c15..745e173b4d7 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ 
a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++
b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -12,9 +12,5 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr f) { - ngraph::pass::Manager m(get_pass_config()); - m.register_pass(); - m.run_passes(f); - return false; } \ No newline at end of file From 5c55d390e82ba055bb09e7366b4a105180c46c0e Mon Sep 17 00:00:00 2001 From: Elizaveta Lobanova Date: Wed, 16 Jun 2021 16:19:21 +0300 Subject: [PATCH 41/43] [GNA] Allow 2d reshape of the first diagonal layer (#6115) --- inference-engine/src/gna_plugin/gna_groups.hpp | 7 ------- .../src/gna_plugin/optimizer/gna_pass_manager.cpp | 5 ++--- .../gna/pass_tests/convert_matmul_to_pointwise_conv.cpp | 3 +-- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/inference-engine/src/gna_plugin/gna_groups.hpp b/inference-engine/src/gna_plugin/gna_groups.hpp index 2449338821c..704588a153d 100644 --- a/inference-engine/src/gna_plugin/gna_groups.hpp +++ b/inference-engine/src/gna_plugin/gna_groups.hpp @@ -52,13 +52,6 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift()) return false; - // Don't reshape the first dnn layer since it breaks groups recognition - auto prevLayer = InferenceEngine::CNNNetPrevLayerSkipCertain(layer, 0, [](InferenceEngine::CNNLayerPtr ptr) { - return LayerInfo(ptr).isNonValuesChangable(); - }); - IE_ASSERT(prevLayer != nullptr); - if (LayerInfo(prevLayer).isInput()) return false; - // Don't reshape diagonallayers with bias connection return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); } diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index b8962cebd36..ef333e7e46f 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -85,9 +85,8 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, return LayerInfo(ptr).isNonValuesChangable(); }); IE_ASSERT(inputLayer != nullptr); - size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ? - nextLayer->outData[0]->getDims().back() : - Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; + size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? 
nextLayer->outData[0]->getDims().back() : + Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; std::vector weightsValues(weightsSize, fillValue); IE_ASSERT(diagLayer != nullptr); diagLayer->_weights = make_shared_blob( diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp index 7e3d15174f3..86bfe5e25c0 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp @@ -217,8 +217,7 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToP ::testing::ValuesIn(inputShape)), ConvertMatmulToPointwiseConv::getTestCaseName); -// Issue 55662 -INSTANTIATE_TEST_CASE_P(DISABLED_smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq, +INSTANTIATE_TEST_CASE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), From b8313c4fae5a794508b0808145182fe2dd62caea Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Wed, 16 Jun 2021 23:47:08 +0900 Subject: [PATCH 42/43] [IE CLDNN] Disable crop optimization only when the node is inside a loop body program (#6192) --- .../src/graph_optimizer/prepare_buffer_fusing.cpp | 2 +- .../thirdparty/clDNN/src/include/loop_inst.h | 2 +- .../thirdparty/clDNN/src/include/program_impl.h | 8 ++++++-- inference-engine/thirdparty/clDNN/src/program.cpp | 11 +++++++---- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp index eebd65149ad..c57e72ad5f4 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp @@ -283,7 +283,7 @@ void prepare_buffer_fusing::run(program_impl& p) { } if (node.get_dependencies().size() == 1 && node.get_users().size() > 0) { - if (node.get_dependency(0).is_type()) { + if (p.is_loop_body() && node.get_dependency(0).is_type()) { return; } // optimization is available for cropping across depth(features) only diff --git a/inference-engine/thirdparty/clDNN/src/include/loop_inst.h b/inference-engine/thirdparty/clDNN/src/include/loop_inst.h index e39bb90f431..08e7d416d44 100644 --- a/inference-engine/thirdparty/clDNN/src/include/loop_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/loop_inst.h @@ -266,7 +266,7 @@ public: auto opts = get_program().get_options(); std::vector output_names_vec(output_names.begin(), output_names.end()); opts.set_option(build_option::outputs(output_names_vec)); - body_program = program_impl::build_program(get_program().get_engine(), body, opts, false); + body_program = program_impl::build_program(get_program().get_engine(), body, opts, false, false, true); } const primitive_id& get_trip_count_id() const { return get_primitive()->trip_count_id; } diff --git a/inference-engine/thirdparty/clDNN/src/include/program_impl.h b/inference-engine/thirdparty/clDNN/src/include/program_impl.h index fe8c8cb55f2..df45537f00a 100644 --- a/inference-engine/thirdparty/clDNN/src/include/program_impl.h +++ b/inference-engine/thirdparty/clDNN/src/include/program_impl.h @@ -138,7 +138,8 @@ 
public: topology_impl const& topology, build_options const& options, bool is_internal, - bool no_optimizations = false); + bool no_optimizations = false, + bool is_body_program = false); /* constructor used to build a program from subset of nodes of other program (used in propagate_constants) */ program_impl(engine& engine_ref, std::set> const& nodes, @@ -153,6 +154,7 @@ public: std::vector& get_outputs() { return outputs; } // ToDo: redesign reorder-inputs pass to make it const as_well as get_engine and get options + bool is_loop_body() const { return is_body_program; } bool is_debug_build() const { return options.get()->enabled(); } const nodes_ordering& get_processing_order() const; nodes_ordering& get_processing_order(); @@ -228,7 +230,8 @@ public: const topology_impl& topology, const build_options& options, bool is_internal = false, - bool no_optimizations = false); + bool no_optimizations = false, + bool is_body_program = false); static ptr build_program(engine& engine, const std::set>& nodes, const build_options& options, @@ -253,6 +256,7 @@ private: nodes_ordering processing_order; std::unique_ptr pm; std::shared_ptr tuning_cache; + bool is_body_program; std::map> nodes_map; diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index ec59a526be0..1bd90364028 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -84,13 +84,15 @@ program_impl::program_impl(engine& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal, - bool no_optimizations) + bool no_optimizations, + bool is_body_program) : _engine(engine_ref), _stream(_engine.create_stream()), program_state(_engine), options(options), processing_order(), - tuning_cache(nullptr) { + tuning_cache(nullptr), + is_body_program(is_body_program) { init_primitives(); kernel_selector::KernelBase::ResetCounter(); set_options(); @@ -163,8 +165,9 @@ program_impl::ptr program_impl::build_program(engine& engine, const topology_impl& topology, const build_options& options, bool is_internal, - bool no_optimizations) { - return std::make_shared(engine, topology, options, is_internal, no_optimizations); + bool no_optimizations, + bool is_body_program) { + return std::make_shared(engine, topology, options, is_internal, no_optimizations, is_body_program); } program_impl::ptr program_impl::build_program(engine& engine, From db67c1b2b99f36f5ac46713c41b518db25cac1c0 Mon Sep 17 00:00:00 2001 From: Gabriele Galiero Casay Date: Wed, 16 Jun 2021 17:11:25 +0200 Subject: [PATCH 43/43] Add reduce operations to script with trusted ops (#6200) --- .../layer_tests_summary/utils/constants.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py index 7e193c2d790..8309b0e8593 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/layer_tests_summary/utils/constants.py @@ -66,7 +66,12 @@ VERIFIED_OP_REFERENCES = [ 'ReadValue-6', 'ReduceL1-4', 'ReduceL2-4', + 'ReduceLogicalAnd-1', + 'ReduceLogicalOr-1', + 'ReduceMax-1', 'ReduceMean-1', + 'ReduceMin-1', + 'ReduceProd-1', 'ReduceSum-1', 'RegionYOLO-1', 'Relu-1',