From 681faadce3d5df5f31c4c5f7920f22488cc675c5 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Wed, 8 Mar 2023 08:34:24 +0400 Subject: [PATCH] [GPU] Added shape agnostic kernels for GatherElements and Tile (#15798) * [GPU] Added shape agnostic kernel for GatherElements * [GPU] Added shape agnostic kernel for Tile --- .../src/graph/impls/ocl/gather_elements.cpp | 42 ++++++---- .../intel_gpu/src/graph/impls/ocl/range.cpp | 7 +- .../intel_gpu/src/graph/impls/ocl/softmax.cpp | 6 +- .../intel_gpu/src/graph/impls/ocl/tile.cpp | 30 ++++++- .../cl_kernels/gather_elements_ref.cl | 3 +- .../kernel_selector/cl_kernels/tile_ref.cl | 4 +- .../gather/gather_elements_kernel_ref.cpp | 24 ++++-- .../gather/gather_elements_kernel_ref.h | 2 +- .../kernels/range/range_kernel_ref.cpp | 2 +- .../kernels/tile/tile_kernel_ref.cpp | 16 +++- .../kernels/tile/tile_kernel_ref.h | 2 +- .../test_cases/gather_elements_gpu_test.cpp | 78 +++++++++++++++++++ .../tests/test_cases/tile_gpu_test.cpp | 47 +++++++++++ 13 files changed, 223 insertions(+), 40 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp index 6eb050e9113..1ee7c44f0ee 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/gather_elements.cpp @@ -54,9 +54,9 @@ struct gather_elements_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); - auto params = get_default_params(impl_param); + auto params = get_default_params(impl_param, is_shape_agnostic); auto optional_params = get_default_optional_params(impl_param.get_program()); size_t rank = impl_param.get_output_layout().get_rank(); @@ -65,24 +65,36 @@ struct gather_elements_impl : typed_primitive_impl_ocl { params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1))); return {params, optional_params}; } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + update_kernels_list_to_skip(); + } }; namespace detail { attach_gather_elements_impl::attach_gather_elements_impl() { - implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, { - std::make_tuple(data_types::i8, format::bfyx), - std::make_tuple(data_types::u8, format::bfyx), - std::make_tuple(data_types::f32, format::bfyx), - std::make_tuple(data_types::f16, format::bfyx), - std::make_tuple(data_types::i32, format::bfyx), - std::make_tuple(data_types::f32, format::bfzyx), - std::make_tuple(data_types::f16, format::bfzyx), - std::make_tuple(data_types::i32, format::bfzyx), - std::make_tuple(data_types::f32, format::bfwzyx), - std::make_tuple(data_types::f16, format::bfwzyx), - std::make_tuple(data_types::i32, format::bfwzyx), - }); + auto types = { + data_types::f32, + data_types::f16, + data_types::i32, + data_types::i8, + data_types::u8 + }; + + auto formats = { + format::bfyx, + format::bfzyx, + format::bfwzyx + }; + + implementation_map::add(impl_types::ocl, + shape_types::any, + typed_primitive_impl_ocl::create, + types, + formats); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp index 959990e62d1..56e43bd969d 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/range.cpp @@ -23,8 +23,8 @@ struct range_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { - auto params = get_default_params(impl_param); + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { + auto params = get_default_params(impl_param, is_shape_agnostic); for (int i : {1, 2}) params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i))); auto optional_params = get_default_optional_params(impl_param.get_program()); @@ -33,8 +33,9 @@ struct range_impl : typed_primitive_impl_ocl { } void update_dispatch_data(const kernel_impl_params& impl_param) override { - auto kernel_params = get_kernel_params(impl_param); + auto kernel_params = get_kernel_params(impl_param, true); (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + update_kernels_list_to_skip(); } }; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp index d18dd4e83be..0c2e20d8e4a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/softmax.cpp @@ -45,9 +45,9 @@ struct softmax_impl : typed_primitive_impl_ocl { return make_unique(*this); } - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); - auto params = get_default_params(impl_param); + auto params = get_default_params(impl_param, is_shape_agnostic); auto optional_params = get_default_optional_params(impl_param.get_program()); size_t rank = impl_param.get_output_layout().get_rank(); @@ -57,7 +57,7 @@ struct softmax_impl : typed_primitive_impl_ocl { } void update_dispatch_data(const kernel_impl_params& impl_param) override { - auto kernel_params = get_kernel_params(impl_param); + auto kernel_params = get_kernel_params(impl_param, true); (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); update_kernels_list_to_skip(); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp index 8c166bd1aaf..f85e6841be3 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/tile.cpp @@ -24,9 +24,9 @@ struct tile_impl : typed_primitive_impl_ocl { } public: - static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) { + static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { const auto& primitive = impl_param.typed_desc(); - auto params = get_default_params(impl_param); + auto params = get_default_params(impl_param, is_shape_agnostic); auto optional_params = get_default_optional_params(impl_param.get_program()); auto repeats = primitive->repeats; @@ -42,13 +42,19 @@ public: return {params, optional_params}; } + + void update_dispatch_data(const kernel_impl_params& impl_param) override { + auto kernel_params = get_kernel_params(impl_param, true); + (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data); + update_kernels_list_to_skip(); + } }; namespace detail { attach_tile_impl::attach_tile_impl() { auto types = {data_types::i8, data_types::u8, data_types::i32, data_types::f16, data_types::f32}; - auto formats = { + auto static_formats = { format::bfyx, format::bfzyx, format::bfwzyx, @@ -65,7 +71,23 @@ attach_tile_impl::attach_tile_impl() { format::bs_fs_zyx_bsv32_fsv16 }; - implementation_map::add(impl_types::ocl, typed_primitive_impl_ocl::create, types, formats); + implementation_map::add(impl_types::ocl, + shape_types::static_shape, + typed_primitive_impl_ocl::create, + types, + static_formats); + + auto dynamic_formats = { + format::bfyx, + format::bfzyx, + format::bfwzyx + }; + + implementation_map::add(impl_types::ocl, + shape_types::dynamic_shape, + typed_primitive_impl_ocl::create, + types, + dynamic_formats); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_elements_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_elements_ref.cl index 86fccc859de..60981866152 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_elements_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gather_elements_ref.cl @@ -6,7 +6,8 @@ #define GET_OUTPUT_INDEX(prefix, idx_order) CAT(prefix, _GET_INDEX)(idx_order) -KERNEL(gather_elements_ref)(const __global INPUT0_TYPE* data, +KERNEL(gather_elements_ref)(OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* data, const __global INPUT1_TYPE* indices, __global OUTPUT_TYPE* output #if HAS_FUSED_OPS_DECLS diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/tile_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/tile_ref.cl index 25f923ebdde..224ade42481 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/tile_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/tile_ref.cl @@ -4,7 +4,9 @@ #include "include/batch_headers/fetch_data.cl" -KERNEL(tile_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) +KERNEL(tile_ref)(OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + __global OUTPUT_TYPE* output) { const uint x = (uint)get_global_id(0) % OUTPUT_SIZE_X; const uint y = (uint)get_global_id(0) / OUTPUT_SIZE_X; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.cpp index 44bc87a2d96..0a54ffe60d8 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.cpp @@ -55,6 +55,7 @@ ParamsKey GatherElementsKernelRef::GetSupportedKey() const { k.EnableTensorPitches(); k.EnableBatching(); k.EnableDifferentTypes(); + k.EnableDynamicShapesSupport(); return k; } @@ -71,7 +72,7 @@ static inline std::vector GetDefaultOrder(size_t size) { return default_order; } -CommonDispatchData GatherElementsKernelRef::SetDefault(const gather_elements_params& params, const optional_params&) const { +CommonDispatchData GatherElementsKernelRef::SetDefault(const gather_elements_params& params) const { CommonDispatchData dispatchData; auto in_layout = params.inputs[0].GetLayout(); auto out_layout = params.outputs[0].GetLayout(); @@ -134,10 +135,10 @@ bool GatherElementsKernelRef::Validate(const Params& p, const optional_params& o } const gather_elements_params& params = static_cast(p); - auto input_dims = params.inputs[0].LogicalDims(); - auto indices_dims = params.inputs[1].LogicalDims(); + size_t input_rank = params.inputs[0].GetDims().size(); + size_t indices_rank = params.inputs[1].GetDims().size(); - if (input_dims.size() != indices_dims.size()) { + if (input_rank != indices_rank) { return false; } @@ -157,13 +158,22 @@ KernelsData GatherElementsKernelRef::GetKernelsData(const Params& params, const KernelData kd = KernelData::Default(params); gather_elements_params& newParams = *static_cast(kd.params.get()); - auto dispatchData = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams); auto cldnn_jit = GetJitConstants(newParams); - auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + }; + auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params)); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, + "", false, false, 2, GetFusedPrimitiveInputsCount(params), 1, newParams.has_dynamic_tensors()); return { kd }; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.h index f6916b65e00..82566fb253f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/gather/gather_elements_kernel_ref.h @@ -28,7 +28,7 @@ public: GatherElementsKernelRef() : KernelBaseOpenCL("gather_elements_ref") {} virtual ~GatherElementsKernelRef() {} virtual JitConstants GetJitConstants(const gather_elements_params& params) const; - virtual CommonDispatchData SetDefault(const gather_elements_params& params, const optional_params&) const; + virtual CommonDispatchData SetDefault(const gather_elements_params& params) const; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/range/range_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/range/range_kernel_ref.cpp index d781427611e..b2b4f7fc4ab 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/range/range_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/range/range_kernel_ref.cpp @@ -35,7 +35,7 @@ KernelsData RangeKernelRef::GetKernelsData(const Params ¶ms, const optional_ auto jit = CreateJit(kernelName, jit_constants, entry_point); kernel_data.update_dispatch_data_func = [](const Params& params, KernelData& kd) { - const auto& prim_params = static_cast(params); + const auto& prim_params = static_cast(params); auto dispatchData = SetDefault(prim_params); OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); kd.kernels[0].params.workGroups.global = dispatchData.gws; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.cpp index 75145d57d88..d558b0836d2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.cpp @@ -25,10 +25,11 @@ ParamsKey TileKernelRef::GetSupportedKey() const { k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); + k.EnableDynamicShapesSupport(); return k; } -CommonDispatchData TileKernelRef::SetDefault(const tile_params& params, const optional_params&) const { +CommonDispatchData TileKernelRef::SetDefault(const tile_params& params) const { CommonDispatchData dispatchData; auto in_layout = params.inputs[0].GetLayout(); auto out_layout = params.outputs[0].GetLayout(); @@ -55,14 +56,23 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p KernelData kd = KernelData::Default(params); tile_params& newParams = *static_cast(kd.params.get()); - auto dispatchData = SetDefault(newParams, options); + auto dispatchData = SetDefault(newParams); auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options); auto cldnn_jit = GetJitConstants(newParams); auto jit = CreateJit(kernelName, cldnn_jit, entry_point); + kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) { + const auto& prim_params = static_cast(params); + auto dispatchData = SetDefault(prim_params); + OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func"); + kd.kernels[0].params.workGroups.global = dispatchData.gws; + kd.kernels[0].params.workGroups.local = dispatchData.lws; + }; + auto& kernel = kd.kernels[0]; - FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point); + FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, + EXE_MODE_DEFAULT, false, false, 1, 0, 1, newParams.has_dynamic_tensors()); return {kd}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.h index 0cbe6903d20..dce8f1e30f1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/tile/tile_kernel_ref.h @@ -27,7 +27,7 @@ public: virtual ~TileKernelRef() {} virtual JitConstants GetJitConstants(const tile_params& params) const; - virtual CommonDispatchData SetDefault(const tile_params& params, const optional_params&) const; + virtual CommonDispatchData SetDefault(const tile_params& params) const; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_elements_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_elements_gpu_test.cpp index d5b998b9344..6f6d1cc03be 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_elements_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_elements_gpu_test.cpp @@ -10,6 +10,8 @@ #include #include +#include "gather_elements_inst.h" + #include #include @@ -1237,3 +1239,79 @@ TEST(gather_elements_gpu_fp16, export_import) { DoTest(engine, input0, input1, expected_results, tensor(2, 3, 3, 1, 1, 5), axis, true); } + +TEST(gather_elements_gpu, dynamic) { + auto& engine = get_test_engine(); + + auto axis = 3; + + ov::Shape in0_shape = { 1, 2, 1, 5, 2, 4 }; + ov::Shape in1_shape = { 1, 2, 1, 2, 2, 4 }; + + auto in0_dyn_layout = layout{ov::PartialShape::dynamic(in0_shape.size()), data_types::u8, format::bfwzyx}; + auto in1_dyn_layout = layout{ov::PartialShape::dynamic(in1_shape.size()), data_types::u8, format::bfwzyx}; + + auto input0 = engine.allocate_memory({in0_shape, data_types::u8, format::bfwzyx}); // data + auto input1 = engine.allocate_memory({in1_shape, data_types::u8, format::bfwzyx}); // indices + + set_values(input0, { + 0, 1, 8, 5, 5, 2, 0, 7, + 7, 10, 4, 5, 9, 0, 0, 5, + 7, 0, 4, 0, 4, 7, 6, 10, + 9, 5, 1, 7, 4, 7, 10, 8, + 2, 0, 8, 3, 6, 8, 10, 4, + 2, 10, 7, 8, 7, 0, 6, 9, + 2, 4, 8, 5, 2, 3, 3, 1, + 5, 9, 10, 0, 9, 5, 5, 3, + 10, 5, 2, 0, 10, 0, 5, 4, + 3, 10, 5, 5, 10, 0, 8, 8 + }); + + set_values(input1, { + 0, 2, 4, 3, + 4, 0, 0, 1, + 4, 0, 1, 0, + 1, 0, 1, 1, + 3, 1, 4, 2, + 4, 2, 1, 3, + 2, 1, 2, 4, + 1, 0, 2, 4 + }); + + std::vector expected_results = { + 0, 0, 8, 7, + 6, 2, 0, 5, + 2, 1, 4, 5, + 9, 2, 0, 5, + 10, 4, 5, 0, + 10, 5, 3, 4, + 5, 4, 10, 5, + 2, 0, 5, 8 + }; + + topology topology; + topology.add(input_layout("InputData", in0_dyn_layout)); + topology.add(input_layout("InputIndices", in1_dyn_layout)); + topology.add(gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), axis)); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + + network.set_input_data("InputData", input0); + network.set_input_data("InputIndices", input1); + + auto inst = network.get_primitive("gather_elements"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network.execute(); + + auto output = outputs.at("gather_elements").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < expected_results.size(); ++i) { + ASSERT_EQ(expected_results[i], output_ptr[i]); + } +} diff --git a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp index d02cc30aa76..826c7f0a4ee 100644 --- a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp @@ -6,6 +6,7 @@ #include #include +#include "tile_inst.h" #include @@ -275,6 +276,52 @@ TEST_F(tile_gpu, basic_in1x2x2x2_axis_z) { this->test_basic_in1x2x2x2_axis_z(false); } +TEST_F(tile_gpu, dynamic) { + auto& engine = get_test_engine(); + + ov::Shape input_shape = { 1, 2, 2, 2 }; + auto input_dyn_layout = layout{ ov::PartialShape::dynamic(input_shape.size()), data_types::f32, format::bfyx }; + auto input = engine.allocate_memory({ input_shape, data_types::f32, format::bfyx }); + + set_values(input, { 1.f, 0.f, + 5.f, 1.5f, + 2.f, 0.f, + 6.f, 5.2f }); + + topology topology; + topology.add(input_layout("input", input_dyn_layout)); + topology.add(tile("tile", input_info("input"), std::vector{ 1, 2, 1, 1 })); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + network network(engine, topology, config); + network.set_input_data("input", input); + + auto inst = network.get_primitive("tile"); + auto impl = inst->get_impl(); + ASSERT_TRUE(impl != nullptr); + ASSERT_TRUE(impl->is_dynamic()); + + auto outputs = network.execute(); + + auto output = outputs.at("tile").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + std::vector ref_data = { 1.f, 0.f, + 5.f, 1.5f, + 2.f, 0.f, + 6.f, 5.2f, + + 1.f, 0.f, + 5.f, 1.5f, + 2.f, 0.f, + 6.f, 5.2f }; + + for (size_t i = 0; i < ref_data.size(); ++i) { + ASSERT_EQ(output_ptr[i], ref_data[i]) << "Index=" << i; + } +} + namespace { template struct Params {