From 788e76722f6c2a8fcd1774c0591a3d4cd41d6e74 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 15 Jul 2021 12:05:34 +0300 Subject: [PATCH] [GPU] Impls refactoring (#6603) --- .../clDNN/api/cldnn/graph/program.hpp | 11 +- .../cldnn/primitives/implementation_desc.hpp | 70 +++++ .../thirdparty/clDNN/src/CMakeLists.txt | 15 +- .../clDNN/src/gpu/activation_gpu.cpp | 126 --------- .../clDNN/src/gpu/average_unpooling_gpu.cpp | 86 ------ .../clDNN/src/gpu/batch_to_space_gpu.cpp | 77 ----- .../thirdparty/clDNN/src/gpu/border_gpu.cpp | 100 ------- .../clDNN/src/gpu/concatenation_gpu.cpp | 163 ----------- .../thirdparty/clDNN/src/gpu/crop_gpu.cpp | 127 --------- .../clDNN/src/gpu/deconvolution_gpu.cpp | 173 ------------ .../clDNN/src/gpu/depth_to_space_gpu.cpp | 70 ----- .../thirdparty/clDNN/src/gpu/eltwise_gpu.cpp | 195 ------------- .../clDNN/src/gpu/gather_nd_gpu.cpp | 68 ----- .../thirdparty/clDNN/src/gpu/kd_selector.h | 266 ------------------ .../thirdparty/clDNN/src/gpu/lrn_gpu.cpp | 84 ------ .../clDNN/src/gpu/mutable_data_gpu.cpp | 32 --- .../thirdparty/clDNN/src/gpu/mvn_gpu.cpp | 106 ------- .../clDNN/src/gpu/normalize_gpu.cpp | 93 ------ .../thirdparty/clDNN/src/gpu/one_hot_gpu.cpp | 74 ----- .../thirdparty/clDNN/src/gpu/quantize_gpu.cpp | 174 ------------ .../thirdparty/clDNN/src/gpu/register_gpu.cpp | 86 ------ .../thirdparty/clDNN/src/gpu/scale_gpu.cpp | 144 ---------- .../thirdparty/clDNN/src/gpu/select_gpu.cpp | 70 ----- .../clDNN/src/gpu/shuffle_channels_gpu.cpp | 85 ------ .../clDNN/src/gpu/space_to_batch_gpu.cpp | 77 ----- .../clDNN/src/gpu/space_to_depth_gpu.cpp | 79 ------ .../thirdparty/clDNN/src/gpu/tile_gpu.cpp | 72 ----- .../graph_optimizer/add_required_reorders.cpp | 14 +- .../src/graph_optimizer/compile_graph.cpp | 4 +- .../graph_optimizer/post_input_reorder.cpp | 6 +- .../graph_optimizer/post_optimize_weights.cpp | 2 +- .../remove_redundant_reorders.cpp | 5 +- .../src/graph_optimizer/reorder_inputs.cpp | 3 + .../common/condition.cpp} | 26 +- .../loop_gpu.cpp => impls/common/loop.cpp} | 22 +- .../clDNN/src/impls/common/register.cpp | 22 ++ .../clDNN/src/impls/common/register.hpp | 35 +++ .../common/wait_for_events.cpp} | 33 ++- .../{gpu => impls/cpu}/cpu_impl_helpers.hpp | 0 .../cpu/detection_output.cpp} | 24 +- .../cpu/non_max_suppression.cpp} | 27 +- .../cpu/proposal.cpp} | 26 +- .../clDNN/src/impls/cpu/register.cpp | 20 ++ .../clDNN/src/impls/cpu/register.hpp | 31 ++ .../clDNN/src/impls/implementation_map.hpp | 188 +++++++++++++ .../clDNN/src/impls/ocl/activation.cpp | 123 ++++++++ .../ocl/arg_max_min.cpp} | 54 ++-- .../clDNN/src/impls/ocl/average_unpooling.cpp | 79 ++++++ .../clDNN/src/impls/ocl/batch_to_space.cpp | 78 +++++ .../ocl/binary_convolution.cpp} | 24 +- .../thirdparty/clDNN/src/impls/ocl/border.cpp | 96 +++++++ .../ocl/broadcast.cpp} | 46 +-- .../clDNN/src/impls/ocl/concatenation.cpp | 159 +++++++++++ .../ocl/convolution.cpp} | 104 ++++--- .../thirdparty/clDNN/src/impls/ocl/crop.cpp | 118 ++++++++ .../ocl/ctc_greedy_decoder.cpp} | 28 +- .../cum_sum_gpu.cpp => impls/ocl/cum_sum.cpp} | 33 +-- .../ocl/custom_primitive.cpp} | 35 +-- .../clDNN/src/impls/ocl/deconvolution.cpp | 148 ++++++++++ .../ocl/deformable_convolution.cpp} | 44 +-- .../clDNN/src/impls/ocl/depth_to_space.cpp | 71 +++++ .../clDNN/src/impls/ocl/eltwise.cpp | 188 +++++++++++++ .../ocl/embedding_bag.cpp} | 25 +- .../ocl/extract_image_patches.cpp} | 33 +-- .../ocl/fully_connected.cpp} | 61 ++-- .../ocl/fused_conv_eltwise.cpp} | 69 ++--- .../gather_gpu.cpp => impls/ocl/gather.cpp} | 41 ++- 
.../clDNN/src/impls/ocl/gather_nd.cpp | 67 +++++ .../ocl/gather_tree.cpp} | 37 ++- .../{gpu/gemm_gpu.cpp => impls/ocl/gemm.cpp} | 45 +-- .../ocl/generic_layer.cpp} | 35 ++- .../{gpu/grn_gpu.cpp => impls/ocl/grn.cpp} | 24 +- .../thirdparty/clDNN/src/impls/ocl/lrn.cpp | 82 ++++++ .../ocl/lstm_dynamic_input.cpp} | 26 +- .../ocl/lstm_dynamic_timeloop.cpp} | 26 +- .../ocl/lstm_elt.cpp} | 30 +- .../ocl/lstm_gemm.cpp} | 30 +- .../ocl/max_unpooling.cpp} | 47 ++-- .../clDNN/src/impls/ocl/mutable_data.cpp | 32 +++ .../thirdparty/clDNN/src/impls/ocl/mvn.cpp | 86 ++++++ .../clDNN/src/impls/ocl/normalize.cpp | 83 ++++++ .../clDNN/src/impls/ocl/one_hot.cpp | 74 +++++ .../permute_gpu.cpp => impls/ocl/permute.cpp} | 22 +- .../pooling_gpu.cpp => impls/ocl/pooling.cpp} | 123 ++++---- .../ocl/primitive_base.cpp} | 8 +- .../ocl/primitive_base.hpp} | 14 +- .../ocl/pyramid_roi_align.cpp} | 39 ++- .../clDNN/src/impls/ocl/quantize.cpp | 160 +++++++++++ .../reduce_gpu.cpp => impls/ocl/reduce.cpp} | 61 ++-- .../ocl/region_yolo.cpp} | 29 +- .../clDNN/src/impls/ocl/register.cpp | 79 ++++++ .../ocl/register.hpp} | 157 +++++------ .../reorder_gpu.cpp => impls/ocl/reorder.cpp} | 20 +- .../ocl/reorg_yolo.cpp} | 33 +-- .../ocl/resample.cpp} | 63 +++-- .../reshape_gpu.cpp => impls/ocl/reshape.cpp} | 22 +- .../ocl/reverse_sequence.cpp} | 31 +- .../ocl/roi_pooling.cpp} | 26 +- .../thirdparty/clDNN/src/impls/ocl/scale.cpp | 136 +++++++++ .../ocl/scatter_elements_update.cpp} | 41 ++- .../ocl/scatter_nd_update.cpp} | 41 ++- .../ocl/scatter_update.cpp} | 41 ++- .../thirdparty/clDNN/src/impls/ocl/select.cpp | 69 +++++ .../clDNN/src/impls/ocl/shuffle_channels.cpp | 82 ++++++ .../softmax_gpu.cpp => impls/ocl/softmax.cpp} | 37 +-- .../clDNN/src/impls/ocl/space_to_batch.cpp | 78 +++++ .../clDNN/src/impls/ocl/space_to_depth.cpp | 76 +++++ .../ocl/strided_slice.cpp} | 38 +-- .../thirdparty/clDNN/src/impls/ocl/tile.cpp | 70 +++++ .../clDNN/src/include/implementation_map.h | 174 ------------ .../clDNN/src/include/layout_optimizer.h | 3 +- .../clDNN/src/include/network_impl.h | 4 +- .../clDNN/src/include/primitive_inst.h | 1 + .../clDNN/src/include/primitive_type.h | 15 +- .../clDNN/src/include/primitive_type_base.h | 18 +- .../clDNN/src/include/program_node.h | 5 + .../clDNN/src/{gpu => }/kernel_runner.cpp | 0 .../clDNN/src/{gpu => }/kernel_runner.h | 0 .../thirdparty/clDNN/src/layout_optimizer.cpp | 22 +- .../thirdparty/clDNN/src/program.cpp | 8 +- .../thirdparty/clDNN/src/program_node.cpp | 3 + 121 files changed, 3590 insertions(+), 3851 deletions(-) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h delete mode 100644 
inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/condition_gpu.cpp => impls/common/condition.cpp} (86%) rename inference-engine/thirdparty/clDNN/src/{gpu/loop_gpu.cpp => impls/common/loop.cpp} (94%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/common/register.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/common/register.hpp rename inference-engine/thirdparty/clDNN/src/{gpu/wait_for_events_gpu.cpp => impls/common/wait_for_events.cpp} (55%) rename inference-engine/thirdparty/clDNN/src/{gpu => impls/cpu}/cpu_impl_helpers.hpp (100%) rename inference-engine/thirdparty/clDNN/src/{gpu/detection_output_cpu.cpp => impls/cpu/detection_output.cpp} (98%) rename inference-engine/thirdparty/clDNN/src/{gpu/non_max_suppression_cpu.cpp => impls/cpu/non_max_suppression.cpp} (94%) rename inference-engine/thirdparty/clDNN/src/{gpu/proposal_gpu.cpp => impls/cpu/proposal.cpp} (96%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/arg_max_min_gpu.cpp => impls/ocl/arg_max_min.cpp} (62%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/binary_convolution_gpu.cpp => impls/ocl/binary_convolution.cpp} (90%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/broadcast_gpu.cpp => impls/ocl/broadcast.cpp} (51%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/convolution_gpu.cpp => impls/ocl/convolution.cpp} (57%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/ctc_greedy_decoder_gpu.cpp => impls/ocl/ctc_greedy_decoder.cpp} (64%) rename inference-engine/thirdparty/clDNN/src/{gpu/cum_sum_gpu.cpp => impls/ocl/cum_sum.cpp} (66%) rename inference-engine/thirdparty/clDNN/src/{gpu/custom_gpu_primitive_gpu.cpp => impls/ocl/custom_primitive.cpp} (91%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp rename 
inference-engine/thirdparty/clDNN/src/{gpu/deformable_convolution_gpu.cpp => impls/ocl/deformable_convolution.cpp} (78%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/embedding_bag_gpu.cpp => impls/ocl/embedding_bag.cpp} (75%) rename inference-engine/thirdparty/clDNN/src/{gpu/extract_image_patches_gpu.cpp => impls/ocl/extract_image_patches.cpp} (55%) rename inference-engine/thirdparty/clDNN/src/{gpu/fully_connected_gpu.cpp => impls/ocl/fully_connected.cpp} (54%) rename inference-engine/thirdparty/clDNN/src/{gpu/fused_conv_eltwise_gpu.cpp => impls/ocl/fused_conv_eltwise.cpp} (62%) rename inference-engine/thirdparty/clDNN/src/{gpu/gather_gpu.cpp => impls/ocl/gather.cpp} (60%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/gather_tree_gpu.cpp => impls/ocl/gather_tree.cpp} (52%) rename inference-engine/thirdparty/clDNN/src/{gpu/gemm_gpu.cpp => impls/ocl/gemm.cpp} (52%) rename inference-engine/thirdparty/clDNN/src/{gpu/generic_layer_gpu.cpp => impls/ocl/generic_layer.cpp} (82%) rename inference-engine/thirdparty/clDNN/src/{gpu/grn_gpu.cpp => impls/ocl/grn.cpp} (68%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_dynamic_input_gpu.cpp => impls/ocl/lstm_dynamic_input.cpp} (77%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_dynamic_timeloop_gpu.cpp => impls/ocl/lstm_dynamic_timeloop.cpp} (83%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_elt_gpu.cpp => impls/ocl/lstm_elt.cpp} (81%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_gemm_gpu.cpp => impls/ocl/lstm_gemm.cpp} (80%) rename inference-engine/thirdparty/clDNN/src/{gpu/max_unpooling_gpu.cpp => impls/ocl/max_unpooling.cpp} (51%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/permute_gpu.cpp => impls/ocl/permute.cpp} (74%) rename inference-engine/thirdparty/clDNN/src/{gpu/pooling_gpu.cpp => impls/ocl/pooling.cpp} (51%) rename inference-engine/thirdparty/clDNN/src/{gpu/primitive_gpu_base.cpp => impls/ocl/primitive_base.cpp} (87%) rename inference-engine/thirdparty/clDNN/src/{gpu/primitive_gpu_base.h => impls/ocl/primitive_base.hpp} (96%) rename inference-engine/thirdparty/clDNN/src/{gpu/pyramid_roi_align_gpu.cpp => impls/ocl/pyramid_roi_align.cpp} (59%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/reduce_gpu.cpp => impls/ocl/reduce.cpp} (50%) rename inference-engine/thirdparty/clDNN/src/{gpu/region_yolo_gpu.cpp => impls/ocl/region_yolo.cpp} (63%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/register_gpu.hpp => impls/ocl/register.hpp} (51%) rename inference-engine/thirdparty/clDNN/src/{gpu/reorder_gpu.cpp => impls/ocl/reorder.cpp} (90%) rename inference-engine/thirdparty/clDNN/src/{gpu/reorg_yolo_gpu.cpp => impls/ocl/reorg_yolo.cpp} (53%) rename inference-engine/thirdparty/clDNN/src/{gpu/resample_gpu.cpp => 
impls/ocl/resample.cpp} (69%) rename inference-engine/thirdparty/clDNN/src/{gpu/reshape_gpu.cpp => impls/ocl/reshape.cpp} (70%) rename inference-engine/thirdparty/clDNN/src/{gpu/reverse_sequence_gpu.cpp => impls/ocl/reverse_sequence.cpp} (59%) rename inference-engine/thirdparty/clDNN/src/{gpu/roi_pooling_gpu.cpp => impls/ocl/roi_pooling.cpp} (85%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_elements_update_gpu.cpp => impls/ocl/scatter_elements_update.cpp} (62%) rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_nd_update_gpu.cpp => impls/ocl/scatter_nd_update.cpp} (50%) rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_update_gpu.cpp => impls/ocl/scatter_update.cpp} (61%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/select.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/shuffle_channels.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/softmax_gpu.cpp => impls/ocl/softmax.cpp} (65%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/space_to_batch.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/space_to_depth.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/strided_slice_gpu.cpp => impls/ocl/strided_slice.cpp} (78%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/tile.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/include/implementation_map.h rename inference-engine/thirdparty/clDNN/src/{gpu => }/kernel_runner.cpp (100%) rename inference-engine/thirdparty/clDNN/src/{gpu => }/kernel_runner.h (100%) diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp index 95dfca9fc14..5f1a7cc4925 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// - #pragma once #include "cldnn/runtime/engine.hpp" +#include "cldnn/primitives/implementation_desc.hpp" #include "topology.hpp" @@ -99,14 +98,6 @@ struct learning_params { learning_params() : momentum(0.9f), weights_decay(0.0005f) {} }; -/// @brief Description of primitives implementation. -struct implementation_desc { - format::type output_format; ///< Output format. - std::string kernel_name; ///< GPU kernel name. -}; - -using implementation_forcing_map = std::map; - /// @brief Represents user-provided program build option. struct build_option { /// @brief Allow primitives fusing during program build (default: false). diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp new file mode 100644 index 00000000000..4ce5fcab2e8 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/tensor.hpp" + +#include +#include + +namespace cldnn { + +/// @brief Primitives implementation type. 
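// Illustrative sketch only (not part of this patch): how the impl_types flags and
// implementation_desc declared just below might be combined by a caller. The primitive id
// "conv1" and the chosen format are made-up values, and how the forcing map is consumed by
// the program build options is outside this excerpt.
impl_types allowed = impl_types::ocl | impl_types::common;        // accept either backend
bool ocl_ok = (allowed & impl_types::ocl) == impl_types::ocl;     // test a single flag
impl_types not_cpu = impl_types::any & ~impl_types::cpu;          // everything except cpu

implementation_forcing_map forcing;
forcing["conv1"] = implementation_desc(format::b_fs_yx_fsv16, "", impl_types::ocl);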
+enum class impl_types : uint8_t {
+    cpu = 1 << 0,
+    common = 1 << 1,
+    ocl = 1 << 2,
+    any = 0xFF,
+};
+
+inline impl_types operator&(impl_types a, impl_types b) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline impl_types operator|(impl_types a, impl_types b) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline impl_types operator~(impl_types a) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(~static_cast<underlying_type>(a));
+}
+
+inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) {
+    switch (impl_type) {
+        case impl_types::cpu: out << "cpu"; break;
+        case impl_types::common: out << "common"; break;
+        case impl_types::ocl: out << "ocl"; break;
+        case impl_types::any: out << "any"; break;
+        default: out << "unknown"; break;
+    }
+
+    return out;
+}
+
+/// @brief Description of primitives implementation.
+struct implementation_desc {
+    format::type output_format; ///< Output format.
+    std::string kernel_name; ///< GPU kernel name.
+    impl_types impl_type; ///< GPU implementation type.
+
+    implementation_desc() :
+        output_format(format::any),
+        kernel_name(""),
+        impl_type(impl_types::any) {}
+
+    implementation_desc(format::type output_format,
+                        std::string kernel_name,
+                        impl_types impl_type = impl_types::any) :
+        output_format(output_format),
+        kernel_name(kernel_name),
+        impl_type(impl_type) {}
+};
+
+using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
+
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
index b9c0af334bc..2dff45a2007 100644
--- a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
@@ -38,12 +38,15 @@ file(GLOB __CLDNN_Headers__include
 "${__CLDNN_Directory__include}/*.hpp"
 )
-set(__CLDNN_Directory__gpu "${CMAKE_CURRENT_SOURCE_DIR}/gpu")
-set(__CLDNN_Label__gpu "gpu")
+set(__CLDNN_Directory__impls "${CMAKE_CURRENT_SOURCE_DIR}/impls")
+set(__CLDNN_Label__gpu "impls")
 file(GLOB __CLDNN_Sources__gpu
-    "${__CLDNN_Directory__gpu}/*.h"
-    "${__CLDNN_Directory__gpu}/*.hpp"
-    "${__CLDNN_Directory__gpu}/*.cpp"
+    "${__CLDNN_Directory__impls}/common/*.hpp"
+    "${__CLDNN_Directory__impls}/common/*.cpp"
+    "${__CLDNN_Directory__impls}/cpu/*.hpp"
+    "${__CLDNN_Directory__impls}/cpu/*.cpp"
+    "${__CLDNN_Directory__impls}/ocl/*.hpp"
+    "${__CLDNN_Directory__impls}/ocl/*.cpp"
 )
 set(__CLDNN_Directory__cg_cache "${CLDNN__CODEGEN_INCDIR}")
@@ -130,5 +133,5 @@ endif()
 # ======================================================================================================
 ie_sse42_optimization_flags(sse4_2_flags)
-set_source_files_properties(gpu/detection_output_cpu.cpp half.cpp
+set_source_files_properties(impls/cpu/detection_output.cpp half.cpp
 PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
deleted file mode 100644
index 9a90f972752..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "activation_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include 
"activation/activation_kernel_selector.h" -#include "activation/activation_kernel_base.h" -#include "register_gpu.hpp" - -namespace cldnn { -namespace gpu { - -struct activation_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - - if (_outer.is_parameterized()) { - args.slope = instance.slope_memory(); - } - - return args; - } - - static primitive_impl* create(const activation_node& arg) { - auto activation_params = get_default_params(arg); - auto activation_optional_params = - get_default_optional_params(arg.get_program()); - - convert_new_activation_func(arg.get_primitive(), activation_params.activations); - - if (arg.is_parameterized()) { - const auto& slope_layout = arg.slope_input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); - - const auto params_num = - kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function); - - CLDNN_ERROR_LESS_THAN(arg.id(), - "Slope layout size count", - slope_layout.size.count(), - "output_layout.size.feature[0] * params_num", - static_cast(output_layout.size.feature[0] * params_num), - "Error - not enough data inside additional params buffer"); - - activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout)); - } - - auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params); - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto activation = new activation_gpu(arg, best_kernels[0]); - - return activation; - } -}; - -namespace detail { - -attach_activation_gpu::attach_activation_gpu() { - auto val_fw = activation_gpu::create; - - implementation_map::add({ - {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw}, - // block f16 format - {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, 
format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw}, - // 3D - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw}, - { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw }, - // bfwzyx - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw}, - // fs_b_yx_fsv32 - {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw}, - }); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp deleted file mode 100644 index 55ad712d4d8..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "average_unpooling_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "average_unpooling/average_unpooling_kernel_selector.h" -#include "average_unpooling/average_unpooling_kernel_base.h" - -namespace cldnn { -namespace gpu { - -struct average_unpooling_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - return args; - } - -public: - static primitive_impl* create(const average_unpooling_node& arg) { - auto average_unpooling_params = get_default_params(arg); - auto average_unpooling_optional_params = - get_default_optional_params(arg.get_program()); - auto& params = average_unpooling_params; - - auto primitive = arg.get_primitive(); - auto stride = primitive->stride; - - params.unpoolSize = { - 
(uint32_t)primitive->size.spatial[0], - (uint32_t)primitive->size.spatial[1], - }; - - params.unpoolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]}; - - auto& kernel_selector = kernel_selector::average_unpooling_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(average_unpooling_params, average_unpooling_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto average_unpool = new average_unpooling_gpu(arg, best_kernels[0]); - - return average_unpool; - } -}; - -namespace detail { - -attach_average_unpooling_gpu::attach_average_unpooling_gpu() { - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), - average_unpooling_gpu::create); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp deleted file mode 100644 index d2ef8e70df9..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "batch_to_space_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "batch_to_space/batch_to_space_kernel_selector.h" -#include "batch_to_space/batch_to_space_kernel_ref.h" -#include "cldnn/runtime/error_handler.hpp" -#include "data_inst.h" -#include - -using namespace cldnn; - -namespace cldnn { -namespace gpu { -struct batch_to_space_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -public: - static primitive_impl* create(const batch_to_space_node& arg) { - auto batch_to_space_params = get_default_params(arg); - auto batch_to_space_optional_params = - get_default_optional_params(arg.get_program()); - - auto primitive = arg.get_primitive(); - - batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape); - batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin); - batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end); - - auto& kernel_selector = kernel_selector::batch_to_space_kernel_selector::Instance(); - auto best_kernels = 
kernel_selector.GetBestKernels(batch_to_space_params, batch_to_space_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto batch_to_space = new batch_to_space_gpu(arg, best_kernels[0]); - - return batch_to_space; - } -}; - -namespace detail { - -attach_batch_to_space_gpu::attach_batch_to_space_gpu() { - auto val_fw = batch_to_space_gpu::create; - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp deleted file mode 100644 index 01c1d54ad10..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "border_inst.h" - -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "border/border_kernel_selector.h" -#include "border/border_kernel_base.h" -#include "cldnn/runtime/error_handler.hpp" - -namespace cldnn { -namespace gpu { - -struct border_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - static primitive_impl* create(const border_node& arg) { - auto b_params = get_default_params(arg, 1); - auto b_optional_params = - get_default_optional_params(arg.get_program()); - - auto desc = arg.get_primitive(); - - b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes); - b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes); - b_params.border_value = desc->border_value; - - switch (desc->type) { - case border_type::constant: - b_params.b_type = 
kernel_selector::border_type::CONSTANT; - break; - case border_type::edge: - b_params.b_type = kernel_selector::border_type::EDGE; - break; - case border_type::mirror: - b_params.b_type = kernel_selector::border_type::MIRROR; - break; - case border_type::mirror_101: - b_params.b_type = kernel_selector::border_type::MIRROR_101; - break; - default: - assert( - false && - "Encountered unhandled enum case: border_type during translation to kernel selector enumeration."); - } - - auto& kernel_selector = kernel_selector::border_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - return new border_gpu(arg, best_kernels[0]); - } -}; - -namespace detail { - -attach_border_gpu::attach_border_gpu() { - auto val_fw = border_gpu::create; - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp deleted file mode 100644 index 590e97a2a8d..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "concatenation_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" 
-#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "concatenation/concatenation_kernel_selector.h" -#include "concatenation/concatenation_kernel_base.h" - -#include - -namespace cldnn { -namespace gpu { - -namespace { -kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) { - switch (axis) { - case concatenation::along_x: - return kernel_selector::concat_axis::X; - case concatenation::along_y: - return kernel_selector::concat_axis::Y; - case concatenation::along_z: - return kernel_selector::concat_axis::Z; - case concatenation::along_w: - return kernel_selector::concat_axis::W; - case concatenation::along_f: - return kernel_selector::concat_axis::FEATURE; - case concatenation::along_b: - return kernel_selector::concat_axis::BATCH; - default: - return kernel_selector::concat_axis::X; - } -} -} // namespace - -struct concatenation_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) { - if (!_outer.can_be_optimized()) { - CLDNN_ERROR_NOT_EQUAL(_outer.id(), - "Input count", - _outer.inputs_count(), - "kds size", - kd.kernels.size(), - "Error - not enough kernels for concatenation"); - } - } - -protected: - bool optimized_out(concatenation_inst& instance) const override { - return parent::optimized_out(instance) || _outer.can_be_optimized(); - } - -public: - static primitive_impl* create(const concatenation_node& arg) { - if (arg.can_be_optimized()) { - return new concatenation_gpu(arg, {}); - } - - auto concat_params = get_default_params(arg); - auto concat_optional_params = - get_default_optional_params(arg.get_program()); - auto axis = arg.get_primitive()->axis; - - concat_params.inputs.resize(arg.inputs_count()); - for (size_t i = 0; i < arg.inputs_count(); ++i) { - const layout& input_layout = arg.input(i).get_output_layout(); - concat_params.inputs[i] = convert_data_tensor(input_layout); - } - - concat_params.axis = convert_axis(axis); - concat_optional_params.kernelPerInput = true; - - auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(concat_params, concat_optional_params); - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - concatenation_gpu* concat = new concatenation_gpu(arg, best_kernels[0]); - - return concat; - } -}; - -namespace detail { - -attach_concatenation_gpu::attach_concatenation_gpu() { - implementation_map::add({ - {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, 
data_types::i8, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create}, - // 5D - { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create }, - // block f16 format - {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), 
concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), concatenation_gpu::create}, - // MMAD - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), concatenation_gpu::create}, - // 6D - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), concatenation_gpu::create}, - }); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp deleted file mode 100644 index 07244dde5ae..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "crop_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "eltwise/eltwise_kernel_selector.h" -#include "eltwise/eltwise_kernel_base.h" -#include "cldnn/runtime/error_handler.hpp" - -namespace cldnn { -namespace gpu { - -struct crop_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - bool optimized_out(crop_inst& instance) const override { - return parent::optimized_out(instance) || _outer.can_be_optimized(); - } - -public: - static primitive_impl* create(const crop_node& arg) { - auto ew_params = get_default_params(arg, 1); - auto ew_optional_params = - get_default_optional_params(arg.get_program()); - - ew_params.operations.push_back( - {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN}); - - const auto& input_layout = arg.input().get_output_layout(); - ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets); - - auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto crop = new crop_gpu(arg, best_kernels[0]); - - return crop; - } -}; - -namespace detail { - -attach_crop_gpu::attach_crop_gpu() { - auto val_fw = crop_gpu::create; - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw); - 
implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, 
data_types::u8, format::bfwzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp deleted file mode 100644 index 9c1532ecfd4..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "deconvolution_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "deconvolution/deconvolution_kernel_selector.h" -#include "deconvolution/deconvolution_kernel_base.h" -#include - -namespace cldnn { -namespace gpu { - -struct deconvolution_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - // TODO: share it with convolution and fully connected - bool validate_impl(const typed_primitive_inst&) const override { - bool res = true; - - CLDNN_ERROR_NOT_EQUAL(_outer.id(), - "deconvolution filling value", - _outer.get_output_layout().data_padding.filling_value(), - "padding mode", - 0.0f, - "Unknown padding mode in deconvolution."); - - return res; - } - - kernel_arguments_data 
get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - - args.weights = instance.weights_memory(split); - args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; - - return args; - } - - int32_t get_split() const override { return _outer.get_split(); } - - uint32_t get_groups() const override { return _outer.get_groups(); } - -public: - static primitive_impl* create(const deconvolution_node& arg) { - const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout(); - - const auto& weights_size = weights_layout.size; - - const auto& split = primitive->split(); - const auto& stride = primitive->stride; -#if 0 // TODO: support dilation - const auto& dilation = primitive->dilation; -#else - const tensor dilation = {0, 0, 1, 1, 1}; -#endif - const auto actual_split = split; - - const auto& input_offset = primitive->input_offset; - const auto& groups = primitive->groups; - - auto deconv_params = get_weights_bias_default_params( - arg, - (groups > 1) ? 1 : actual_split, - 1, - primitive->grouped_weights_shape); - auto deconv_optional_params = - get_default_weights_bias_optional_params(arg.get_program()); - - deconv_params.split = split; - deconv_params.groups = groups; - - auto spatial_size = arg.get_output_layout().format.dimension() - 2; - uint32_t kx = weights_size.spatial[0]; - uint32_t ky = weights_size.spatial[1]; - uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2]; - deconv_params.filterSize = { kx, ky, kz }; - - deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0), - (uint32_t)std::max(-input_offset.spatial[1], 0), - (uint32_t)std::max(-input_offset.spatial[2], 0)}; - - deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]}; - - deconv_params.dilation = {(uint32_t)dilation.spatial[0], - (uint32_t)dilation.spatial[1], - (uint32_t)dilation.spatial[2]}; - - auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with these arguments"); - auto deconv = new deconvolution_gpu(arg, best_kernels[0]); - - return deconv; - } -}; - -namespace detail { - -attach_deconvolution_gpu::attach_deconvolution_gpu() { - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), - deconvolution_gpu::create); - 
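// Worked example (illustrative values, not from the patch) for the padding conversion in
// create() above: with primitive->input_offset.spatial = {-1, -2, 0}, the deleted code set
// deconv_params.padding = {1, 2, 0}; a positive offset would simply be clamped to 0 by std::max.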
implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp deleted file mode 100644 index 3a62c9e0a9c..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "depth_to_space_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "depth_to_space/depth_to_space_kernel_selector.h" -#include "depth_to_space/depth_to_space_kernel_ref.h" -#include "cldnn/runtime/error_handler.hpp" -#include "common_types.h" - -using namespace cldnn; - -namespace cldnn { -namespace gpu { -struct depth_to_space_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -public: - static primitive_impl* 
-        auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
-        auto depth_to_space_optional_params =
-            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
-
-        depth_to_space_params.block_size = arg.get_primitive()->block_size;
-        depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
-                                                                                                    : kernel_selector::depth_to_space_mode::DEPTH_FIRST;
-
-        auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto depth_to_space = new depth_to_space_gpu(arg, best_kernels[0]);
-
-        return depth_to_space;
-    }
-};
-
-namespace detail {
-
-attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
-    auto val_fw = depth_to_space_gpu::create;
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-}
-
-} // namespace detail
-} // namespace gpu
-} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
deleted file mode 100644
index 8ffc149e160..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "eltwise_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "eltwise/eltwise_kernel_selector.h"
-#include "eltwise/eltwise_kernel_base.h"
-#include <vector>
-
-namespace cldnn {
-namespace gpu {
-
-struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
-    using parent = typed_primitive_gpu_impl<eltwise>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<eltwise_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const eltwise_node& arg) {
-        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
-        auto ew_optional_params =
-            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
-
-        for (size_t i = 1; i < arg.inputs_count(); i++) {
-            ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
-        }
-
-        const auto& primitive = arg.get_primitive();
-
-        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
-                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
-                                        convert_to_eltwise_mode(primitive->mode)});
-
-        for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
-            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
-                                             kernel_selector::eltwise_params::InputType::Buffer(i)},
-                                            convert_to_eltwise_mode(primitive->mode)});
-        }
-
-        if (primitive->mode == eltwise_mode::sum) {
-            ew_params.coefficients = primitive->coefficients;
-        }
-
-        for (size_t i = 0; i < ew_params.inputs.size(); i++) {
-            if (!ew_params.inputs[i].SameDims(ew_params.output)) {
-                std::vector<tensor::value_type> input_size = arg.input(i).get_output_layout().size.raw.vector();
-                std::vector<tensor::value_type> output_size = arg.get_output_layout().size.raw.vector();
-                bool broadcast = false;
-                for (size_t d = 0; d < output_size.size(); d++) {
-                    if (output_size[d] != 1 && input_size[d] == 1)
-                        broadcast = true;
-                }
-                if (broadcast) {
-                    ew_params.broadcast = true;
-                    break;
-                } else {
-                    ew_params.layoutBased = true;
-                    break;
-                }
-            }
-        }
-
-        // stride
-        if (!primitive->stride.empty()) {
-            const auto& stride = primitive->stride;
-            ew_params.stride.resize(stride.size());
-            for (size_t i = 0; i < primitive->stride.size(); i++) {
-                ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
-                                       (uint32_t)stride[i].spatial[1],
-                                       (uint32_t)stride[i].spatial[2]};
-            }
-        }
-
-        // check if strides are the same
-        if (!ew_params.stride.empty()) {
-            const auto& stride = ew_params.stride[0];
-            for (size_t i = 1; i < ew_params.stride.size(); i++) {
-                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
-                    ew_params.layoutBased = true;
-            }
-        } else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
-            ew_params.broadcast = true;
-        }
-
-        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
-        bool quantization = true;
-        for (size_t i = 0; i < arg.inputs_count(); i++) {
-            if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
-                arg.input(i).get_output_layout().data_type != data_types::i8) {
-                quantization = false;
-            }
-        }
-        ew_params.int8_quantization = quantization;
-
-        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto eltwise = new eltwise_gpu(arg, best_kernels[0]);
-
-        return eltwise;
-    }
-};
-
-namespace detail {
-
-attach_eltwise_gpu::attach_eltwise_gpu() {
-    implementation_map<eltwise>::add(
-        {{ std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
-         // block f16
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         // 3D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
-         // 4D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         // MMAD
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-
-         //
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});
-}
-
-} // namespace detail
-} // namespace gpu
-} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
deleted file mode 100644
index cf7d692651a..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "gather_nd_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include
"kernel_selector_helper.h" -#include "gather/gather_nd_kernel_selector.h" -#include "gather/gather_nd_kernel_ref.h" - -using namespace cldnn; - -namespace cldnn { -namespace gpu { - -struct gather_nd_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - static primitive_impl* create(const gather_nd_node& arg) { - auto gather_nd_params = get_default_params(arg); - auto gather_nd_optional_params = - get_default_optional_params(arg.get_program()); - - gather_nd_params.indices_rank = arg.get_primitive()->indices_rank; - gather_nd_params.batch_dims = arg.get_primitive()->batch_dims; - - gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - - auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto gather_nd = new gather_nd_gpu(arg, best_kernels[0]); - - return gather_nd; - } -}; - -namespace detail { - -attach_gather_nd_gpu::attach_gather_nd_gpu() { - auto val_fw = gather_nd_gpu::create; - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h b/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h deleted file mode 100644 index 32c9694e155..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -namespace cldnn { -namespace gpu { -namespace mputils { -template -struct type_tuple; - -template -struct index_tuple {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct count_tt; - -template -struct count_tt, ElemTy> - : std::integral_constant, ElemTy>::value + - static_cast(std::is_same::value)> {}; - -template -struct count_tt, ElemTy> : std::integral_constant {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct size_tt; - -template -struct size_tt> : std::integral_constant {}; - -// 
----------------------------------------------------------------------------------------------------------------------- - -template -struct split_tt; - -namespace detail { -template -struct split_tt_helper1; - -template -struct split_tt_helper1, ElemTy, type_tuple> - : split_tt_helper1, ElemTy, type_tuple> {}; - -template -struct split_tt_helper1, Ty, type_tuple> { - using first_type = type_tuple; - using second_type = type_tuple; -}; - -template -struct split_tt_helper1, ElemTy, type_tuple> { - using first_type = type_tuple<>; - using second_type = type_tuple; -}; -} // namespace detail - -template -struct split_tt, ElemTy> : detail::split_tt_helper1, ElemTy, type_tuple<>> {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct index_of_tt; - -static constexpr std::size_t npos = static_cast(-1); - -namespace detail { -template -struct index_of_tt_helper1; - -template -struct index_of_tt_helper1, ElemTy, Pos> - : index_of_tt_helper1, ElemTy, Pos + 1> {}; - -template -struct index_of_tt_helper1, Ty, Pos> : std::integral_constant {}; - -template -struct index_of_tt_helper1, ElemTy, Pos> : std::integral_constant {}; -} // namespace detail - -template -struct index_of_tt, ElemTy> : detail::index_of_tt_helper1, ElemTy, 0> {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct remove_tt; - -namespace detail { -template -struct remove_tt_helper1; - -template -struct remove_tt_helper1, ElemTy, type_tuple> - : remove_tt_helper1, ElemTy, type_tuple> {}; - -template -struct remove_tt_helper1, Ty, type_tuple> - : remove_tt_helper1, Ty, type_tuple> {}; - -template -struct remove_tt_helper1, ElemTy, type_tuple> { - using type = type_tuple; -}; -} // namespace detail - -template -struct remove_tt, ElemTy> : detail::remove_tt_helper1, ElemTy, type_tuple<>> {}; - -template -using remove_tt_t = typename remove_tt::type; - -// ----------------------------------------------------------------------------------------------------------------------- - -template