From 788e76722f6c2a8fcd1774c0591a3d4cd41d6e74 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Thu, 15 Jul 2021 12:05:34 +0300 Subject: [PATCH] [GPU] Impls refactoring (#6603) --- .../clDNN/api/cldnn/graph/program.hpp | 11 +- .../cldnn/primitives/implementation_desc.hpp | 70 +++++ .../thirdparty/clDNN/src/CMakeLists.txt | 15 +- .../clDNN/src/gpu/activation_gpu.cpp | 126 --------- .../clDNN/src/gpu/average_unpooling_gpu.cpp | 86 ------ .../clDNN/src/gpu/batch_to_space_gpu.cpp | 77 ----- .../thirdparty/clDNN/src/gpu/border_gpu.cpp | 100 ------- .../clDNN/src/gpu/concatenation_gpu.cpp | 163 ----------- .../thirdparty/clDNN/src/gpu/crop_gpu.cpp | 127 --------- .../clDNN/src/gpu/deconvolution_gpu.cpp | 173 ------------ .../clDNN/src/gpu/depth_to_space_gpu.cpp | 70 ----- .../thirdparty/clDNN/src/gpu/eltwise_gpu.cpp | 195 ------------- .../clDNN/src/gpu/gather_nd_gpu.cpp | 68 ----- .../thirdparty/clDNN/src/gpu/kd_selector.h | 266 ------------------ .../thirdparty/clDNN/src/gpu/lrn_gpu.cpp | 84 ------ .../clDNN/src/gpu/mutable_data_gpu.cpp | 32 --- .../thirdparty/clDNN/src/gpu/mvn_gpu.cpp | 106 ------- .../clDNN/src/gpu/normalize_gpu.cpp | 93 ------ .../thirdparty/clDNN/src/gpu/one_hot_gpu.cpp | 74 ----- .../thirdparty/clDNN/src/gpu/quantize_gpu.cpp | 174 ------------ .../thirdparty/clDNN/src/gpu/register_gpu.cpp | 86 ------ .../thirdparty/clDNN/src/gpu/scale_gpu.cpp | 144 ---------- .../thirdparty/clDNN/src/gpu/select_gpu.cpp | 70 ----- .../clDNN/src/gpu/shuffle_channels_gpu.cpp | 85 ------ .../clDNN/src/gpu/space_to_batch_gpu.cpp | 77 ----- .../clDNN/src/gpu/space_to_depth_gpu.cpp | 79 ------ .../thirdparty/clDNN/src/gpu/tile_gpu.cpp | 72 ----- .../graph_optimizer/add_required_reorders.cpp | 14 +- .../src/graph_optimizer/compile_graph.cpp | 4 +- .../graph_optimizer/post_input_reorder.cpp | 6 +- .../graph_optimizer/post_optimize_weights.cpp | 2 +- .../remove_redundant_reorders.cpp | 5 +- .../src/graph_optimizer/reorder_inputs.cpp | 3 + .../common/condition.cpp} | 26 +- .../loop_gpu.cpp => impls/common/loop.cpp} | 22 +- .../clDNN/src/impls/common/register.cpp | 22 ++ .../clDNN/src/impls/common/register.hpp | 35 +++ .../common/wait_for_events.cpp} | 33 ++- .../{gpu => impls/cpu}/cpu_impl_helpers.hpp | 0 .../cpu/detection_output.cpp} | 24 +- .../cpu/non_max_suppression.cpp} | 27 +- .../cpu/proposal.cpp} | 26 +- .../clDNN/src/impls/cpu/register.cpp | 20 ++ .../clDNN/src/impls/cpu/register.hpp | 31 ++ .../clDNN/src/impls/implementation_map.hpp | 188 +++++++++++++ .../clDNN/src/impls/ocl/activation.cpp | 123 ++++++++ .../ocl/arg_max_min.cpp} | 54 ++-- .../clDNN/src/impls/ocl/average_unpooling.cpp | 79 ++++++ .../clDNN/src/impls/ocl/batch_to_space.cpp | 78 +++++ .../ocl/binary_convolution.cpp} | 24 +- .../thirdparty/clDNN/src/impls/ocl/border.cpp | 96 +++++++ .../ocl/broadcast.cpp} | 46 +-- .../clDNN/src/impls/ocl/concatenation.cpp | 159 +++++++++++ .../ocl/convolution.cpp} | 104 ++++--- .../thirdparty/clDNN/src/impls/ocl/crop.cpp | 118 ++++++++ .../ocl/ctc_greedy_decoder.cpp} | 28 +- .../cum_sum_gpu.cpp => impls/ocl/cum_sum.cpp} | 33 +-- .../ocl/custom_primitive.cpp} | 35 +-- .../clDNN/src/impls/ocl/deconvolution.cpp | 148 ++++++++++ .../ocl/deformable_convolution.cpp} | 44 +-- .../clDNN/src/impls/ocl/depth_to_space.cpp | 71 +++++ .../clDNN/src/impls/ocl/eltwise.cpp | 188 +++++++++++++ .../ocl/embedding_bag.cpp} | 25 +- .../ocl/extract_image_patches.cpp} | 33 +-- .../ocl/fully_connected.cpp} | 61 ++-- .../ocl/fused_conv_eltwise.cpp} | 69 ++--- .../gather_gpu.cpp => impls/ocl/gather.cpp} | 41 ++- 
.../clDNN/src/impls/ocl/gather_nd.cpp | 67 +++++ .../ocl/gather_tree.cpp} | 37 ++- .../{gpu/gemm_gpu.cpp => impls/ocl/gemm.cpp} | 45 +-- .../ocl/generic_layer.cpp} | 35 ++- .../{gpu/grn_gpu.cpp => impls/ocl/grn.cpp} | 24 +- .../thirdparty/clDNN/src/impls/ocl/lrn.cpp | 82 ++++++ .../ocl/lstm_dynamic_input.cpp} | 26 +- .../ocl/lstm_dynamic_timeloop.cpp} | 26 +- .../ocl/lstm_elt.cpp} | 30 +- .../ocl/lstm_gemm.cpp} | 30 +- .../ocl/max_unpooling.cpp} | 47 ++-- .../clDNN/src/impls/ocl/mutable_data.cpp | 32 +++ .../thirdparty/clDNN/src/impls/ocl/mvn.cpp | 86 ++++++ .../clDNN/src/impls/ocl/normalize.cpp | 83 ++++++ .../clDNN/src/impls/ocl/one_hot.cpp | 74 +++++ .../permute_gpu.cpp => impls/ocl/permute.cpp} | 22 +- .../pooling_gpu.cpp => impls/ocl/pooling.cpp} | 123 ++++---- .../ocl/primitive_base.cpp} | 8 +- .../ocl/primitive_base.hpp} | 14 +- .../ocl/pyramid_roi_align.cpp} | 39 ++- .../clDNN/src/impls/ocl/quantize.cpp | 160 +++++++++++ .../reduce_gpu.cpp => impls/ocl/reduce.cpp} | 61 ++-- .../ocl/region_yolo.cpp} | 29 +- .../clDNN/src/impls/ocl/register.cpp | 79 ++++++ .../ocl/register.hpp} | 157 +++++------ .../reorder_gpu.cpp => impls/ocl/reorder.cpp} | 20 +- .../ocl/reorg_yolo.cpp} | 33 +-- .../ocl/resample.cpp} | 63 +++-- .../reshape_gpu.cpp => impls/ocl/reshape.cpp} | 22 +- .../ocl/reverse_sequence.cpp} | 31 +- .../ocl/roi_pooling.cpp} | 26 +- .../thirdparty/clDNN/src/impls/ocl/scale.cpp | 136 +++++++++ .../ocl/scatter_elements_update.cpp} | 41 ++- .../ocl/scatter_nd_update.cpp} | 41 ++- .../ocl/scatter_update.cpp} | 41 ++- .../thirdparty/clDNN/src/impls/ocl/select.cpp | 69 +++++ .../clDNN/src/impls/ocl/shuffle_channels.cpp | 82 ++++++ .../softmax_gpu.cpp => impls/ocl/softmax.cpp} | 37 +-- .../clDNN/src/impls/ocl/space_to_batch.cpp | 78 +++++ .../clDNN/src/impls/ocl/space_to_depth.cpp | 76 +++++ .../ocl/strided_slice.cpp} | 38 +-- .../thirdparty/clDNN/src/impls/ocl/tile.cpp | 70 +++++ .../clDNN/src/include/implementation_map.h | 174 ------------ .../clDNN/src/include/layout_optimizer.h | 3 +- .../clDNN/src/include/network_impl.h | 4 +- .../clDNN/src/include/primitive_inst.h | 1 + .../clDNN/src/include/primitive_type.h | 15 +- .../clDNN/src/include/primitive_type_base.h | 18 +- .../clDNN/src/include/program_node.h | 5 + .../clDNN/src/{gpu => }/kernel_runner.cpp | 0 .../clDNN/src/{gpu => }/kernel_runner.h | 0 .../thirdparty/clDNN/src/layout_optimizer.cpp | 22 +- .../thirdparty/clDNN/src/program.cpp | 8 +- .../thirdparty/clDNN/src/program_node.cpp | 3 + 121 files changed, 3590 insertions(+), 3851 deletions(-) create mode 100644 inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h delete mode 100644 
inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/condition_gpu.cpp => impls/common/condition.cpp} (86%) rename inference-engine/thirdparty/clDNN/src/{gpu/loop_gpu.cpp => impls/common/loop.cpp} (94%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/common/register.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/common/register.hpp rename inference-engine/thirdparty/clDNN/src/{gpu/wait_for_events_gpu.cpp => impls/common/wait_for_events.cpp} (55%) rename inference-engine/thirdparty/clDNN/src/{gpu => impls/cpu}/cpu_impl_helpers.hpp (100%) rename inference-engine/thirdparty/clDNN/src/{gpu/detection_output_cpu.cpp => impls/cpu/detection_output.cpp} (98%) rename inference-engine/thirdparty/clDNN/src/{gpu/non_max_suppression_cpu.cpp => impls/cpu/non_max_suppression.cpp} (94%) rename inference-engine/thirdparty/clDNN/src/{gpu/proposal_gpu.cpp => impls/cpu/proposal.cpp} (96%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/arg_max_min_gpu.cpp => impls/ocl/arg_max_min.cpp} (62%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/binary_convolution_gpu.cpp => impls/ocl/binary_convolution.cpp} (90%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/broadcast_gpu.cpp => impls/ocl/broadcast.cpp} (51%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/convolution_gpu.cpp => impls/ocl/convolution.cpp} (57%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/ctc_greedy_decoder_gpu.cpp => impls/ocl/ctc_greedy_decoder.cpp} (64%) rename inference-engine/thirdparty/clDNN/src/{gpu/cum_sum_gpu.cpp => impls/ocl/cum_sum.cpp} (66%) rename inference-engine/thirdparty/clDNN/src/{gpu/custom_gpu_primitive_gpu.cpp => impls/ocl/custom_primitive.cpp} (91%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp rename 
inference-engine/thirdparty/clDNN/src/{gpu/deformable_convolution_gpu.cpp => impls/ocl/deformable_convolution.cpp} (78%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/embedding_bag_gpu.cpp => impls/ocl/embedding_bag.cpp} (75%) rename inference-engine/thirdparty/clDNN/src/{gpu/extract_image_patches_gpu.cpp => impls/ocl/extract_image_patches.cpp} (55%) rename inference-engine/thirdparty/clDNN/src/{gpu/fully_connected_gpu.cpp => impls/ocl/fully_connected.cpp} (54%) rename inference-engine/thirdparty/clDNN/src/{gpu/fused_conv_eltwise_gpu.cpp => impls/ocl/fused_conv_eltwise.cpp} (62%) rename inference-engine/thirdparty/clDNN/src/{gpu/gather_gpu.cpp => impls/ocl/gather.cpp} (60%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/gather_tree_gpu.cpp => impls/ocl/gather_tree.cpp} (52%) rename inference-engine/thirdparty/clDNN/src/{gpu/gemm_gpu.cpp => impls/ocl/gemm.cpp} (52%) rename inference-engine/thirdparty/clDNN/src/{gpu/generic_layer_gpu.cpp => impls/ocl/generic_layer.cpp} (82%) rename inference-engine/thirdparty/clDNN/src/{gpu/grn_gpu.cpp => impls/ocl/grn.cpp} (68%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_dynamic_input_gpu.cpp => impls/ocl/lstm_dynamic_input.cpp} (77%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_dynamic_timeloop_gpu.cpp => impls/ocl/lstm_dynamic_timeloop.cpp} (83%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_elt_gpu.cpp => impls/ocl/lstm_elt.cpp} (81%) rename inference-engine/thirdparty/clDNN/src/{gpu/lstm_gemm_gpu.cpp => impls/ocl/lstm_gemm.cpp} (80%) rename inference-engine/thirdparty/clDNN/src/{gpu/max_unpooling_gpu.cpp => impls/ocl/max_unpooling.cpp} (51%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/permute_gpu.cpp => impls/ocl/permute.cpp} (74%) rename inference-engine/thirdparty/clDNN/src/{gpu/pooling_gpu.cpp => impls/ocl/pooling.cpp} (51%) rename inference-engine/thirdparty/clDNN/src/{gpu/primitive_gpu_base.cpp => impls/ocl/primitive_base.cpp} (87%) rename inference-engine/thirdparty/clDNN/src/{gpu/primitive_gpu_base.h => impls/ocl/primitive_base.hpp} (96%) rename inference-engine/thirdparty/clDNN/src/{gpu/pyramid_roi_align_gpu.cpp => impls/ocl/pyramid_roi_align.cpp} (59%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/reduce_gpu.cpp => impls/ocl/reduce.cpp} (50%) rename inference-engine/thirdparty/clDNN/src/{gpu/region_yolo_gpu.cpp => impls/ocl/region_yolo.cpp} (63%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/register_gpu.hpp => impls/ocl/register.hpp} (51%) rename inference-engine/thirdparty/clDNN/src/{gpu/reorder_gpu.cpp => impls/ocl/reorder.cpp} (90%) rename inference-engine/thirdparty/clDNN/src/{gpu/reorg_yolo_gpu.cpp => impls/ocl/reorg_yolo.cpp} (53%) rename inference-engine/thirdparty/clDNN/src/{gpu/resample_gpu.cpp => 
impls/ocl/resample.cpp} (69%) rename inference-engine/thirdparty/clDNN/src/{gpu/reshape_gpu.cpp => impls/ocl/reshape.cpp} (70%) rename inference-engine/thirdparty/clDNN/src/{gpu/reverse_sequence_gpu.cpp => impls/ocl/reverse_sequence.cpp} (59%) rename inference-engine/thirdparty/clDNN/src/{gpu/roi_pooling_gpu.cpp => impls/ocl/roi_pooling.cpp} (85%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_elements_update_gpu.cpp => impls/ocl/scatter_elements_update.cpp} (62%) rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_nd_update_gpu.cpp => impls/ocl/scatter_nd_update.cpp} (50%) rename inference-engine/thirdparty/clDNN/src/{gpu/scatter_update_gpu.cpp => impls/ocl/scatter_update.cpp} (61%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/select.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/shuffle_channels.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/softmax_gpu.cpp => impls/ocl/softmax.cpp} (65%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/space_to_batch.cpp create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/space_to_depth.cpp rename inference-engine/thirdparty/clDNN/src/{gpu/strided_slice_gpu.cpp => impls/ocl/strided_slice.cpp} (78%) create mode 100644 inference-engine/thirdparty/clDNN/src/impls/ocl/tile.cpp delete mode 100644 inference-engine/thirdparty/clDNN/src/include/implementation_map.h rename inference-engine/thirdparty/clDNN/src/{gpu => }/kernel_runner.cpp (100%) rename inference-engine/thirdparty/clDNN/src/{gpu => }/kernel_runner.h (100%) diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp index 95dfca9fc14..5f1a7cc4925 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp @@ -2,11 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -/////////////////////////////////////////////////////////////////////////////////////////////////// - #pragma once #include "cldnn/runtime/engine.hpp" +#include "cldnn/primitives/implementation_desc.hpp" #include "topology.hpp" @@ -99,14 +98,6 @@ struct learning_params { learning_params() : momentum(0.9f), weights_decay(0.0005f) {} }; -/// @brief Description of primitives implementation. -struct implementation_desc { - format::type output_format; ///< Output format. - std::string kernel_name; ///< GPU kernel name. -}; - -using implementation_forcing_map = std::map; - /// @brief Represents user-provided program build option. struct build_option { /// @brief Allow primitives fusing during program build (default: false). diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp new file mode 100644 index 00000000000..4ce5fcab2e8 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cldnn/runtime/tensor.hpp" + +#include +#include + +namespace cldnn { + +/// @brief Primitives implementation type. 
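// Illustrative sketch only (not part of this patch): how the impl_types flags and
// implementation_desc declared just below might be combined by a caller. The primitive id
// "conv1" and the chosen format are made-up values, and how the forcing map is consumed by
// the program build options is outside this excerpt.
impl_types allowed = impl_types::ocl | impl_types::common;        // accept either backend
bool ocl_ok = (allowed & impl_types::ocl) == impl_types::ocl;     // test a single flag
impl_types not_cpu = impl_types::any & ~impl_types::cpu;          // everything except cpu

implementation_forcing_map forcing;
forcing["conv1"] = implementation_desc(format::b_fs_yx_fsv16, "", impl_types::ocl);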
+enum class impl_types : uint8_t {
+    cpu = 1 << 0,
+    common = 1 << 1,
+    ocl = 1 << 2,
+    any = 0xFF,
+};
+
+inline impl_types operator&(impl_types a, impl_types b) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(static_cast<underlying_type>(a) & static_cast<underlying_type>(b));
+}
+
+inline impl_types operator|(impl_types a, impl_types b) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(static_cast<underlying_type>(a) | static_cast<underlying_type>(b));
+}
+
+inline impl_types operator~(impl_types a) {
+    typedef std::underlying_type<impl_types>::type underlying_type;
+    return static_cast<impl_types>(~static_cast<underlying_type>(a));
+}
+
+inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) {
+    switch (impl_type) {
+        case impl_types::cpu: out << "cpu"; break;
+        case impl_types::common: out << "common"; break;
+        case impl_types::ocl: out << "ocl"; break;
+        case impl_types::any: out << "any"; break;
+        default: out << "unknown"; break;
+    }
+
+    return out;
+}
+
+/// @brief Description of primitives implementation.
+struct implementation_desc {
+    format::type output_format; ///< Output format.
+    std::string kernel_name; ///< GPU kernel name.
+    impl_types impl_type; ///< GPU implementation type.
+
+    implementation_desc() :
+        output_format(format::any),
+        kernel_name(""),
+        impl_type(impl_types::any) {}
+
+    implementation_desc(format::type output_format,
+                        std::string kernel_name,
+                        impl_types impl_type = impl_types::any) :
+        output_format(output_format),
+        kernel_name(kernel_name),
+        impl_type(impl_type) {}
+};
+
+using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
+
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
index b9c0af334bc..2dff45a2007 100644
--- a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
@@ -38,12 +38,15 @@ file(GLOB __CLDNN_Headers__include
 "${__CLDNN_Directory__include}/*.hpp"
 )
-set(__CLDNN_Directory__gpu "${CMAKE_CURRENT_SOURCE_DIR}/gpu")
-set(__CLDNN_Label__gpu "gpu")
+set(__CLDNN_Directory__impls "${CMAKE_CURRENT_SOURCE_DIR}/impls")
+set(__CLDNN_Label__gpu "impls")
 file(GLOB __CLDNN_Sources__gpu
-    "${__CLDNN_Directory__gpu}/*.h"
-    "${__CLDNN_Directory__gpu}/*.hpp"
-    "${__CLDNN_Directory__gpu}/*.cpp"
+    "${__CLDNN_Directory__impls}/common/*.hpp"
+    "${__CLDNN_Directory__impls}/common/*.cpp"
+    "${__CLDNN_Directory__impls}/cpu/*.hpp"
+    "${__CLDNN_Directory__impls}/cpu/*.cpp"
+    "${__CLDNN_Directory__impls}/ocl/*.hpp"
+    "${__CLDNN_Directory__impls}/ocl/*.cpp"
 )
 set(__CLDNN_Directory__cg_cache "${CLDNN__CODEGEN_INCDIR}")
@@ -130,5 +133,5 @@ endif()
 # ======================================================================================================
 ie_sse42_optimization_flags(sse4_2_flags)
-set_source_files_properties(gpu/detection_output_cpu.cpp half.cpp
+set_source_files_properties(impls/cpu/detection_output.cpp half.cpp
 PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
deleted file mode 100644
index 9a90f972752..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "activation_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include 
"activation/activation_kernel_selector.h" -#include "activation/activation_kernel_base.h" -#include "register_gpu.hpp" - -namespace cldnn { -namespace gpu { - -struct activation_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - - if (_outer.is_parameterized()) { - args.slope = instance.slope_memory(); - } - - return args; - } - - static primitive_impl* create(const activation_node& arg) { - auto activation_params = get_default_params(arg); - auto activation_optional_params = - get_default_optional_params(arg.get_program()); - - convert_new_activation_func(arg.get_primitive(), activation_params.activations); - - if (arg.is_parameterized()) { - const auto& slope_layout = arg.slope_input().get_output_layout(); - const auto& output_layout = arg.get_output_layout(); - - const auto params_num = - kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function); - - CLDNN_ERROR_LESS_THAN(arg.id(), - "Slope layout size count", - slope_layout.size.count(), - "output_layout.size.feature[0] * params_num", - static_cast(output_layout.size.feature[0] * params_num), - "Error - not enough data inside additional params buffer"); - - activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout)); - } - - auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params); - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto activation = new activation_gpu(arg, best_kernels[0]); - - return activation; - } -}; - -namespace detail { - -attach_activation_gpu::attach_activation_gpu() { - auto val_fw = activation_gpu::create; - - implementation_map::add({ - {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw}, - // block f16 format - {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, 
format::b_fs_yx_fsv16), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw}, - // 3D - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw}, - { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw }, - // bfwzyx - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw}, - // fs_b_yx_fsv32 - {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw}, - }); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp deleted file mode 100644 index 55ad712d4d8..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "average_unpooling_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "average_unpooling/average_unpooling_kernel_selector.h" -#include "average_unpooling/average_unpooling_kernel_base.h" - -namespace cldnn { -namespace gpu { - -struct average_unpooling_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - kernel_arguments_data get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - return args; - } - -public: - static primitive_impl* create(const average_unpooling_node& arg) { - auto average_unpooling_params = get_default_params(arg); - auto average_unpooling_optional_params = - get_default_optional_params(arg.get_program()); - auto& params = average_unpooling_params; - - auto primitive = arg.get_primitive(); - auto stride = primitive->stride; - - params.unpoolSize = { - 
(uint32_t)primitive->size.spatial[0], - (uint32_t)primitive->size.spatial[1], - }; - - params.unpoolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]}; - - auto& kernel_selector = kernel_selector::average_unpooling_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(average_unpooling_params, average_unpooling_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto average_unpool = new average_unpooling_gpu(arg, best_kernels[0]); - - return average_unpool; - } -}; - -namespace detail { - -attach_average_unpooling_gpu::attach_average_unpooling_gpu() { - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), - average_unpooling_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), - average_unpooling_gpu::create); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp deleted file mode 100644 index d2ef8e70df9..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "batch_to_space_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "batch_to_space/batch_to_space_kernel_selector.h" -#include "batch_to_space/batch_to_space_kernel_ref.h" -#include "cldnn/runtime/error_handler.hpp" -#include "data_inst.h" -#include - -using namespace cldnn; - -namespace cldnn { -namespace gpu { -struct batch_to_space_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -public: - static primitive_impl* create(const batch_to_space_node& arg) { - auto batch_to_space_params = get_default_params(arg); - auto batch_to_space_optional_params = - get_default_optional_params(arg.get_program()); - - auto primitive = arg.get_primitive(); - - batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape); - batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin); - batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end); - - auto& kernel_selector = kernel_selector::batch_to_space_kernel_selector::Instance(); - auto best_kernels = 
kernel_selector.GetBestKernels(batch_to_space_params, batch_to_space_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto batch_to_space = new batch_to_space_gpu(arg, best_kernels[0]); - - return batch_to_space; - } -}; - -namespace detail { - -attach_batch_to_space_gpu::attach_batch_to_space_gpu() { - auto val_fw = batch_to_space_gpu::create; - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp deleted file mode 100644 index 01c1d54ad10..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "border_inst.h" - -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "border/border_kernel_selector.h" -#include "border/border_kernel_base.h" -#include "cldnn/runtime/error_handler.hpp" - -namespace cldnn { -namespace gpu { - -struct border_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - static primitive_impl* create(const border_node& arg) { - auto b_params = get_default_params(arg, 1); - auto b_optional_params = - get_default_optional_params(arg.get_program()); - - auto desc = arg.get_primitive(); - - b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes); - b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes); - b_params.border_value = desc->border_value; - - switch (desc->type) { - case border_type::constant: - b_params.b_type = 
kernel_selector::border_type::CONSTANT; - break; - case border_type::edge: - b_params.b_type = kernel_selector::border_type::EDGE; - break; - case border_type::mirror: - b_params.b_type = kernel_selector::border_type::MIRROR; - break; - case border_type::mirror_101: - b_params.b_type = kernel_selector::border_type::MIRROR_101; - break; - default: - assert( - false && - "Encountered unhandled enum case: border_type during translation to kernel selector enumeration."); - } - - auto& kernel_selector = kernel_selector::border_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - return new border_gpu(arg, best_kernels[0]); - } -}; - -namespace detail { - -attach_border_gpu::attach_border_gpu() { - auto val_fw = border_gpu::create; - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp deleted file mode 100644 index 590e97a2a8d..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "concatenation_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" 
-#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "concatenation/concatenation_kernel_selector.h" -#include "concatenation/concatenation_kernel_base.h" - -#include - -namespace cldnn { -namespace gpu { - -namespace { -kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) { - switch (axis) { - case concatenation::along_x: - return kernel_selector::concat_axis::X; - case concatenation::along_y: - return kernel_selector::concat_axis::Y; - case concatenation::along_z: - return kernel_selector::concat_axis::Z; - case concatenation::along_w: - return kernel_selector::concat_axis::W; - case concatenation::along_f: - return kernel_selector::concat_axis::FEATURE; - case concatenation::along_b: - return kernel_selector::concat_axis::BATCH; - default: - return kernel_selector::concat_axis::X; - } -} -} // namespace - -struct concatenation_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) { - if (!_outer.can_be_optimized()) { - CLDNN_ERROR_NOT_EQUAL(_outer.id(), - "Input count", - _outer.inputs_count(), - "kds size", - kd.kernels.size(), - "Error - not enough kernels for concatenation"); - } - } - -protected: - bool optimized_out(concatenation_inst& instance) const override { - return parent::optimized_out(instance) || _outer.can_be_optimized(); - } - -public: - static primitive_impl* create(const concatenation_node& arg) { - if (arg.can_be_optimized()) { - return new concatenation_gpu(arg, {}); - } - - auto concat_params = get_default_params(arg); - auto concat_optional_params = - get_default_optional_params(arg.get_program()); - auto axis = arg.get_primitive()->axis; - - concat_params.inputs.resize(arg.inputs_count()); - for (size_t i = 0; i < arg.inputs_count(); ++i) { - const layout& input_layout = arg.input(i).get_output_layout(); - concat_params.inputs[i] = convert_data_tensor(input_layout); - } - - concat_params.axis = convert_axis(axis); - concat_optional_params.kernelPerInput = true; - - auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(concat_params, concat_optional_params); - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - concatenation_gpu* concat = new concatenation_gpu(arg, best_kernels[0]); - - return concat; - } -}; - -namespace detail { - -attach_concatenation_gpu::attach_concatenation_gpu() { - implementation_map::add({ - {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, 
data_types::i8, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create}, - // 5D - { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create }, - { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create }, - // block f16 format - {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), 
concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), concatenation_gpu::create}, - // MMAD - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), concatenation_gpu::create}, - // 6D - {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create}, - {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), concatenation_gpu::create}, - }); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp deleted file mode 100644 index 07244dde5ae..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "crop_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "eltwise/eltwise_kernel_selector.h" -#include "eltwise/eltwise_kernel_base.h" -#include "cldnn/runtime/error_handler.hpp" - -namespace cldnn { -namespace gpu { - -struct crop_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - bool optimized_out(crop_inst& instance) const override { - return parent::optimized_out(instance) || _outer.can_be_optimized(); - } - -public: - static primitive_impl* create(const crop_node& arg) { - auto ew_params = get_default_params(arg, 1); - auto ew_optional_params = - get_default_optional_params(arg.get_program()); - - ew_params.operations.push_back( - {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN}); - - const auto& input_layout = arg.input().get_output_layout(); - ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets); - - auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto crop = new crop_gpu(arg, best_kernels[0]); - - return crop; - } -}; - -namespace detail { - -attach_crop_gpu::attach_crop_gpu() { - auto val_fw = crop_gpu::create; - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw); - 
implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, 
data_types::u8, format::bfwzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp deleted file mode 100644 index 9c1532ecfd4..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "deconvolution_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "cldnn/runtime/error_handler.hpp" -#include "kernel_selector_helper.h" -#include "deconvolution/deconvolution_kernel_selector.h" -#include "deconvolution/deconvolution_kernel_base.h" -#include - -namespace cldnn { -namespace gpu { - -struct deconvolution_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -protected: - // TODO: share it with convolution and fully connected - bool validate_impl(const typed_primitive_inst&) const override { - bool res = true; - - CLDNN_ERROR_NOT_EQUAL(_outer.id(), - "deconvolution filling value", - _outer.get_output_layout().data_padding.filling_value(), - "padding mode", - 0.0f, - "Unknown padding mode in deconvolution."); - - return res; - } - - kernel_arguments_data 
get_arguments(typed_primitive_inst& instance, int32_t split) const override { - kernel_arguments_data args = parent::get_arguments(instance, split); - - args.weights = instance.weights_memory(split); - args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr; - - return args; - } - - int32_t get_split() const override { return _outer.get_split(); } - - uint32_t get_groups() const override { return _outer.get_groups(); } - -public: - static primitive_impl* create(const deconvolution_node& arg) { - const auto& primitive = arg.get_primitive(); - const auto& weights_layout = arg.weights(0).get_output_layout(); - - const auto& weights_size = weights_layout.size; - - const auto& split = primitive->split(); - const auto& stride = primitive->stride; -#if 0 // TODO: support dilation - const auto& dilation = primitive->dilation; -#else - const tensor dilation = {0, 0, 1, 1, 1}; -#endif - const auto actual_split = split; - - const auto& input_offset = primitive->input_offset; - const auto& groups = primitive->groups; - - auto deconv_params = get_weights_bias_default_params( - arg, - (groups > 1) ? 1 : actual_split, - 1, - primitive->grouped_weights_shape); - auto deconv_optional_params = - get_default_weights_bias_optional_params(arg.get_program()); - - deconv_params.split = split; - deconv_params.groups = groups; - - auto spatial_size = arg.get_output_layout().format.dimension() - 2; - uint32_t kx = weights_size.spatial[0]; - uint32_t ky = weights_size.spatial[1]; - uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2]; - deconv_params.filterSize = { kx, ky, kz }; - - deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0), - (uint32_t)std::max(-input_offset.spatial[1], 0), - (uint32_t)std::max(-input_offset.spatial[2], 0)}; - - deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]}; - - deconv_params.dilation = {(uint32_t)dilation.spatial[0], - (uint32_t)dilation.spatial[1], - (uint32_t)dilation.spatial[2]}; - - auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with these arguments"); - auto deconv = new deconvolution_gpu(arg, best_kernels[0]); - - return deconv; - } -}; - -namespace detail { - -attach_deconvolution_gpu::attach_deconvolution_gpu() { - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), - deconvolution_gpu::create); - 
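// Worked example (illustrative values, not from the patch) for the padding conversion in
// create() above: with primitive->input_offset.spatial = {-1, -2, 0}, the deleted code set
// deconv_params.padding = {1, 2, 0}; a positive offset would simply be clamped to 0 by std::max.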
implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), - deconvolution_gpu::create); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp deleted file mode 100644 index 3a62c9e0a9c..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "depth_to_space_inst.h" -#include "primitive_gpu_base.h" -#include "implementation_map.h" -#include "kernel_selector_helper.h" -#include "depth_to_space/depth_to_space_kernel_selector.h" -#include "depth_to_space/depth_to_space_kernel_ref.h" -#include "cldnn/runtime/error_handler.hpp" -#include "common_types.h" - -using namespace cldnn; - -namespace cldnn { -namespace gpu { -struct depth_to_space_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - -public: - static primitive_impl* 
-        auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
-        auto depth_to_space_optional_params =
-            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
-
-        depth_to_space_params.block_size = arg.get_primitive()->block_size;
-        depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
-                                                                                                    : kernel_selector::depth_to_space_mode::DEPTH_FIRST;
-
-        auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto depth_to_space = new depth_to_space_gpu(arg, best_kernels[0]);
-
-        return depth_to_space;
-    }
-};
-
-namespace detail {
-
-attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
-    auto val_fw = depth_to_space_gpu::create;
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-}
-
-} // namespace detail
-} // namespace gpu
-} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
deleted file mode 100644
index 8ffc149e160..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
+++ /dev/null
@@ -1,195 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "eltwise_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "eltwise/eltwise_kernel_selector.h"
-#include "eltwise/eltwise_kernel_base.h"
-#include <vector>
-
-namespace cldnn {
-namespace gpu {
-
-struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
-    using parent = typed_primitive_gpu_impl<eltwise>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<eltwise_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const eltwise_node& arg) {
-        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
-        auto ew_optional_params =
-            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
-
-        for (size_t i = 1; i < arg.inputs_count(); i++) {
-            ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
-        }
-
-        const auto& primitive = arg.get_primitive();
-
-        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
-                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
-                                        convert_to_eltwise_mode(primitive->mode)});
-
-        for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
-            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
-                                             kernel_selector::eltwise_params::InputType::Buffer(i)},
-                                            convert_to_eltwise_mode(primitive->mode)});
-        }
-
-        if (primitive->mode == eltwise_mode::sum) {
-            ew_params.coefficients = primitive->coefficients;
-        }
-
-        for (size_t i = 0; i < ew_params.inputs.size(); i++) {
-            if (!ew_params.inputs[i].SameDims(ew_params.output)) {
-                std::vector<tensor::value_type> input_size = arg.input(i).get_output_layout().size.raw.vector();
-                std::vector<tensor::value_type> output_size = arg.get_output_layout().size.raw.vector();
-                bool broadcast = false;
-                for (size_t d = 0; d < output_size.size(); d++) {
-                    if (output_size[d] != 1 && input_size[d] == 1)
-                        broadcast = true;
-                }
-                if (broadcast) {
-                    ew_params.broadcast = true;
-                    break;
-                } else {
-                    ew_params.layoutBased = true;
-                    break;
-                }
-            }
-        }
-
-        // stride
-        if (!primitive->stride.empty()) {
-            const auto& stride = primitive->stride;
-            ew_params.stride.resize(stride.size());
-            for (size_t i = 0; i < primitive->stride.size(); i++) {
-                ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
-                                       (uint32_t)stride[i].spatial[1],
-                                       (uint32_t)stride[i].spatial[2]};
-            }
-        }
-
-        // check if strides are the same
-        if (!ew_params.stride.empty()) {
-            const auto& stride = ew_params.stride[0];
-            for (size_t i = 1; i < ew_params.stride.size(); i++) {
-                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
-                    ew_params.layoutBased = true;
-            }
-        } else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
-            ew_params.broadcast = true;
-        }
-
-        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
-        bool quantization = true;
-        for (size_t i = 0; i < arg.inputs_count(); i++) {
-            if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
-                arg.input(i).get_output_layout().data_type != data_types::i8) {
-                quantization = false;
-            }
-        }
-        ew_params.int8_quantization = quantization;
-
-        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto eltwise = new eltwise_gpu(arg, best_kernels[0]);
-
-        return eltwise;
-    }
-};
-
-namespace detail {
-
-attach_eltwise_gpu::attach_eltwise_gpu() {
-    implementation_map<eltwise>::add(
-        {{ std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
-         // block f16
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         // 3D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
-         // 4D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         // MMAD
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-
-         //
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});
-}
-
-} // namespace detail
-} // namespace gpu
-} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
deleted file mode 100644
index cf7d692651a..00000000000
--- a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "gather_nd_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include
"kernel_selector_helper.h" -#include "gather/gather_nd_kernel_selector.h" -#include "gather/gather_nd_kernel_ref.h" - -using namespace cldnn; - -namespace cldnn { -namespace gpu { - -struct gather_nd_gpu : typed_primitive_gpu_impl { - using parent = typed_primitive_gpu_impl; - using parent::parent; - - std::unique_ptr clone() const override { - return make_unique(*this); - } - - static primitive_impl* create(const gather_nd_node& arg) { - auto gather_nd_params = get_default_params(arg); - auto gather_nd_optional_params = - get_default_optional_params(arg.get_program()); - - gather_nd_params.indices_rank = arg.get_primitive()->indices_rank; - gather_nd_params.batch_dims = arg.get_primitive()->batch_dims; - - gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout())); - - auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance(); - auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params); - - CLDNN_ERROR_BOOL(arg.id(), - "Best_kernel.empty()", - best_kernels.empty(), - "Cannot find a proper kernel with this arguments"); - - auto gather_nd = new gather_nd_gpu(arg, best_kernels[0]); - - return gather_nd; - } -}; - -namespace detail { - -attach_gather_nd_gpu::attach_gather_nd_gpu() { - auto val_fw = gather_nd_gpu::create; - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw); - - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw); -} - -} // namespace detail -} // namespace gpu -} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h b/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h deleted file mode 100644 index 32c9694e155..00000000000 --- a/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -namespace cldnn { -namespace gpu { -namespace mputils { -template -struct type_tuple; - -template -struct index_tuple {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct count_tt; - -template -struct count_tt, ElemTy> - : std::integral_constant, ElemTy>::value + - static_cast(std::is_same::value)> {}; - -template -struct count_tt, ElemTy> : std::integral_constant {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct size_tt; - -template -struct size_tt> : std::integral_constant {}; - -// 
----------------------------------------------------------------------------------------------------------------------- - -template -struct split_tt; - -namespace detail { -template -struct split_tt_helper1; - -template -struct split_tt_helper1, ElemTy, type_tuple> - : split_tt_helper1, ElemTy, type_tuple> {}; - -template -struct split_tt_helper1, Ty, type_tuple> { - using first_type = type_tuple; - using second_type = type_tuple; -}; - -template -struct split_tt_helper1, ElemTy, type_tuple> { - using first_type = type_tuple<>; - using second_type = type_tuple; -}; -} // namespace detail - -template -struct split_tt, ElemTy> : detail::split_tt_helper1, ElemTy, type_tuple<>> {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct index_of_tt; - -static constexpr std::size_t npos = static_cast(-1); - -namespace detail { -template -struct index_of_tt_helper1; - -template -struct index_of_tt_helper1, ElemTy, Pos> - : index_of_tt_helper1, ElemTy, Pos + 1> {}; - -template -struct index_of_tt_helper1, Ty, Pos> : std::integral_constant {}; - -template -struct index_of_tt_helper1, ElemTy, Pos> : std::integral_constant {}; -} // namespace detail - -template -struct index_of_tt, ElemTy> : detail::index_of_tt_helper1, ElemTy, 0> {}; - -// ----------------------------------------------------------------------------------------------------------------------- - -template -struct remove_tt; - -namespace detail { -template -struct remove_tt_helper1; - -template -struct remove_tt_helper1, ElemTy, type_tuple> - : remove_tt_helper1, ElemTy, type_tuple> {}; - -template -struct remove_tt_helper1, Ty, type_tuple> - : remove_tt_helper1, Ty, type_tuple> {}; - -template -struct remove_tt_helper1, ElemTy, type_tuple> { - using type = type_tuple; -}; -} // namespace detail - -template -struct remove_tt, ElemTy> : detail::remove_tt_helper1, ElemTy, type_tuple<>> {}; - -template -using remove_tt_t = typename remove_tt::type; - -// ----------------------------------------------------------------------------------------------------------------------- - -template