[GPU] Impls refactoring (#6603)

2021-07-15 12:05:34 +03:00 · 2021-07-15 12:05:34 +03:00 · 788e76722f
commit 788e76722f
parent 7be2b782ba
121 changed files with 3590 additions and 3851 deletions
--- a/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/graph/program.hpp
@ -2,11 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-///////////////////////////////////////////////////////////////////////////////////////////////////
-
 #pragma once

 #include "cldnn/runtime/engine.hpp"
+#include "cldnn/primitives/implementation_desc.hpp"

 #include "topology.hpp"

@ -99,14 +98,6 @@ struct learning_params {
    learning_params() : momentum(0.9f), weights_decay(0.0005f) {}
 };

-/// @brief Description of primitives implementation.
-struct implementation_desc {
-    format::type output_format;  ///< Output format.
-    std::string kernel_name;  ///< GPU kernel name.
-};
-
-using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
-
 /// @brief Represents user-provided program build option.
 struct build_option {
    /// @brief Allow primitives fusing during program build (default: false).
--- a/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp
@ -0,0 +1,70 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cldnn/runtime/tensor.hpp"
+
+#include <cstdint>
+#include <map>
+#include <ostream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+namespace cldnn {
+
+/// @brief Primitives implementation type. Each named value occupies its own bit, so values can be combined and tested with the bitwise operators defined for this enum in this header.
+enum class impl_types : uint8_t {
+    cpu = 1 << 0,     ///< Bit 0 (0x01).
+    common = 1 << 1,  ///< Bit 1 (0x02).
+    ocl = 1 << 2,     ///< Bit 2 (0x04).
+    any = 0xFF,       ///< All eight bits set, so (any & x) == x for every value x.
+};
+
+/// @brief Bitwise AND of two impl_types values, computed on the enum's underlying integer type.
+inline impl_types operator&(impl_types a, impl_types b) {
+    using raw_t = std::underlying_type<impl_types>::type;
+    const raw_t lhs_bits = static_cast<raw_t>(a);
+    const raw_t rhs_bits = static_cast<raw_t>(b);
+    return static_cast<impl_types>(lhs_bits & rhs_bits);
+}
+
+/// @brief Bitwise OR of two impl_types values, computed on the enum's underlying integer type.
+inline impl_types operator|(impl_types a, impl_types b) {
+    using raw_t = std::underlying_type<impl_types>::type;
+    const raw_t lhs_bits = static_cast<raw_t>(a);
+    const raw_t rhs_bits = static_cast<raw_t>(b);
+    return static_cast<impl_types>(lhs_bits | rhs_bits);
+}
+
+/// @brief Bitwise complement of an impl_types value, limited to the width of the enum's underlying type.
+inline impl_types operator~(impl_types a) {
+    using raw_t = std::underlying_type<impl_types>::type;
+    // `~` is applied after integer promotion, so the intermediate result is a (negative) int.
+    // Narrow it back to the underlying type explicitly before converting to the enum, so the
+    // enum conversion always receives a value that is in range.
+    const raw_t inverted = static_cast<raw_t>(~static_cast<raw_t>(a));
+    return static_cast<impl_types>(inverted);
+}
+
+/// @brief Writes the lower-case name of an impl_types value to a stream.
+/// Any value other than the four named enumerators is written as "unknown".
+/// @return The stream that was passed in.
+inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) {
+    const char* label = "unknown";
+
+    if (impl_type == impl_types::cpu) {
+        label = "cpu";
+    } else if (impl_type == impl_types::common) {
+        label = "common";
+    } else if (impl_type == impl_types::ocl) {
+        label = "ocl";
+    } else if (impl_type == impl_types::any) {
+        label = "any";
+    }
+
+    return out << label;
+}
+
+/// @brief Description of primitives implementation.
+struct implementation_desc {
+    format::type output_format;  ///< Output format.
+    std::string kernel_name;     ///< GPU kernel name.
+    impl_types impl_type;        ///< GPU implementation type.
+
+    /// @brief Creates a description with format::any, an empty kernel name and impl_types::any.
+    implementation_desc() :
+        output_format(format::any),
+        kernel_name(),
+        impl_type(impl_types::any) {}
+
+    /// @brief Creates a description from explicit values.
+    /// @param output_format Output format to store.
+    /// @param kernel_name   Kernel name to store; taken by value and moved into the member.
+    /// @param impl_type     Implementation type to store (defaults to impl_types::any).
+    implementation_desc(format::type output_format,
+                        std::string kernel_name,
+                        impl_types impl_type = impl_types::any) :
+        output_format(output_format),
+        // The parameter is already a private copy, so move it instead of copying a second time.
+        kernel_name(std::move(kernel_name)),
+        impl_type(impl_type) {}
+};
+
+using implementation_forcing_map = std::map<primitive_id, implementation_desc>;  ///< Ordered map from a primitive id to the implementation description stored for it.
+
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
@ -38,12 +38,15 @@ file(GLOB __CLDNN_Headers__include
    "${__CLDNN_Directory__include}/*.hpp"
  )

-set(__CLDNN_Directory__gpu             "${CMAKE_CURRENT_SOURCE_DIR}/gpu")
-set(__CLDNN_Label__gpu                 "gpu")
+set(__CLDNN_Directory__impls             "${CMAKE_CURRENT_SOURCE_DIR}/impls")
+set(__CLDNN_Label__gpu                 "impls")
 file(GLOB __CLDNN_Sources__gpu
-    "${__CLDNN_Directory__gpu}/*.h"
-    "${__CLDNN_Directory__gpu}/*.hpp"
-    "${__CLDNN_Directory__gpu}/*.cpp"
+    "${__CLDNN_Directory__impls}/common/*.hpp"
+    "${__CLDNN_Directory__impls}/common/*.cpp"
+    "${__CLDNN_Directory__impls}/cpu/*.hpp"
+    "${__CLDNN_Directory__impls}/cpu/*.cpp"
+    "${__CLDNN_Directory__impls}/ocl/*.hpp"
+    "${__CLDNN_Directory__impls}/ocl/*.cpp"
  )

 set(__CLDNN_Directory__cg_cache        "${CLDNN__CODEGEN_INCDIR}")
@ -130,5 +133,5 @@ endif()
 # ======================================================================================================

 ie_sse42_optimization_flags(sse4_2_flags)
-set_source_files_properties(gpu/detection_output_cpu.cpp half.cpp
+set_source_files_properties(impls/cpu/detection_output.cpp half.cpp
  PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
--- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
@ -1,126 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "activation_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "activation/activation_kernel_selector.h"
-#include "activation/activation_kernel_base.h"
-#include "register_gpu.hpp"
-
-namespace cldnn {
-namespace gpu {
-
-struct activation_gpu : typed_primitive_gpu_impl<activation> {
-    using parent = typed_primitive_gpu_impl<activation>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<activation_gpu>(*this);
-    }
-
-    kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-
-        if (_outer.is_parameterized()) {
-            args.slope = instance.slope_memory();
-        }
-
-        return args;
-    }
-
-    static primitive_impl* create(const activation_node& arg) {
-        auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
-        auto activation_optional_params =
-            get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
-
-        convert_new_activation_func(arg.get_primitive(), activation_params.activations);
-
-        if (arg.is_parameterized()) {
-            const auto& slope_layout = arg.slope_input().get_output_layout();
-            const auto& output_layout = arg.get_output_layout();
-
-            const auto params_num =
-                kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activations[0].function);
-
-            CLDNN_ERROR_LESS_THAN(arg.id(),
-                                  "Slope layout size count",
-                                  slope_layout.size.count(),
-                                  "output_layout.size.feature[0] * params_num",
-                                  static_cast<size_t>(output_layout.size.feature[0] * params_num),
-                                  "Error - not enough data inside additional params buffer");
-
-            activation_params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
-        }
-
-        auto& kernel_selector = kernel_selector::activation_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(activation_params, activation_optional_params);
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto activation = new activation_gpu(arg, best_kernels[0]);
-
-        return activation;
-    }
-};
-
-namespace detail {
-
-attach_activation_gpu::attach_activation_gpu() {
-    auto val_fw = activation_gpu::create;
-
-    implementation_map<activation>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw},
-        // block f16 format
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
-        // 3D
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw},
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw },
-        // bfwzyx
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw},
-        // fs_b_yx_fsv32
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
-    });
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/average_unpooling_gpu.cpp
@ -1,86 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "average_unpooling_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "average_unpooling/average_unpooling_kernel_selector.h"
-#include "average_unpooling/average_unpooling_kernel_base.h"
-
-namespace cldnn {
-namespace gpu {
-
-struct average_unpooling_gpu : typed_primitive_gpu_impl<average_unpooling> {
-    using parent = typed_primitive_gpu_impl<average_unpooling>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<average_unpooling_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const average_unpooling_node& arg) {
-        auto average_unpooling_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
-        auto average_unpooling_optional_params =
-            get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
-        auto& params = average_unpooling_params;
-
-        auto primitive = arg.get_primitive();
-        auto stride = primitive->stride;
-
-        params.unpoolSize = {
-            (uint32_t)primitive->size.spatial[0],
-            (uint32_t)primitive->size.spatial[1],
-        };
-
-        params.unpoolStride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1]};
-
-        auto& kernel_selector = kernel_selector::average_unpooling_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(average_unpooling_params, average_unpooling_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto average_unpool = new average_unpooling_gpu(arg, best_kernels[0]);
-
-        return average_unpool;
-    }
-};
-
-namespace detail {
-
-attach_average_unpooling_gpu::attach_average_unpooling_gpu() {
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                                average_unpooling_gpu::create);
-    implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                                average_unpooling_gpu::create);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/batch_to_space_gpu.cpp
@ -1,77 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "batch_to_space_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "batch_to_space/batch_to_space_kernel_selector.h"
-#include "batch_to_space/batch_to_space_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "data_inst.h"
-#include <vector>
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-struct batch_to_space_gpu : typed_primitive_gpu_impl<batch_to_space> {
-    using parent = typed_primitive_gpu_impl<batch_to_space>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<batch_to_space_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const batch_to_space_node& arg) {
-        auto batch_to_space_params = get_default_params<kernel_selector::batch_to_space_params>(arg);
-        auto batch_to_space_optional_params =
-            get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());
-
-        auto primitive = arg.get_primitive();
-
-        batch_to_space_params.block_shape = convert_dim_vector(primitive->block_shape);
-        batch_to_space_params.crops_begin = convert_dim_vector(primitive->crops_begin);
-        batch_to_space_params.crops_end = convert_dim_vector(primitive->crops_end);
-
-        auto& kernel_selector = kernel_selector::batch_to_space_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(batch_to_space_params, batch_to_space_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto batch_to_space = new batch_to_space_gpu(arg, best_kernels[0]);
-
-        return batch_to_space;
-    }
-};
-
-namespace detail {
-
-attach_batch_to_space_gpu::attach_batch_to_space_gpu() {
-    auto val_fw = batch_to_space_gpu::create;
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/border_gpu.cpp
@ -1,100 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "border_inst.h"
-
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "border/border_kernel_selector.h"
-#include "border/border_kernel_base.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-namespace cldnn {
-namespace gpu {
-
-struct border_gpu : typed_primitive_gpu_impl<border> {
-    using parent = typed_primitive_gpu_impl<border>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<border_gpu>(*this);
-    }
-
-    static primitive_impl* create(const border_node& arg) {
-        auto b_params = get_default_params<kernel_selector::border_params>(arg, 1);
-        auto b_optional_params =
-            get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());
-
-        auto desc = arg.get_primitive();
-
-        b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes);
-        b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes);
-        b_params.border_value = desc->border_value;
-
-        switch (desc->type) {
-            case border_type::constant:
-                b_params.b_type = kernel_selector::border_type::CONSTANT;
-                break;
-            case border_type::edge:
-                b_params.b_type = kernel_selector::border_type::EDGE;
-                break;
-            case border_type::mirror:
-                b_params.b_type = kernel_selector::border_type::MIRROR;
-                break;
-            case border_type::mirror_101:
-                b_params.b_type = kernel_selector::border_type::MIRROR_101;
-                break;
-            default:
-                assert(
-                    false &&
-                    "Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
-        }
-
-        auto& kernel_selector = kernel_selector::border_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        return new border_gpu(arg, best_kernels[0]);
-    }
-};
-
-namespace detail {
-
-attach_border_gpu::attach_border_gpu() {
-    auto val_fw = border_gpu::create;
-
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
-
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<border>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
@ -1,163 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "concatenation_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "concatenation/concatenation_kernel_selector.h"
-#include "concatenation/concatenation_kernel_base.h"
-
-#include <initializer_list>
-
-namespace cldnn {
-namespace gpu {
-
-namespace {
-kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
-    switch (axis) {
-        case concatenation::along_x:
-            return kernel_selector::concat_axis::X;
-        case concatenation::along_y:
-            return kernel_selector::concat_axis::Y;
-        case concatenation::along_z:
-            return kernel_selector::concat_axis::Z;
-        case concatenation::along_w:
-            return kernel_selector::concat_axis::W;
-        case concatenation::along_f:
-            return kernel_selector::concat_axis::FEATURE;
-        case concatenation::along_b:
-            return kernel_selector::concat_axis::BATCH;
-        default:
-            return kernel_selector::concat_axis::X;
-    }
-}
-}  // namespace
-
-struct concatenation_gpu : typed_primitive_gpu_impl<concatenation> {
-    using parent = typed_primitive_gpu_impl<concatenation>;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<concatenation_gpu>(*this);
-    }
-
-    concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
-        if (!_outer.can_be_optimized()) {
-            CLDNN_ERROR_NOT_EQUAL(_outer.id(),
-                                  "Input count",
-                                  _outer.inputs_count(),
-                                  "kds size",
-                                  kd.kernels.size(),
-                                  "Error - not enough kernels for concatenation");
-        }
-    }
-
-protected:
-    bool optimized_out(concatenation_inst& instance) const override {
-        return parent::optimized_out(instance) || _outer.can_be_optimized();
-    }
-
-public:
-    static primitive_impl* create(const concatenation_node& arg) {
-        if (arg.can_be_optimized()) {
-            return new concatenation_gpu(arg, {});
-        }
-
-        auto concat_params = get_default_params<kernel_selector::concatenation_params>(arg);
-        auto concat_optional_params =
-            get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
-        auto axis = arg.get_primitive()->axis;
-
-        concat_params.inputs.resize(arg.inputs_count());
-        for (size_t i = 0; i < arg.inputs_count(); ++i) {
-            const layout& input_layout = arg.input(i).get_output_layout();
-            concat_params.inputs[i] = convert_data_tensor(input_layout);
-        }
-
-        concat_params.axis = convert_axis(axis);
-        concat_optional_params.kernelPerInput = true;
-
-        auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(concat_params, concat_optional_params);
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        concatenation_gpu* concat = new concatenation_gpu(arg, best_kernels[0]);
-
-        return concat;
-    }
-};
-
-namespace detail {
-
-attach_concatenation_gpu::attach_concatenation_gpu() {
-    implementation_map<concatenation>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create},
-        // 5D
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create },
-        { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), concatenation_gpu::create },
-        // block f16 format
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), concatenation_gpu::create},
-        // MMAD
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), concatenation_gpu::create},
-        // 6D
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), concatenation_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), concatenation_gpu::create},
-    });
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
@ -1,127 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "crop_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "eltwise/eltwise_kernel_selector.h"
-#include "eltwise/eltwise_kernel_base.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-namespace cldnn {
-namespace gpu {
-
-struct crop_gpu : typed_primitive_gpu_impl<crop> {
-    using parent = typed_primitive_gpu_impl<crop>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<crop_gpu>(*this);
-    }
-
-protected:
-    bool optimized_out(crop_inst& instance) const override {
-        return parent::optimized_out(instance) || _outer.can_be_optimized();
-    }
-
-public:
-    static primitive_impl* create(const crop_node& arg) {
-        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
-        auto ew_optional_params =
-            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
-
-        ew_params.operations.push_back(
-            {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});
-
-        const auto& input_layout = arg.input().get_output_layout();
-        ew_params.inputs[0] = convert_data_tensor(input_layout, 1, arg.get_primitive()->offsets);
-
-        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto crop = new crop_gpu(arg, best_kernels[0]);
-
-        return crop;
-    }
-};
-
-namespace detail {
-
-attach_crop_gpu::attach_crop_gpu() {
-    auto val_fw = crop_gpu::create;
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
-
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
@ -1,173 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "deconvolution_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "deconvolution/deconvolution_kernel_selector.h"
-#include "deconvolution/deconvolution_kernel_base.h"
-#include <algorithm>
-
-namespace cldnn {
-namespace gpu {
-
-struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution> {
-    using parent = typed_primitive_gpu_impl<deconvolution>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<deconvolution_gpu>(*this);
-    }
-
-protected:
-    // TODO: share it with convolution and fully connected
-    bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
-        bool res = true;
-
-        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
-                              "deconvolution filling value",
-                              _outer.get_output_layout().data_padding.filling_value(),
-                              "padding mode",
-                              0.0f,
-                              "Unknown padding mode in deconvolution.");
-
-        return res;
-    }
-
-    kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-
-        args.weights = instance.weights_memory(split);
-        args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr;
-
-        return args;
-    }
-
-    int32_t get_split() const override { return _outer.get_split(); }
-
-    uint32_t get_groups() const override { return _outer.get_groups(); }
-
-public:
-    static primitive_impl* create(const deconvolution_node& arg) {
-        const auto& primitive = arg.get_primitive();
-        const auto& weights_layout = arg.weights(0).get_output_layout();
-
-        const auto& weights_size = weights_layout.size;
-
-        const auto& split = primitive->split();
-        const auto& stride = primitive->stride;
-#if 0  // TODO: support dilation
-        const auto& dilation = primitive->dilation;
-#else
-        const tensor dilation = {0, 0, 1, 1, 1};
-#endif
-        const auto actual_split = split;
-
-        const auto& input_offset = primitive->input_offset;
-        const auto& groups = primitive->groups;
-
-        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
-            arg,
-            (groups > 1) ? 1 : actual_split,
-            1,
-            primitive->grouped_weights_shape);
-        auto deconv_optional_params =
-            get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
-
-        deconv_params.split = split;
-        deconv_params.groups = groups;
-
-        auto spatial_size = arg.get_output_layout().format.dimension() - 2;
-        uint32_t kx = weights_size.spatial[0];
-        uint32_t ky = weights_size.spatial[1];
-        uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2];
-        deconv_params.filterSize = { kx, ky, kz };
-
-        deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
-                                 (uint32_t)std::max(-input_offset.spatial[1], 0),
-                                 (uint32_t)std::max(-input_offset.spatial[2], 0)};
-
-        deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
-
-        deconv_params.dilation = {(uint32_t)dilation.spatial[0],
-                                  (uint32_t)dilation.spatial[1],
-                                  (uint32_t)dilation.spatial[2]};
-
-        auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with these arguments");
-        auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
-
-        return deconv;
-    }
-};
-
-namespace detail {
-
-attach_deconvolution_gpu::attach_deconvolution_gpu() {
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-    implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
-                                           deconvolution_gpu::create);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp
@ -1,70 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "depth_to_space_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "depth_to_space/depth_to_space_kernel_selector.h"
-#include "depth_to_space/depth_to_space_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "common_types.h"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-struct depth_to_space_gpu : typed_primitive_gpu_impl<depth_to_space> {
-    using parent = typed_primitive_gpu_impl<depth_to_space>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<depth_to_space_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const depth_to_space_node& arg) {
-        auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
-        auto depth_to_space_optional_params =
-            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
-
-        depth_to_space_params.block_size = arg.get_primitive()->block_size;
-        depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
-                                                                                                    : kernel_selector::depth_to_space_mode::DEPTH_FIRST;
-
-        auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto depth_to_space = new depth_to_space_gpu(arg, best_kernels[0]);
-
-        return depth_to_space;
-    }
-};
-
-namespace detail {
-
-attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
-    auto val_fw = depth_to_space_gpu::create;
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
@ -1,195 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "eltwise_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "eltwise/eltwise_kernel_selector.h"
-#include "eltwise/eltwise_kernel_base.h"
-#include <vector>
-
-namespace cldnn {
-namespace gpu {
-
-struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
-    using parent = typed_primitive_gpu_impl<eltwise>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<eltwise_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const eltwise_node& arg) {
-        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
-        auto ew_optional_params =
-            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
-
-        for (size_t i = 1; i < arg.inputs_count(); i++) {
-            ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
-        }
-
-        const auto& primitive = arg.get_primitive();
-
-        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
-                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
-                                        convert_to_eltwise_mode(primitive->mode)});
-
-        for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
-            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
-                                             kernel_selector::eltwise_params::InputType::Buffer(i)},
-                                            convert_to_eltwise_mode(primitive->mode)});
-        }
-
-        if (primitive->mode == eltwise_mode::sum) {
-            ew_params.coefficients = primitive->coefficients;
-        }
-
-        for (size_t i = 0; i < ew_params.inputs.size(); i++) {
-            if (!ew_params.inputs[i].SameDims(ew_params.output)) {
-                std::vector<int32_t> input_size = arg.input(i).get_output_layout().size.raw.vector();
-                std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
-                bool broadcast = false;
-                for (size_t d = 0; d < output_size.size(); d++) {
-                    if (output_size[d] != 1 && input_size[d] == 1)
-                        broadcast = true;
-                }
-                if (broadcast) {
-                    ew_params.broadcast = true;
-                    break;
-                } else {
-                    ew_params.layoutBased = true;
-                    break;
-                }
-            }
-        }
-
-        // stride
-        if (!primitive->stride.empty()) {
-            const auto& stride = primitive->stride;
-            ew_params.stride.resize(stride.size());
-            for (size_t i = 0; i < primitive->stride.size(); i++) {
-                ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
-                                       (uint32_t)stride[i].spatial[1],
-                                       (uint32_t)stride[i].spatial[2]};
-            }
-        }
-
-        // check if strides are the same
-        if (!ew_params.stride.empty()) {
-            const auto& stride = ew_params.stride[0];
-            for (size_t i = 1; i < ew_params.stride.size(); i++) {
-                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
-                    ew_params.layoutBased = true;
-            }
-        } else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
-            ew_params.broadcast = true;
-        }
-
-        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
-        bool quantization = true;
-        for (size_t i = 0; i < arg.inputs_count(); i++) {
-            if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
-                arg.input(i).get_output_layout().data_type != data_types::i8) {
-                quantization = false;
-            }
-        }
-        ew_params.int8_quantization = quantization;
-
-        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto eltwise = new eltwise_gpu(arg, best_kernels[0]);
-
-        return eltwise;
-    }
-};
-
-namespace detail {
-
-attach_eltwise_gpu::attach_eltwise_gpu() {
-    implementation_map<eltwise>::add(
-        {{ std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
-         // block f16
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), eltwise_gpu::create },
-         // 3D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
-         // 4D
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
-
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), eltwise_gpu::create },
-         // MMAD
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), eltwise_gpu::create },
-
-         //
-         { std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), eltwise_gpu::create }});
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/gather_nd_gpu.cpp
@@ -1,68 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "gather_nd_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "gather/gather_nd_kernel_selector.h"
-#include "gather/gather_nd_kernel_ref.h"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct gather_nd_gpu : typed_primitive_gpu_impl<gather_nd> {
-    using parent = typed_primitive_gpu_impl<gather_nd>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<gather_nd_gpu>(*this);
-    }
-
-    static primitive_impl* create(const gather_nd_node& arg) {
-        auto gather_nd_params = get_default_params<kernel_selector::gather_nd_params>(arg);
-        auto gather_nd_optional_params =
-            get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());
-
-        gather_nd_params.indices_rank = arg.get_primitive()->indices_rank;
-        gather_nd_params.batch_dims = arg.get_primitive()->batch_dims;
-
-        gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
-
-        auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto gather_nd = new gather_nd_gpu(arg, best_kernels[0]);
-
-        return gather_nd;
-    }
-};
-
-namespace detail {
-
-attach_gather_nd_gpu::attach_gather_nd_gpu() {
-    auto val_fw = gather_nd_gpu::create;
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kd_selector.h
@@ -1,266 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <initializer_list>
-#include <tuple>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-namespace cldnn {
-namespace gpu {
-namespace mputils {
-template <typename... Tys>
-struct type_tuple;
-
-template <std::size_t... Idxs>
-struct index_tuple {};
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy, typename ElemTy>
-struct count_tt;
-
-template <typename Ty, typename... Tys, typename ElemTy>
-struct count_tt<type_tuple<Ty, Tys...>, ElemTy>
-    : std::integral_constant<std::size_t,
-                             count_tt<type_tuple<Tys...>, ElemTy>::value +
-                                 static_cast<std::size_t>(std::is_same<Ty, ElemTy>::value)> {};
-
-template <typename ElemTy>
-struct count_tt<type_tuple<>, ElemTy> : std::integral_constant<std::size_t, 0> {};
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy>
-struct size_tt;
-
-template <typename... Tys>
-struct size_tt<type_tuple<Tys...>> : std::integral_constant<std::size_t, sizeof...(Tys)> {};
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy, typename ElemTy>
-struct split_tt;
-
-namespace detail {
-template <typename TypeTupleTy, typename ElemTy, typename FirstTupleTy>
-struct split_tt_helper1;
-
-template <typename Ty, typename... Tys, typename ElemTy, typename... FirstTys>
-struct split_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, type_tuple<FirstTys...>>
-    : split_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<FirstTys..., Ty>> {};
-
-template <typename Ty, typename... Tys, typename... FirstTys>
-struct split_tt_helper1<type_tuple<Ty, Tys...>, Ty, type_tuple<FirstTys...>> {
-    using first_type = type_tuple<FirstTys...>;
-    using second_type = type_tuple<Tys...>;
-};
-
-template <typename ElemTy, typename... FirstTys>
-struct split_tt_helper1<type_tuple<>, ElemTy, type_tuple<FirstTys...>> {
-    using first_type = type_tuple<>;
-    using second_type = type_tuple<FirstTys...>;
-};
-}  // namespace detail
-
-template <typename... Tys, typename ElemTy>
-struct split_tt<type_tuple<Tys...>, ElemTy> : detail::split_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<>> {};
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy, typename ElemTy>
-struct index_of_tt;
-
-static constexpr std::size_t npos = static_cast<std::size_t>(-1);
-
-namespace detail {
-template <typename TypeTupleTy, typename ElemTy, std::size_t Pos>
-struct index_of_tt_helper1;
-
-template <typename Ty, typename... Tys, typename ElemTy, std::size_t Pos>
-struct index_of_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, Pos>
-    : index_of_tt_helper1<type_tuple<Tys...>, ElemTy, Pos + 1> {};
-
-template <typename Ty, typename... Tys, std::size_t Pos>
-struct index_of_tt_helper1<type_tuple<Ty, Tys...>, Ty, Pos> : std::integral_constant<std::size_t, Pos> {};
-
-template <typename ElemTy, std::size_t Pos>
-struct index_of_tt_helper1<type_tuple<>, ElemTy, Pos> : std::integral_constant<std::size_t, npos> {};
-}  // namespace detail
-
-template <typename... Tys, typename ElemTy>
-struct index_of_tt<type_tuple<Tys...>, ElemTy> : detail::index_of_tt_helper1<type_tuple<Tys...>, ElemTy, 0> {};
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy, typename ElemTy>
-struct remove_tt;
-
-namespace detail {
-template <typename TypeTupleTy, typename ElemTy, typename ResultTupleTy>
-struct remove_tt_helper1;
-
-template <typename Ty, typename... Tys, typename ElemTy, typename... ResultTys>
-struct remove_tt_helper1<type_tuple<Ty, Tys...>, ElemTy, type_tuple<ResultTys...>>
-    : remove_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<ResultTys..., Ty>> {};
-
-template <typename Ty, typename... Tys, typename... ResultTys>
-struct remove_tt_helper1<type_tuple<Ty, Tys...>, Ty, type_tuple<ResultTys...>>
-    : remove_tt_helper1<type_tuple<Tys...>, Ty, type_tuple<ResultTys...>> {};
-
-template <typename ElemTy, typename... ResultTys>
-struct remove_tt_helper1<type_tuple<>, ElemTy, type_tuple<ResultTys...>> {
-    using type = type_tuple<ResultTys...>;
-};
-}  // namespace detail
-
-template <typename... Tys, typename ElemTy>
-struct remove_tt<type_tuple<Tys...>, ElemTy> : detail::remove_tt_helper1<type_tuple<Tys...>, ElemTy, type_tuple<>> {};
-
-template <typename TypeTupleTy, typename ElemTy>
-using remove_tt_t = typename remove_tt<TypeTupleTy, ElemTy>::type;
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <template <typename...> class VariadicTTy, typename TypeTupleTy>
-struct make_vttype_tt;
-
-template <template <typename...> class VariadicTTy, typename... Tys>
-struct make_vttype_tt<VariadicTTy, type_tuple<Tys...>> {
-    using type = VariadicTTy<Tys...>;
-};
-
-template <template <typename...> class VariadicTTy, typename TypeTupleTy>
-using make_vttype_tt_t = typename make_vttype_tt<VariadicTTy, TypeTupleTy>::type;
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-template <typename TypeTupleTy>
-struct make_indexer_tt;
-
-namespace detail {
-template <typename TypeTupleTy, std::size_t Idx, typename IdxTupleTy>
-struct make_indexer_tt_helper1;
-
-template <typename Ty, typename... Tys, std::size_t Idx, std::size_t... Idxs>
-struct make_indexer_tt_helper1<type_tuple<Ty, Tys...>, Idx, index_tuple<Idxs...>>
-    : make_indexer_tt_helper1<type_tuple<Tys...>, Idx + 1, index_tuple<Idxs..., Idx>> {};
-
-template <std::size_t Idx, typename IdxTupleTy>
-struct make_indexer_tt_helper1<type_tuple<>, Idx, IdxTupleTy> {
-    using type = IdxTupleTy;
-};
-
-}  // namespace detail
-
-template <typename... Tys>
-struct make_indexer_tt<type_tuple<Tys...>> : detail::make_indexer_tt_helper1<type_tuple<Tys...>, 0, index_tuple<>> {};
-
-template <typename TypeTupleTy>
-using make_indexer_tt_t = typename make_indexer_tt<TypeTupleTy>::type;
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-namespace detail {
-template <template <typename> class DefaultValSelectorTTy,
-          std::size_t DefaultedStartPos,
-          std::size_t Idx,
-          typename ArgTy>
-constexpr auto select_arg_or_default(ArgTy&& arg) -> typename std::decay<ArgTy>::type {
-    return (Idx < DefaultedStartPos) ? std::forward<ArgTy>(arg)
-                                     : DefaultValSelectorTTy<typename std::decay<ArgTy>::type>::value;
-}
-
-template <template <typename> class DefaultValSelectorTTy,
-          std::size_t DefaultedStartPos,
-          std::size_t... Idxs,
-          typename... ArgTys>
-constexpr auto make_partially_defaulted_std_tuple(index_tuple<Idxs...>&&, ArgTys&&... args)
-    -> std::tuple<typename std::decay<ArgTys>::type...> {
-    return std::make_tuple(
-        select_arg_or_default<DefaultValSelectorTTy, DefaultedStartPos, Idxs>(std::forward<ArgTys>(args))...);
-}
-}  // namespace detail
-
-template <template <typename> class DefaultValSelectorTTy, std::size_t DefaultedStartPos, typename... ArgTys>
-constexpr auto make_partially_defaulted_std_tuple(ArgTys&&... args)
-    -> std::tuple<typename std::decay<ArgTys>::type...> {
-    return detail::make_partially_defaulted_std_tuple<DefaultValSelectorTTy, DefaultedStartPos>(
-        make_indexer_tt_t<type_tuple<ArgTys...>>(),
-        std::forward<ArgTys>(args)...);
-}
-
-// -----------------------------------------------------------------------------------------------------------------------
-
-}  // namespace mputils
-
-/// Marker type that separates required selectors from optional ones in kernel selector signature.
-struct kd_optional_selector_t {};
-
-template <typename Ty>
-struct kd_default_value_selector {
-    static constexpr Ty value = static_cast<Ty>(0);
-};
-
-template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename SelectorsTupleTy>
-class kd_selector;
-
-template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename... SelectorTys>
-class kd_selector<KernelDataTy, OuterTy, ReqSelectorCount, mputils::type_tuple<SelectorTys...>> {
-    using _selector_types = mputils::type_tuple<SelectorTys...>;
-    static_assert(mputils::count_tt<_selector_types, kd_optional_selector_t>::value == 0,
-                  "Optional selectors separator can be specified only in template alias. "
-                  "Please do not use this class directly - use kd_selector_t alias instead.");
-    static_assert(mputils::size_tt<_selector_types>::value > 0, "At least one selector type must be specified.");
-    static_assert(ReqSelectorCount <= mputils::size_tt<_selector_types>::value,
-                  "Number of required selectors is invalid.");
-
-public:
-    using key_type = mputils::make_vttype_tt_t<std::tuple, _selector_types>;
-
-    using hash_type = std::hash<key_type>;
-    using mapped_type = KernelDataTy (*)(const OuterTy&);
-    using map_type = std::unordered_map<key_type, mapped_type, hash_type>;
-    using value_type = typename map_type::value_type;
-
-private:
-    map_type _kernel_map;
-
-    template <std::size_t Idx>
-    KernelDataTy _get_kernel(mputils::index_tuple<Idx>&&, const OuterTy& outer, const SelectorTys&... selectors) {
-        auto value = _kernel_map.find(
-            mputils::make_partially_defaulted_std_tuple<kd_default_value_selector, Idx - 1>(selectors...));
-        if (value == _kernel_map.end())
-            return _get_kernel(mputils::index_tuple<Idx - 1>(), outer, selectors...);
-
-        return value->second(outer);
-    }
-
-    static KernelDataTy _get_kernel(mputils::index_tuple<ReqSelectorCount>&&, const OuterTy&, const SelectorTys&...) {
-        throw std::runtime_error("ERROR: no default element in map for kernel data!!!");
-    }
-
-public:
-    kd_selector(const std::initializer_list<value_type>& l) : _kernel_map(l) {}
-
-    KernelDataTy get_kernel(const OuterTy& outer, const SelectorTys&... selectors) {
-        return _get_kernel(mputils::index_tuple<sizeof...(SelectorTys) + 1>(), outer, selectors...);
-    }
-};
-
-template <typename KernelDataTy, typename OuterTy, typename... SelectorTys>
-using kd_selector_t =
-    kd_selector<KernelDataTy,
-                OuterTy,
-                mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value !=
-                        mputils::npos
-                    ? mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value
-                    : sizeof...(SelectorTys),
-                mputils::remove_tt_t<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>>;
-
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/lrn_gpu.cpp
@@ -1,84 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "lrn_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "lrn/lrn_kernel_selector.h"
-#include "lrn/lrn_kernel_base.h"
-
-namespace cldnn {
-namespace gpu {
-
-struct lrn_gpu : typed_primitive_gpu_impl<lrn> {
-    using parent = typed_primitive_gpu_impl<lrn>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<lrn_gpu>(*this);
-    }
-
-    static primitive_impl* create(const lrn_node& arg) {
-        auto lrn_params = get_default_params<kernel_selector::lrn_params>(arg);
-        auto lrn_optional_params = get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());
-
-        const auto& primitive = arg.get_primitive();
-
-        lrn_params.alpha = primitive->alpha;
-        lrn_params.beta = primitive->beta;
-        lrn_params.k = primitive->k;
-        lrn_params.localSize = primitive->size;
-        lrn_params.divMode = kernel_selector::kernel_divider_mode::FIXED;
-        lrn_params.normMode = primitive->norm_region == lrn_norm_region_within_channel
-                                  ? kernel_selector::lrn_mode::WITHIN_CHANNEL
-                                  : kernel_selector::lrn_mode::ACROSS_CHANNEL;
-
-        auto& kernel_selector = kernel_selector::lrn_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(lrn_params, lrn_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto lrn = new lrn_gpu(arg, best_kernels[0]);
-
-        return lrn;
-    }
-};
-
-namespace detail {
-
-attach_lrn_gpu::attach_lrn_gpu() {
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), lrn_gpu::create);
-
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), lrn_gpu::create);
-
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), lrn_gpu::create);
-
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), lrn_gpu::create);
-
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), lrn_gpu::create);
-    implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), lrn_gpu::create);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/mutable_data_gpu.cpp
@@ -1,32 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mutable_data_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-
-namespace cldnn {
-namespace gpu {
-
-struct mutable_data_gpu : public typed_primitive_gpu_impl<mutable_data> {
-    using parent = typed_primitive_gpu_impl<mutable_data>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<mutable_data_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(mutable_data_node const& arg) { return new mutable_data_gpu(arg, {}); }
-};
-
-namespace detail {
-
-attach_mutable_data_gpu::attach_mutable_data_gpu() {
-    implementation_map<mutable_data>::add({{engine_types::ocl, mutable_data_gpu::create}});
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp
@@ -1,106 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mvn_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "mvn/mvn_kernel_selector.h"
-#include "mvn/mvn_kernel_base.h"
-
-#include <algorithm>
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct mvn_gpu : typed_primitive_gpu_impl<mvn> {
-    using parent = typed_primitive_gpu_impl<mvn>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<mvn_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const mvn_node& arg) {
-        auto mvn_params = get_default_params<kernel_selector::mvn_params>(arg);
-        auto mvn_optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());
-
-        mvn_params.mvnMode = arg.get_primitive()->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
-                                                                  : kernel_selector::mvn_mode::WITHIN_CHANNELS;
-        mvn_params.mvnNormalizeVariance = arg.get_primitive()->normalize_variance;
-        mvn_params.epsilon = arg.get_primitive()->epsilon;
-
-        mvn_params.mvnEpsMode = arg.get_primitive()->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT
-                                                                     : kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;
-
-        auto& kernel_selector = kernel_selector::mvn_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(mvn_params, mvn_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto mvn = new mvn_gpu(arg, best_kernels[0]);
-
-        return mvn;
-    }
-};
-
-namespace detail {
-
-attach_mvn_gpu::attach_mvn_gpu() {
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16),
-                                 mvn_gpu::create);
-    implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16),
-                                 mvn_gpu::create);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/normalize_gpu.cpp
@@ -1,93 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "normalize_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "normalize/normalize_kernel_selector.h"
-#include "normalize/normalize_kernel_base.h"
-
-#include <algorithm>
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct normalize_gpu : typed_primitive_gpu_impl<normalize> {
-    using parent = typed_primitive_gpu_impl<normalize>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<normalize_gpu>(*this);
-    }
-
-protected:
-     kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        args.scale_table = instance.scale_memory();
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const normalize_node& arg) {
-        auto norm_params = get_default_params<kernel_selector::normalize_params>(arg);
-        auto norm_optional_params =
-            get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());
-
-        const auto& scale_layout = arg.scale().get_output_layout();
-
-        norm_params.normMode = arg.get_primitive()->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL
-                                                                   : kernel_selector::normalize_mode::WITHIN_SPATIAL;
-        norm_params.epsilon = arg.get_primitive()->epsilon;
-        norm_params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials();
-
-        auto& kernel_selector = kernel_selector::normalize_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(norm_params, norm_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto lrn = new normalize_gpu(arg, best_kernels[0]);
-
-        return lrn;
-    }
-};
-
-namespace detail {
-
-attach_normalize_gpu::attach_normalize_gpu() {
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                       normalize_gpu::create);
-    implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf),
-                                       normalize_gpu::create);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp
@ -1,74 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "one_hot_inst.h"
-
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "one_hot/one_hot_kernel_selector.h"
-#include "one_hot/one_hot_kernel_base.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include <vector>
-
-namespace cldnn {
-namespace gpu {
-
-struct one_hot_gpu : typed_primitive_gpu_impl<one_hot> {
-    using parent = typed_primitive_gpu_impl<one_hot>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<one_hot_gpu>(*this);
-    }
-
-    static primitive_impl* create(const one_hot_node& arg) {
-        auto oh_params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
-        auto oh_optional_params =
-            get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());
-
-        oh_params.one_hot_axis = arg.get_primitive()->one_hot_axis;
-        oh_params.on_value = arg.get_primitive()->on_value;
-        oh_params.off_value = arg.get_primitive()->off_value;
-
-        auto output_sizes = arg.get_output_layout().format == format::bfzyx ?
-                            arg.get_output_layout().size.sizes(format::bfzyx) :
-                            arg.get_output_layout().size.sizes(format::bfyx);
-
-        oh_params.one_hot_limit = output_sizes[oh_params.one_hot_axis];
-
-        auto& kernel_selector = kernel_selector::one_hot_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(oh_params, oh_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with these arguments");
-
-        return new one_hot_gpu(arg, best_kernels[0]);
-    }
-};
-
-namespace detail {
-
-attach_one_hot_gpu::attach_one_hot_gpu() {
-    auto val_fw = one_hot_gpu::create;
-
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/quantize_gpu.cpp
@ -1,174 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "quantize_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "quantize/quantize_kernel_selector.h"
-#include "quantize/quantize_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct quantize_gpu : typed_primitive_gpu_impl<quantize> {
-    using parent = typed_primitive_gpu_impl<quantize>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<quantize_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
-        kernel_arguments_data args;
-
-        for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
-            args.inputs.push_back(instance.input_memory_ptr(i));
-        }
-        if (instance.node.get_scale_shift_opt()) {
-            if (instance.node.get_dependencies().size() == 9) {
-                args.inputs.push_back(instance.dep_memory_ptr(5));
-                args.inputs.push_back(instance.dep_memory_ptr(6));
-                args.inputs.push_back(instance.dep_memory_ptr(7));
-                args.inputs.push_back(instance.dep_memory_ptr(8));
-            }
-        }
-        args.output = instance.output_memory_ptr();
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const quantize_node& arg) {
-        auto quantize_params = get_default_params<kernel_selector::quantize_params>(arg);
-        auto quantize_optional_params =
-            get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());
-
-        quantize_params.levels = arg.get_levels();
-        quantize_params.packed_binary_output = arg.get_packed_binary_output();
-        quantize_params.scale_shift_opt = arg.get_scale_shift_opt();
-        quantize_params.has_post_scale = arg.get_need_post_scale();
-        quantize_params.has_post_shift = arg.get_need_post_shift();
-        quantize_params.has_pre_shift = arg.get_need_pre_shift();
-        quantize_params.has_clamp = arg.get_need_clamp();
-
-        quantize_params.per_tensor_input_range = arg.get_per_tensor_input_range();
-        quantize_params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
-        quantize_params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
-        quantize_params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
-        quantize_params.per_tensor_output_shift = arg.get_per_tensor_output_shift();
-
-        quantize_params.in_lo = arg.get_input_lo_val();
-        quantize_params.in_hi = arg.get_input_hi_val();
-        quantize_params.in_scale = arg.get_input_scale_val();
-        quantize_params.in_shift = arg.get_input_shift_val();
-        quantize_params.out_scale = arg.get_output_scale_val();
-        quantize_params.out_shift = arg.get_output_shift_val();
-
-        for (size_t i = 1; i < arg.inputs_count(); i++) {
-            quantize_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
-        }
-        const auto& output_layout = arg.get_output_layout();
-        quantize_params.output = convert_data_tensor(output_layout);
-
-        auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(quantize_params, quantize_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto quantize = new quantize_gpu(arg, best_kernels[0]);
-
-        return quantize;
-    }
-};
-
-namespace detail {
-
-attach_quantize_gpu::attach_quantize_gpu() {
-    auto val_fw = quantize_gpu::create;
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fs_b_yx_fsv32), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
-
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp
@ -1,86 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#include "register_gpu.hpp"
-
-namespace cldnn { namespace gpu {
-
-#define REGISTER_GPU(prim)                      \
-    static detail::attach_##prim##_gpu attach_##prim
-
-void register_implementations_gpu() {
-    REGISTER_GPU(activation);
-    REGISTER_GPU(arg_max_min);
-    REGISTER_GPU(average_unpooling);
-    REGISTER_GPU(binary_convolution);
-    REGISTER_GPU(border);
-    REGISTER_GPU(broadcast);
-    REGISTER_GPU(concatenation);
-    REGISTER_GPU(condition);
-    REGISTER_GPU(convolution);
-    REGISTER_GPU(crop);
-    REGISTER_GPU(custom_gpu_primitive);
-    REGISTER_GPU(data);
-    REGISTER_GPU(deconvolution);
-    REGISTER_GPU(deformable_conv);
-    REGISTER_GPU(deformable_interp);
-    REGISTER_GPU(depth_to_space);
-    REGISTER_GPU(batch_to_space);
-    REGISTER_GPU(detection_output);
-    REGISTER_GPU(eltwise);
-    REGISTER_GPU(fully_connected);
-    REGISTER_GPU(gather);
-    REGISTER_GPU(gather_nd);
-    REGISTER_GPU(gemm);
-    REGISTER_GPU(input_layout);
-    REGISTER_GPU(lrn);
-    REGISTER_GPU(lstm_gemm);
-    REGISTER_GPU(lstm_elt);
-    REGISTER_GPU(max_unpooling);
-    REGISTER_GPU(mutable_data);
-    REGISTER_GPU(mvn);
-    REGISTER_GPU(normalize);
-    REGISTER_GPU(one_hot);
-    REGISTER_GPU(permute);
-    REGISTER_GPU(pooling);
-    REGISTER_GPU(prior_box);
-    REGISTER_GPU(proposal);
-    REGISTER_GPU(pyramid_roi_align);
-    REGISTER_GPU(quantize);
-    REGISTER_GPU(reduce);
-    REGISTER_GPU(region_yolo);
-    REGISTER_GPU(reorder);
-    REGISTER_GPU(reorg_yolo);
-    REGISTER_GPU(reshape);
-    REGISTER_GPU(reverse_sequence);
-    REGISTER_GPU(roi_pooling);
-    REGISTER_GPU(scale);
-    REGISTER_GPU(scatter_update);
-    REGISTER_GPU(scatter_nd_update);
-    REGISTER_GPU(scatter_elements_update);
-    REGISTER_GPU(select);
-    REGISTER_GPU(shuffle_channels);
-    REGISTER_GPU(softmax);
-    REGISTER_GPU(space_to_batch);
-    REGISTER_GPU(space_to_depth);
-    REGISTER_GPU(strided_slice);
-    REGISTER_GPU(tile);
-    REGISTER_GPU(fused_conv_eltwise);
-    REGISTER_GPU(lstm_dynamic_input);
-    REGISTER_GPU(lstm_dynamic_timeloop);
-    REGISTER_GPU(generic_layer);
-    REGISTER_GPU(gather_tree);
-    REGISTER_GPU(resample);
-    REGISTER_GPU(non_max_suppression);
-    REGISTER_GPU(grn);
-    REGISTER_GPU(ctc_greedy_decoder);
-    REGISTER_GPU(cum_sum);
-    REGISTER_GPU(embedding_bag);
-    REGISTER_GPU(extract_image_patches);
-    REGISTER_GPU(loop);
-}
-
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/scale_gpu.cpp
@ -1,144 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "scale_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "eltwise/eltwise_kernel_selector.h"
-#include "eltwise/eltwise_kernel_base.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct scale_gpu : typed_primitive_gpu_impl<scale> {
-    using parent = typed_primitive_gpu_impl<scale>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<scale_gpu>(*this);
-    }
-
-protected:
-    kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
-        kernel_arguments_data args = parent::get_arguments(instance, split);
-        args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};
-        args.output = instance.output_memory_ptr();
-
-        if (_outer.bias_term()) {
-            args.inputs.push_back(instance.bias_memory());
-        }
-        return args;
-    }
-
-public:
-    static primitive_impl* create(const scale_node& arg) {
-        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
-        auto ew_optional_params =
-            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
-
-        ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
-
-        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
-                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
-                                        kernel_selector::eltwise_mode::MUL});
-
-        if (arg.bias_term()) {
-            ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
-            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0),
-                                             kernel_selector::eltwise_params::InputType::Buffer(2)},
-                                            kernel_selector::eltwise_mode::ADD});
-        }
-
-        ew_params.layoutBased = true;
-
-        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto scale = new scale_gpu(arg, best_kernels[0]);
-
-        return scale;
-    }
-};
-
-namespace detail {
-
-attach_scale_gpu::attach_scale_gpu() {
-    auto val_fw = scale_gpu::create;
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fs_b_yx_fsv32), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv4), val_fw);
-
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv32), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/select_gpu.cpp
@ -1,70 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "select_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "kernel_selector_helper.h"
-#include "select/select_kernel_selector.h"
-#include "select/select_kernel_base.h"
-
-namespace cldnn {
-namespace gpu {
-
-struct select_gpu : typed_primitive_gpu_impl<select> {
-    using parent = typed_primitive_gpu_impl<select>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<select_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const select_node& arg) {
-        auto select_params = get_default_params<kernel_selector::select_params>(arg);
-        auto select_optional_params =
-            get_default_optional_params<kernel_selector::select_optional_params>(arg.get_program());
-
-        for (size_t i = 1; i < arg.inputs_count(); i++) {
-            select_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
-        }
-
-        auto& kernel_selector = kernel_selector::select_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(select_params, select_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto select = new select_gpu(arg, best_kernels[0]);
-
-        return select;
-    }
-};
-
-namespace detail {
-
-attach_select_gpu::attach_select_gpu() {
-    implementation_map<select>::add(
-        {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), select_gpu::create},
-
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), select_gpu::create},
-
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), select_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), select_gpu::create}});
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
@ -1,85 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "shuffle_channels_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "shuffle_channels/shuffle_channels_kernel_selector.h"
-#include "shuffle_channels/shuffle_channels_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct shuffle_channels_gpu : typed_primitive_gpu_impl<shuffle_channels> {
-    using parent = typed_primitive_gpu_impl<shuffle_channels>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<shuffle_channels_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const shuffle_channels_node& arg) {
-        auto shuffle_channels_params = get_default_params<kernel_selector::shuffle_channels_params>(arg);
-        auto shuffle_channels_optional_params =
-            get_default_optional_params<kernel_selector::shuffle_channels_optional_params>(arg.get_program());
-
-        const int32_t number_of_dims = 4;
-        int32_t axis = arg.get_primitive()->axis;
-
-        if (axis < 0)
-            axis += number_of_dims;
-
-        shuffle_channels_params.group = arg.get_primitive()->group;
-        shuffle_channels_params.axis = axis;
-
-        auto& kernel_selector = kernel_selector::shuffle_channels_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(shuffle_channels_params, shuffle_channels_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto shuffle_channels = new shuffle_channels_gpu(arg, best_kernels[0]);
-
-        return shuffle_channels;
-    }
-};
-
-namespace detail {
-
-attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
-    auto val_fw = shuffle_channels_gpu::create;
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/space_to_batch_gpu.cpp
@ -1,77 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "space_to_batch_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "space_to_batch/space_to_batch_kernel_selector.h"
-#include "space_to_batch/space_to_batch_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-#include "data_inst.h"
-#include <vector>
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-struct space_to_batch_gpu : typed_primitive_gpu_impl<space_to_batch> {
-    using parent = typed_primitive_gpu_impl<space_to_batch>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<space_to_batch_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const space_to_batch_node& arg) {
-        auto space_to_batch_params = get_default_params<kernel_selector::space_to_batch_params>(arg);
-        auto space_to_batch_optional_params =
-            get_default_optional_params<kernel_selector::space_to_batch_optional_params>(arg.get_program());
-
-        auto primitive = arg.get_primitive();
-
-        space_to_batch_params.block_shape = convert_dim_vector(primitive->block_shape);
-        space_to_batch_params.pads_begin = convert_dim_vector(primitive->pads_begin);
-        space_to_batch_params.pads_end = convert_dim_vector(primitive->pads_end);
-
-        auto& kernel_selector = kernel_selector::space_to_batch_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(space_to_batch_params, space_to_batch_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto space_to_batch = new space_to_batch_gpu(arg, best_kernels[0]);
-
-        return space_to_batch;
-    }
-};
-
-namespace detail {
-
-attach_space_to_batch_gpu::attach_space_to_batch_gpu() {
-    auto val_fw = space_to_batch_gpu::create;
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/space_to_depth_gpu.cpp
@ -1,79 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "space_to_depth_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "space_to_depth/space_to_depth_kernel_selector.h"
-#include "space_to_depth/space_to_depth_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-struct space_to_depth_gpu : typed_primitive_gpu_impl<space_to_depth> {
-    using parent = typed_primitive_gpu_impl<space_to_depth>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<space_to_depth_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const space_to_depth_node& arg) {
-        auto space_to_depth_params = get_default_params<kernel_selector::space_to_depth_params>(arg);
-        auto space_to_depth_optional_params =
-                get_default_optional_params<kernel_selector::space_to_depth_optional_params>(arg.get_program());
-
-        space_to_depth_params.depth_mode = (arg.get_primitive()->mode == space_to_depth::blocks_first) ?
-                                           kernel_selector::SpaceToDepthMode::BLOCKS_FIRST :
-                                           kernel_selector::SpaceToDepthMode::DEPTH_FIRST;
-
-        space_to_depth_params.block_size = arg.get_primitive()->block_size;
-
-        auto& kernel_selector = kernel_selector::space_to_depth_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(space_to_depth_params, space_to_depth_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto space_to_depth = new space_to_depth_gpu(arg, best_kernels[0]);
-
-        return space_to_depth;
-    }
-};
-
-namespace detail {
-
-attach_space_to_depth_gpu::attach_space_to_depth_gpu() {
-    auto val_fw = space_to_depth_gpu::create;
-
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/tile_gpu.cpp
@ -1,72 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "tile_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
-#include "kernel_selector_helper.h"
-#include "tile/tile_kernel_selector.h"
-#include "tile/tile_kernel_ref.h"
-#include "cldnn/runtime/error_handler.hpp"
-
-using namespace cldnn;
-
-namespace cldnn {
-namespace gpu {
-
-struct tile_gpu : typed_primitive_gpu_impl<tile> {
-    using parent = typed_primitive_gpu_impl<tile>;
-    using parent::parent;
-
-    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<tile_gpu>(*this);
-    }
-
-public:
-    static primitive_impl* create(const tile_node& arg) {
-        auto tile_params = get_default_params<kernel_selector::tile_params>(arg);
-        auto tile_optional_params =
-            get_default_optional_params<kernel_selector::tile_optional_params>(arg.get_program());
-
-        auto& kernel_selector = kernel_selector::tile_kernel_selector::Instance();
-        auto best_kernels = kernel_selector.GetBestKernels(tile_params, tile_optional_params);
-
-        CLDNN_ERROR_BOOL(arg.id(),
-                         "Best_kernel.empty()",
-                         best_kernels.empty(),
-                         "Cannot find a proper kernel with this arguments");
-
-        auto tile = new tile_gpu(arg, best_kernels[0]);
-
-        return tile;
-    }
-};
-
-namespace detail {
-
-attach_tile_gpu::attach_tile_gpu() {
-    auto val_fw = tile_gpu::create;
-
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<tile>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-}
-
-}  // namespace detail
-}  // namespace gpu
-}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp
@ -51,7 +51,7 @@ void add_required_reorders::run(program_impl& p) {
            continue;  // only nodes with dependencies
        if (usr->is_type<data>())
            continue;
-        if (usr->type()->does_an_implementation_exist(p.get_engine(), *usr))
+        if (usr->type()->does_an_implementation_exist(*usr))
            continue;

        bool correct_layout_selected = false;
@ -71,7 +71,7 @@ void add_required_reorders::run(program_impl& p) {
                                          node->get_output_layout().format,
                                          original_layout.size);
                    usr->set_output_layout(current_layout, false);
-                    if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                    if (usr->type()->does_possible_implementation_exist(*usr)) {
                        correct_layout_selected = true;
                        break;
                    } else if (original_layout.data_type == data_types::i64) {
@ -80,14 +80,14 @@ void add_required_reorders::run(program_impl& p) {
                        current_layout = original_layout;
                        current_layout.data_type = data_types::i32;
                        usr->set_output_layout(current_layout, false);
-                        if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                        if (usr->type()->does_possible_implementation_exist(*usr)) {
                            correct_layout_selected = true;
                        } else {
                            current_layout = original_layout;
                            current_layout.data_type = data_types::i32;
                            current_layout.format = node->get_output_layout().format;
                            usr->set_output_layout(current_layout, false);
-                            if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                            if (usr->type()->does_possible_implementation_exist(*usr)) {
                                correct_layout_selected = true;
                            }
                        }
@ -148,7 +148,7 @@ void add_required_reorders::run(program_impl& p) {
                                      new_layout_format,
                                      original_layout.size);
                usr->set_output_layout(current_layout, false);
-                if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                if (usr->type()->does_possible_implementation_exist(*usr)) {
                    correct_layout_selected = true;
                    break;
                }
@ -164,7 +164,7 @@ void add_required_reorders::run(program_impl& p) {

                    usr->set_output_layout(original_layout_i32, false);

-                    if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                    if (usr->type()->does_possible_implementation_exist(*usr)) {
                        correct_layout_selected = true;
                    }

@ -174,7 +174,7 @@ void add_required_reorders::run(program_impl& p) {
                                                  new_layout_format,
                                                  original_layout_i32.size);
                            usr->set_output_layout(current_layout_i32, false);
-                            if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
+                            if (usr->type()->does_possible_implementation_exist(*usr)) {
                                correct_layout_selected = true;
                                break;
                            }
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp
@ -42,7 +42,7 @@ void compile_graph::run(program_impl& p) {
                auto& node = *(std::next(proc_order.begin(), i));
                node->set_unique_id(std::to_string(i));
                if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
-                    node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
+                    node->selected_impl = node->type()->choose_impl(*node);
                }
            }
        });
@ -51,7 +51,7 @@ void compile_graph::run(program_impl& p) {
 #else
    for (auto& node : p.get_processing_order()) {
        if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
-            node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
+            node->selected_impl = node->type()->choose_impl(*node);
        }
    }
 #endif
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_input_reorder.cpp
@ -5,7 +5,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "pass_manager.h"
-#include "gpu/primitive_gpu_base.h"
+#include "impls/ocl/primitive_base.hpp"
 #include "fully_connected/fully_connected_params.h"
 #include <memory>
 #include <stdexcept>
@ -45,7 +45,7 @@ void post_input_reorder::run(program_impl& p) {
        const auto impl = node->get_selected_impl();
        // add a reorder if primitive's input format doesn't match implementation's input format
        if (node->is_type<fully_connected>()) {
-            const auto& fc_impl = dynamic_cast<const gpu::typed_primitive_gpu_impl<fully_connected>&>(*impl);
+            const auto& fc_impl = dynamic_cast<const ocl::typed_primitive_impl_ocl<fully_connected>&>(*impl);
            const auto& fc_params = *static_cast<kernel_selector::fully_connected_params*>(fc_impl._kernel_data.params.get());

            auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout());
@ -62,7 +62,7 @@ void post_input_reorder::run(program_impl& p) {
                reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
                reorder.get_output_layout(false);
                node->set_output_layout(previous_layout, false);
-                reorder.set_selected_impl(reorder.type()->choose_impl(p.get_engine(), reorder));
+                reorder.set_selected_impl(reorder.type()->choose_impl(reorder));
            }
        }
    }
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp
@ -56,7 +56,7 @@ void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
            // Don't run impl selection to avoid double compilation of reorder kernels
            // in main program and internal program for constant propagation
            if (!g_node.is_constant())
-                g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
+                g_node.selected_impl = g_node.type()->choose_impl(g_node);
        }
    }

--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
@ -27,9 +27,8 @@ void remove_redundant_reorders::run(program_impl& p) {
        if (!update_implementations)
            return;

-        auto& eng = p.get_engine();
        node.set_unique_id(node.get_unique_id() + "_reorder");
-        auto new_impl = node.type()->choose_impl(eng, node);
+        auto new_impl = node.type()->choose_impl(node);
        node.set_selected_impl(std::move(new_impl));
    };

@ -300,7 +299,7 @@ void remove_redundant_reorders::run(program_impl& p) {
                continue;

            input.set_output_layout(output_layout, false);
-            if (input.type()->does_possible_implementation_exist(p.get_engine(), input)) {
+            if (input.type()->does_possible_implementation_exist(input)) {
                p.replace_all_usages(node, input);
                p.add_optimized_primitive_info(node.id());
                p.remove_all_connections(node);
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
@ -58,7 +58,10 @@ std::map<program_node*, format::type> get_preferred_formats(program_impl& p, lay
            continue;

        auto ex = lo.get_preferred_format(*n);
+        auto impl = lo.get_preferred_impl_type(*n);
        fmt_map[n] = ex;
+
+        n->set_preferred_impl_type(impl);
    }
    return fmt_map;
 }
--- a/inference-engine/thirdparty/clDNN/src/impls/common/condition.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/common/condition.cpp
@ -4,23 +4,23 @@

 #include "condition_inst.h"
 #include "network_impl.h"
-#include "implementation_map.h"
-#include "register_gpu.hpp"
+#include "impls/implementation_map.hpp"
+#include "register.hpp"

 #include <algorithm>
 #include <vector>

 namespace cldnn {
-namespace gpu {
+namespace common {

-struct condition_gpu : typed_primitive_impl<condition> {
+struct condition_impl : typed_primitive_impl<condition> {
    const condition_node& outer;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<condition_gpu>(*this);
+        return make_unique<condition_impl>(*this);
    }

-    explicit condition_gpu(const condition_node& outer) : outer(outer) {}
+    explicit condition_impl(const condition_node& outer) : outer(outer) {}

    event::ptr execute_impl(const std::vector<event::ptr>& events, condition_inst& instance) override {
        for (auto& a : events) {
@ -42,7 +42,7 @@ struct condition_gpu : typed_primitive_impl<condition> {
        return ev;
    }

-    static primitive_impl* create(const condition_node& arg) { return new condition_gpu(arg); }
+    static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); }

    void init_kernels() override {}

@ -117,13 +117,13 @@ private:

 namespace detail {

-attach_condition_gpu::attach_condition_gpu() {
-    implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                        condition_gpu::create);
-    implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                        condition_gpu::create);
+attach_condition_common::attach_condition_common() {
+    implementation_map<condition>::add(impl_types::common, condition_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace common
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/common/loop.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/common/loop.cpp
@ -5,25 +5,25 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #include "loop_inst.h"
 #include "network_impl.h"
-#include "implementation_map.h"
-#include "register_gpu.hpp"
+#include "impls/implementation_map.hpp"
+#include "register.hpp"
 #include "mutable_data_inst.h"
 #include "input_layout_inst.h"
 #include <vector>
 #include <algorithm>

 namespace cldnn {
-namespace gpu {
-struct loop_gpu : typed_primitive_impl<loop> {
+namespace common {
+struct loop_impl : typed_primitive_impl<loop> {
    const loop_node& node;
    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<loop_gpu>(*this);
+        return make_unique<loop_impl>(*this);
    }

    void init_kernels() override {}

-    loop_gpu(const loop_gpu& other) : typed_primitive_impl<loop>(other), node(other.node) {}
-    explicit loop_gpu(const loop_node& node) : node(node) {}
+    loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {}
+    explicit loop_impl(const loop_node& node) : node(node) {}

    // read scala value from data primitive
    static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
@ -216,14 +216,14 @@ struct loop_gpu : typed_primitive_impl<loop> {
        return ev;
    }

-    static primitive_impl* create(const loop_node& arg) { return new loop_gpu(arg); }
+    static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); }
 };

 namespace detail {
-attach_loop_gpu::attach_loop_gpu() {
-    implementation_map<loop>::add({{engine_types::ocl, loop_gpu::create}});
+attach_loop_common::attach_loop_common() {
+    implementation_map<loop>::add(impl_types::common, loop_impl::create, {});
 }
 }  // namespace detail

-}  // namespace gpu
+}  // namespace common
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/common/register.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/common/register.cpp
@ -0,0 +1,22 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "register.hpp"
+
+namespace cldnn {
+namespace common {
+
+#define REGISTER_COMMON(prim)                      \
+    static detail::attach_##prim##_common attach_##prim
+
+void register_implementations() {
+    REGISTER_COMMON(condition);
+    REGISTER_COMMON(data);
+    REGISTER_COMMON(input_layout);
+    REGISTER_COMMON(loop);
+    REGISTER_COMMON(prior_box);
+}
+
+}  // namespace common
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/common/register.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/common/register.hpp
@ -0,0 +1,35 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cldnn/primitives/condition.hpp"
+#include "cldnn/primitives/loop.hpp"
+#include "cldnn/primitives/data.hpp"
+#include "cldnn/primitives/input_layout.hpp"
+#include "cldnn/primitives/prior_box.hpp"
+
+
+namespace cldnn {
+namespace common {
+void register_implementations();
+
+namespace detail {
+
+#define REGISTER_COMMON(prim)           \
+    struct attach_##prim##_common {     \
+        attach_##prim##_common();       \
+    }
+
+REGISTER_COMMON(condition);
+REGISTER_COMMON(data);
+REGISTER_COMMON(input_layout);
+REGISTER_COMMON(loop);
+REGISTER_COMMON(prior_box);
+
+#undef REGISTER_COMMON
+
+}  // namespace detail
+}  // namespace common
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/common/wait_for_events.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/common/wait_for_events.cpp
@ -6,21 +6,20 @@
 #include "data_inst.h"
 #include "prior_box_inst.h"
 #include "input_layout_inst.h"
-#include "implementation_map.h"
-#include "register_gpu.hpp"
-
+#include "impls/implementation_map.hpp"
+#include "register.hpp"
 #include "network_impl.h"
 #include <vector>

 namespace cldnn {
-namespace gpu {
+namespace common {

-class wait_for_events_gpu : public primitive_impl {
+class wait_for_events_impl : public primitive_impl {
 public:
-    explicit wait_for_events_gpu(const program_node& /*node*/) {}
+    explicit wait_for_events_impl(const program_node& /*node*/) {}

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<wait_for_events_gpu>(*this);
+        return make_unique<wait_for_events_impl>(*this);
    }

    void init_kernels() override {}
@ -33,32 +32,32 @@ public:

    bool validate(const primitive_inst&) const override { return true; }

-    static primitive_impl* create_data(const data_node& data) { return new wait_for_events_gpu(data); }
+    static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); }

    static primitive_impl* create_input_layout(const input_layout_node& input) {
-        return new wait_for_events_gpu(input);
+        return new wait_for_events_impl(input);
    }

    static primitive_impl* create_prior_box(const prior_box_node& prior_box) {
        // This primitive is being executed on CPU during network compilation.
-        return new wait_for_events_gpu(prior_box);
+        return new wait_for_events_impl(prior_box);
    }
 };

 namespace detail {

-attach_data_gpu::attach_data_gpu() {
-    implementation_map<data>::add({ {engine_types::ocl, wait_for_events_gpu::create_data} });
+attach_data_common::attach_data_common() {  // registers the factory for `data` under impl_types::common
+    implementation_map<data>::add(impl_types::common, wait_for_events_impl::create_data, {});  // {}: empty key set
 }

-attach_input_layout_gpu::attach_input_layout_gpu() {
-    implementation_map<input_layout>::add({{engine_types::ocl, wait_for_events_gpu::create_input_layout}});
+attach_input_layout_common::attach_input_layout_common() {  // registers the factory for `input_layout` under impl_types::common
+    implementation_map<input_layout>::add(impl_types::common, wait_for_events_impl::create_input_layout, {});  // {}: empty key set
 }

-attach_prior_box_gpu::attach_prior_box_gpu() {
-    implementation_map<prior_box>::add({{engine_types::ocl, wait_for_events_gpu::create_prior_box}});
+attach_prior_box_common::attach_prior_box_common() {  // registers the factory for `prior_box` under impl_types::common
+    implementation_map<prior_box>::add(impl_types::common, wait_for_events_impl::create_prior_box, {});  // {}: empty key set
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace common
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/cpu_impl_helpers.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/cpu_impl_helpers.hpp
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/detection_output.cpp
@ -4,9 +4,9 @@

 #include "detection_output_inst.h"
 #include "network_impl.h"
-#include "implementation_map.h"
+#include "impls/implementation_map.hpp"
 #include "math_utils.h"
-#include "register_gpu.hpp"
+#include "register.hpp"
 #include "cpu_impl_helpers.hpp"

 #include <algorithm>
@ -24,7 +24,7 @@
 #endif

 namespace cldnn {
-namespace gpu {
+namespace cpu {

 namespace {
    using bounding_box = cldnn::cpu::bounding_box;
@ -43,15 +43,15 @@ bool comp_score_descend<std::pair<int, int>>(const std::pair<float, std::pair<in
 }

 /************************ Detection Output CPU ************************/
-struct detection_output_cpu : typed_primitive_impl<detection_output> {
+struct detection_output_impl : typed_primitive_impl<detection_output> {
    enum NMSType {CAFFE, MXNET};
    const detection_output_node& outer;
    NMSType nms_type;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<detection_output_cpu>(*this);
+        return make_unique<detection_output_impl>(*this);
    }
-    explicit detection_output_cpu(const detection_output_node& outer)
+    explicit detection_output_impl(const detection_output_node& outer)
        : outer(outer)
        , nms_type(outer.get_primitive()->decrease_label_id ? MXNET : CAFFE) {}

@ -822,17 +822,19 @@ struct detection_output_cpu : typed_primitive_impl<detection_output> {

    void init_kernels() override {}

-    static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); }
+    static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); }
 };

 namespace detail {

-attach_detection_output_gpu::attach_detection_output_gpu() {
-    implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_cpu::create);
-    implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_cpu::create);
+attach_detection_output_impl::attach_detection_output_impl() {  // registers detection_output_impl::create under impl_types::cpu
+    implementation_map<detection_output>::add(impl_types::cpu, detection_output_impl::create, {  // one factory, several keys
+        std::make_tuple(data_types::f32, format::bfyx),  // each key is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::bfyx)
+    });
 }

 }  // namespace detail

-}  // namespace gpu
+}  // namespace cpu
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/non_max_suppression.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/non_max_suppression.cpp
@ -5,7 +5,7 @@
 #include "non_max_suppression_inst.h"
 #include "primitive_inst.h"
 #include "network_impl.h"
-#include "register_gpu.hpp"
+#include "register.hpp"
 #include "cpu_impl_helpers.hpp"

 #include <vector>
@ -15,7 +15,7 @@
 #include <tuple>

 namespace cldnn {
-namespace {
+namespace cpu {

 using namespace cldnn::cpu;

@ -372,14 +372,14 @@ void run(non_max_suppression_inst& instance) {
    store_result(stream, instance.output_memory_ptr(), result);
 }

-struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
+struct non_max_suppression_impl : typed_primitive_impl<non_max_suppression> {
    using parent = typed_primitive_impl<non_max_suppression>;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<non_max_suppression_cpu>(*this);
+        return make_unique<non_max_suppression_impl>(*this);
    }

-    non_max_suppression_cpu() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_cpu") {}
+    non_max_suppression_impl() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_impl") {}

    virtual event::ptr execute_impl(const std::vector<event::ptr>& event, typed_primitive_inst<non_max_suppression>& instance) {
        for (auto e : event) {
@ -396,23 +396,20 @@ struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
    }

    static primitive_impl* create(const non_max_suppression_node&) {
-        return new non_max_suppression_cpu();
+        return new non_max_suppression_impl();
    }
    void init_kernels() override {}
 };
-}  // namespace
-
-namespace gpu {
 namespace detail {

-attach_non_max_suppression_gpu::attach_non_max_suppression_gpu() {
-    implementation_map<non_max_suppression>::add({
-        {std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), non_max_suppression_cpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), non_max_suppression_cpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), non_max_suppression_cpu::create}
+attach_non_max_suppression_impl::attach_non_max_suppression_impl() {  // registers non_max_suppression_impl::create under impl_types::cpu
+    implementation_map<non_max_suppression>::add(impl_types::cpu, non_max_suppression_impl::create, {  // one factory, several keys
+        std::make_tuple(data_types::i32, format::bfyx),  // each key is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfyx),
    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace cpu
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/proposal.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/proposal.cpp
@ -4,10 +4,10 @@

 #include "proposal_inst.h"
 #include "cldnn/runtime/engine.hpp"
-#include "implementation_map.h"
+#include "impls/implementation_map.hpp"
 #include "network_impl.h"
 #include "cldnn/runtime/error_handler.hpp"
-#include "register_gpu.hpp"
+#include "register.hpp"

 #include <algorithm>
 #include <string>
@ -17,7 +17,7 @@
 #define EPSILON 0.00001f

 namespace cldnn {
-namespace gpu {
+namespace cpu {

 namespace {

@ -190,13 +190,13 @@ struct im_info_t {
    int min_bbox_y;
 };

-struct proposal_gpu : typed_primitive_impl<proposal> {
+struct proposal_impl : typed_primitive_impl<proposal> {
    const proposal_node& outer;

-    explicit proposal_gpu(const proposal_node& arg) : outer(arg) {}
+    explicit proposal_impl(const proposal_node& arg) : outer(arg) {}

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<proposal_gpu>(*this);
+        return make_unique<proposal_impl>(*this);
    }

    template <typename dtype>
@ -442,19 +442,19 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
            CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items");
        }

-        return new proposal_gpu(arg);
+        return new proposal_impl(arg);
    }
 };

 namespace detail {

-attach_proposal_gpu::attach_proposal_gpu() {
-    implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                      proposal_gpu::create);
-    implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                      proposal_gpu::create);
+attach_proposal_impl::attach_proposal_impl() {  // registers proposal_impl::create under impl_types::cpu
+    implementation_map<proposal>::add(impl_types::cpu, proposal_impl::create, {  // one factory, several keys
+        std::make_tuple(data_types::f32, format::bfyx),  // each key is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::bfyx)
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace cpu
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp
@ -0,0 +1,20 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "register.hpp"
+
+namespace cldnn {
+namespace cpu {
+
+// Runs the constructor of every CPU attach_* helper. The helpers are
+// function-local statics, so each constructor executes once no matter how
+// often this function is called.
+void register_implementations() {
+    static detail::attach_detection_output_impl attach_detection_output;
+    static detail::attach_proposal_impl attach_proposal;
+    static detail::attach_non_max_suppression_impl attach_non_max_suppression;
+}
+
+}  // namespace cpu
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp
@ -0,0 +1,31 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "cldnn/primitives/detection_output.hpp"
+#include "cldnn/primitives/proposal.hpp"
+#include "cldnn/primitives/non_max_suppression.hpp"
+
+namespace cldnn {
+namespace cpu {
+
+/// @brief Registration entry point for the `cpu` implementation set (defined in register.cpp).
+void register_implementations();
+
+namespace detail {
+
+// Declares `struct attach_<kind>_impl` whose default constructor is defined
+// out of line. The macro is only a declaration aid and is undefined again below.
+#define REGISTER_CPU(kind) struct attach_##kind##_impl { attach_##kind##_impl(); }
+
+REGISTER_CPU(proposal);
+REGISTER_CPU(non_max_suppression);
+REGISTER_CPU(detection_output);
+
+#undef REGISTER_CPU
+
+}  // namespace detail
+}  // namespace cpu
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp
@ -0,0 +1,188 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <typeinfo>
+#include <utility>
+
+namespace cldnn {
+
+/// @brief A std::map with a single shared object per <T, U> combination.
+///
+/// The constructor is private and copying is deleted, so instance() is the
+/// only way to obtain an object of this type.
+template <typename T, typename U>
+class singleton_map : public std::map<T, U> {
+    using base = std::map<T, U>;
+
+    // Kept user-provided (not defaulted) so the class never qualifies as an aggregate.
+    singleton_map() : base() {}
+    singleton_map(const singleton_map&) = delete;
+    singleton_map& operator=(const singleton_map&) = delete;
+
+public:
+    /// @brief Returns the shared map, constructing it on first use.
+    static singleton_map& instance() {
+        static singleton_map the_map;
+        return the_map;
+    }
+};
+
+
+struct permute;  // forward declarations of the primitive kinds that get an int32_t-keyed implementation_key below
+struct reorder;
+struct custom_gpu_primitive;
+struct generic_layer;
+struct reshape;
+struct data;
+struct mutable_data;
+struct input_layout;
+struct prior_box;
+struct loop;
+
+struct primitive_impl;  // forward declaration; implementation_map factories return primitive_impl*
+
+template <class PType>
+struct typed_program_node;  // forward declaration; argument type of implementation_key and of the factories
+
+/// @brief Builds the key that implementation_map uses to look up implementations.
+///
+/// In this primary template a key is the (data type, format) pair of a layout.
+template <typename primitive_kind>
+struct implementation_key {
+    using type = std::tuple<data_types, format::type>;
+
+    /// @brief Key taken from the output layout of the node's dependency 0.
+    type operator()(const typed_program_node<primitive_kind>& primitive) {
+        const auto& dep_layout = primitive.get_dependency(0).get_output_layout();
+        return std::make_tuple(dep_layout.data_type, dep_layout.format);
+    }
+
+    /// @brief Key taken directly from the given layout.
+    type operator()(const layout& proposed_layout) {
+        const auto& chosen = proposed_layout;
+        return std::make_tuple(chosen.data_type, chosen.format);
+    }
+};
+
+/// @brief Shared body of the implementation_key specializations below: the key
+/// type is int32_t and both call operators always return -1.
+template <typename primitive_kind>
+struct keyless_implementation_key {
+    using type = int32_t;
+    type operator()(const typed_program_node<primitive_kind>&) { return -1; }
+    type operator()(const layout&) { return -1; }
+};
+
+template <>
+struct implementation_key<permute> : keyless_implementation_key<permute> {};
+
+template <>
+struct implementation_key<reorder> : keyless_implementation_key<reorder> {};
+
+template <>
+struct implementation_key<generic_layer> : keyless_implementation_key<generic_layer> {};
+
+template <>
+struct implementation_key<custom_gpu_primitive> : keyless_implementation_key<custom_gpu_primitive> {};
+
+template <>
+struct implementation_key<reshape> : keyless_implementation_key<reshape> {};
+
+template <>
+struct implementation_key<data> : keyless_implementation_key<data> {};
+
+template <>
+struct implementation_key<mutable_data> : keyless_implementation_key<mutable_data> {};
+
+template <>
+struct implementation_key<input_layout> : keyless_implementation_key<input_layout> {};
+
+template <>
+struct implementation_key<prior_box> : keyless_implementation_key<prior_box> {};
+
+template <>
+struct implementation_key<loop> : keyless_implementation_key<loop> {};
+
+/// @brief Registry of implementation factories for one primitive kind.
+///
+/// At most one entry is stored per impl_types value. An entry is a pair of
+/// (supported keys, factory); an empty key set means "accepts any key".
+/// Entries are visited in the ordering of the underlying std::map, i.e. by
+/// impl_types value.
+template <typename primitive_kind>
+class implementation_map {
+public:
+    using key_builder = implementation_key<primitive_kind>;
+    using key_type = typename key_builder::type;
+    using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&)>;
+    using map_type = singleton_map<impl_types, std::pair<std::set<key_type>, factory_type>>;
+
+    /// @brief Returns the factory of the first registered implementation that
+    /// is allowed by the node's preferred impl type and supports the node's key.
+    /// @throws std::runtime_error if no registered implementation matches.
+    static factory_type get(const typed_program_node<primitive_kind>& primitive) {
+        const impl_types target_impl_type = primitive.get_preferred_impl_type();
+        const auto key = key_builder()(primitive);
+        for (auto& kv : map_type::instance()) {
+            if (!is_allowed(target_impl_type, kv.first))
+                continue;
+            if (supports_key(kv.second.first, key))
+                return kv.second.second;
+        }
+        throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() +
+                                     " could not find any implementation to match key");
+    }
+
+    /// @brief Checks whether an implementation exists for the node's preferred
+    /// impl type and the key that key_builder derives from the node.
+    static bool check(const typed_program_node<primitive_kind>& primitive) {
+        impl_types target_impl_type = primitive.get_preferred_impl_type();
+        auto key = key_builder()(primitive);
+        return check_key(target_impl_type, key);
+    }
+
+    /// @brief Checks whether an implementation exists when the key is built
+    /// from the node's own output layout.
+    static bool check_io_eq(const typed_program_node<primitive_kind>& primitive) {
+        impl_types target_impl_type = primitive.get_preferred_impl_type();
+        auto key = key_builder()(primitive.get_output_layout());
+        return check_key(target_impl_type, key);
+    }
+
+    /// @brief Returns true if any registered implementation allowed by
+    /// @p target_impl_type supports @p key.
+    ///
+    /// All allowed entries are examined, mirroring get(): a miss in the first
+    /// allowed entry must not hide a match in a later one.
+    static bool check_key(impl_types target_impl_type, key_type key) {
+        for (auto& kv : map_type::instance()) {
+            if (!is_allowed(target_impl_type, kv.first))
+                continue;
+            if (supports_key(kv.second.first, key))
+                return true;
+        }
+        return false;
+    }
+
+    /// @brief Registers @p factory for @p impl_type with the given supported keys.
+    /// @param keys Supported keys; pass an empty set to accept any key.
+    /// @throws std::runtime_error if @p impl_type is impl_types::any.
+    static void add(impl_types impl_type, factory_type factory, std::set<key_type> keys) {
+        if (impl_type == impl_types::any) {
+            throw std::runtime_error("[CLDNN] Can't register impl with type any");
+        }
+        // As before, an entry already stored for impl_type is kept and this call has no effect.
+        map_type::instance().emplace(impl_type, std::make_pair(std::move(keys), std::move(factory)));
+    }
+
+private:
+    // True when every bit of `candidate` is also set in `target`.
+    static bool is_allowed(impl_types target, impl_types candidate) {
+        return (target & candidate) == candidate;
+    }
+
+    // An empty key set accepts every key; otherwise the key must be listed.
+    static bool supports_key(const std::set<key_type>& keys, const key_type& key) {
+        return keys.empty() || keys.find(key) != keys.end();
+    }
+};
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp
@ -0,0 +1,123 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "activation_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "activation/activation_kernel_selector.h"
+#include "activation/activation_kernel_base.h"
+
+namespace cldnn {
+namespace ocl {
+
+/// @brief OCL implementation of the activation primitive, built on typed_primitive_impl_ocl.
+struct activation_impl : typed_primitive_impl_ocl<activation> {
+    using parent = typed_primitive_impl_ocl<activation>;
+    using parent::parent;
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<activation_impl>(*this);
+    }
+
+    /// @brief Extends the parent's kernel arguments with the slope memory when
+    /// the node is parameterized.
+    kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
+        auto kernel_args = parent::get_arguments(instance, split);
+        if (_outer.is_parameterized())
+            kernel_args.slope = instance.slope_memory();
+        return kernel_args;
+    }
+
+    /// @brief Fills the kernel-selector parameters for @p arg, picks the best
+    /// kernel and returns a new activation_impl bound to it.
+    static primitive_impl* create(const activation_node& arg) {
+        auto params = get_default_params<kernel_selector::activation_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
+
+        convert_new_activation_func(arg.get_primitive(), params.activations);
+
+        if (arg.is_parameterized()) {
+            const auto& slope_layout = arg.slope_input().get_output_layout();
+            const auto& output_layout = arg.get_output_layout();
+            const auto params_num =
+                kernel_selector::GetActivationAdditionalParamsNumber(params.activations[0].function);
+
+            // The slope buffer must hold params_num values per output feature.
+            CLDNN_ERROR_LESS_THAN(arg.id(),
+                                  "Slope layout size count",
+                                  slope_layout.size.count(),
+                                  "output_layout.size.feature[0] * params_num",
+                                  static_cast<size_t>(output_layout.size.feature[0] * params_num),
+                                  "Error - not enough data inside additional params buffer");
+
+            params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
+        }
+
+        auto& selector = kernel_selector::activation_kernel_selector::Instance();
+        auto best_kernels = selector.GetBestKernels(params, optional_params);
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new activation_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Registers activation_impl::create as the OCL factory for `activation`,
+// limited to the (data type, format) keys below. Keys are grouped by format.
+attach_activation_impl::attach_activation_impl() {
+    implementation_map<activation>::add(impl_types::ocl, activation_impl::create, {
+        // yxfb
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::i32, format::yxfb),
+        // bfyx
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        // byxf
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        // b_fs_yx_fsv16
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        // bfzyx
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        // b_fs_zyx_fsv16
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        // bs_fs_zyx_bsv16_fsv16
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        // bs_fs_yx_bsv16_fsv16
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+        // bfwzyx
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        // fs_b_yx_fsv32
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/arg_max_min.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/arg_max_min.cpp
@ -3,8 +3,8 @@
 //

 #include "arg_max_min_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "arg_max_min/arg_max_min_kernel_selector.h"
@ -12,14 +12,14 @@
 #include "kernel_runner.h"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct arg_max_min_gpu : typed_primitive_gpu_impl<arg_max_min> {
-    using parent = typed_primitive_gpu_impl<arg_max_min>;
+struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
+    using parent = typed_primitive_impl_ocl<arg_max_min>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<arg_max_min_gpu>(*this);
+        return make_unique<arg_max_min_impl>(*this);
    }

 protected:
@ -98,37 +98,27 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto conv = new arg_max_min_gpu(arg, best_kernels[0]);
+        auto conv = new arg_max_min_impl(arg, best_kernels[0]);

        return conv;
    }
 };

 namespace detail {
-
-    attach_arg_max_min_gpu::attach_arg_max_min_gpu() {
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                             arg_max_min_gpu::create);
-        implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                             arg_max_min_gpu::create);
-    }
-
+// Registers arg_max_min_impl::create as the OCL factory for `arg_max_min`,
+// limited to the (data type, format) keys below. Keys are grouped by format.
+attach_arg_max_min_impl::attach_arg_max_min_impl() {
+    implementation_map<arg_max_min>::add(impl_types::ocl, arg_max_min_impl::create, {
+        // bfyx
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        // bfzyx
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        // yxfb
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+    });
+}
 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp
@ -0,0 +1,79 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "average_unpooling_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "average_unpooling/average_unpooling_kernel_selector.h"
+#include "average_unpooling/average_unpooling_kernel_base.h"
+
+namespace cldnn {
+namespace ocl {
+
+/// @brief OCL implementation of the average_unpooling primitive, built on typed_primitive_impl_ocl.
+struct average_unpooling_impl : typed_primitive_impl_ocl<average_unpooling> {
+    using parent = typed_primitive_impl_ocl<average_unpooling>;
+    using parent::parent;
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<average_unpooling_impl>(*this);
+    }
+
+protected:
+    /// @brief Forwards to the parent; no extra kernel arguments are added here.
+    kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
+        return parent::get_arguments(instance, split);
+    }
+
+public:
+    /// @brief Fills the kernel-selector parameters for @p arg, picks the best
+    /// kernel and returns a new average_unpooling_impl bound to it.
+    static primitive_impl* create(const average_unpooling_node& arg) {
+        auto params = get_default_params<kernel_selector::average_unpooling_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());
+
+        const auto primitive = arg.get_primitive();
+        const auto& unpool_size = primitive->size;
+        const auto& unpool_stride = primitive->stride;
+
+        // Only the first two spatial dimensions are handed to the kernel.
+        params.unpoolSize = {
+            static_cast<uint32_t>(unpool_size.spatial[0]),
+            static_cast<uint32_t>(unpool_size.spatial[1]),
+        };
+        params.unpoolStride = {
+            static_cast<uint32_t>(unpool_stride.spatial[0]),
+            static_cast<uint32_t>(unpool_stride.spatial[1]),
+        };
+
+        auto& selector = kernel_selector::average_unpooling_kernel_selector::Instance();
+        auto best_kernels = selector.GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new average_unpooling_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Registers average_unpooling_impl::create as the OCL factory for
+// `average_unpooling`, limited to the (data type, format) keys below.
+// Keys are grouped by format.
+attach_average_unpooling_impl::attach_average_unpooling_impl() {
+    implementation_map<average_unpooling>::add(impl_types::ocl, average_unpooling_impl::create, {
+        // yxfb
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        // bfyx
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        // byxf
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp
@ -0,0 +1,78 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "batch_to_space_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "batch_to_space/batch_to_space_kernel_selector.h"
+#include "batch_to_space/batch_to_space_kernel_ref.h"
+#include "cldnn/runtime/error_handler.hpp"
+#include "data_inst.h"
+#include <vector>
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+// OCL implementation of the batch_to_space primitive.
+struct batch_to_space_impl : typed_primitive_impl_ocl<batch_to_space> {
+    using parent = typed_primitive_impl_ocl<batch_to_space>;
+    using parent::parent;
+
+    // Returns a copy of this implementation created through its copy constructor.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<batch_to_space_impl>(*this);
+    }
+
+public:
+    // Builds the implementation for `arg`: fills the kernel selector parameters from the
+    // primitive description, asks the selector for the best kernels and wraps the first one.
+    // An empty selection is reported through CLDNN_ERROR_BOOL.
+    static primitive_impl* create(const batch_to_space_node& arg) {
+        auto params = get_default_params<kernel_selector::batch_to_space_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());
+
+        const auto desc = arg.get_primitive();
+        params.block_shape = convert_dim_vector(desc->block_shape);
+        params.crops_begin = convert_dim_vector(desc->crops_begin);
+        params.crops_end = convert_dim_vector(desc->crops_end);
+
+        auto best_kernels =
+            kernel_selector::batch_to_space_kernel_selector::Instance().GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new batch_to_space_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Constructor of the attach helper: registers batch_to_space_impl::create with
+// implementation_map<batch_to_space> under impl_types::ocl for every
+// (data type, format) pair listed below.
+attach_batch_to_space_impl::attach_batch_to_space_impl() {
+    implementation_map<batch_to_space>::add(impl_types::ocl, batch_to_space_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/binary_convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/binary_convolution.cpp
@ -5,8 +5,8 @@
 #include "cldnn/primitives/scale.hpp"
 #include "cldnn/primitives/quantize.hpp"
 #include "binary_convolution_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "kernel_runner.h"
@ -16,14 +16,14 @@
 #include <memory>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct binary_convolution_gpu : typed_primitive_gpu_impl<binary_convolution> {
-    using parent = typed_primitive_gpu_impl<binary_convolution>;
+struct binary_convolution_impl : typed_primitive_impl_ocl<binary_convolution> {
+    using parent = typed_primitive_impl_ocl<binary_convolution>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<binary_convolution_gpu>(*this);
+        return make_unique<binary_convolution_impl>(*this);
    }

 protected:
@ -125,7 +125,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto conv = new binary_convolution_gpu(arg, best_kernels[0]);
+        auto conv = new binary_convolution_impl(arg, best_kernels[0]);

        return conv;
    }
@ -133,12 +133,12 @@ public:

 namespace detail {

-attach_binary_convolution_gpu::attach_binary_convolution_gpu() {
-    implementation_map<binary_convolution>::add(
-        std::make_tuple(engine_types::ocl, data_types::bin, format::b_fs_yx_32fp),
-        binary_convolution_gpu::create);
+// Constructor of the attach helper: registers binary_convolution_impl::create with
+// implementation_map<binary_convolution> under impl_types::ocl for the single
+// (data_types::bin, format::b_fs_yx_32fp) pair.
+attach_binary_convolution_impl::attach_binary_convolution_impl() {
+    implementation_map<binary_convolution>::add(impl_types::ocl, binary_convolution_impl::create, {
+        std::make_tuple(data_types::bin, format::b_fs_yx_32fp),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
@ -0,0 +1,96 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "border_inst.h"
+
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "border/border_kernel_selector.h"
+#include "border/border_kernel_base.h"
+#include "cldnn/runtime/error_handler.hpp"
+
+namespace cldnn {
+namespace ocl {
+
+// OCL implementation of the border primitive.
+struct border_impl : typed_primitive_impl_ocl<border> {
+    using parent = typed_primitive_impl_ocl<border>;
+    using parent::parent;
+
+    // Returns a copy of this implementation created through its copy constructor.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<border_impl>(*this);
+    }
+
+    // Builds the implementation for `arg`.
+    //
+    // Copies the left/top sizes, right/bottom sizes and border value from the primitive
+    // description into the kernel selector parameters, translates the primitive's border_type
+    // into the kernel selector enumeration, then asks the selector for the best kernels and
+    // wraps the first one.
+    //
+    // Errors (reported through CLDNN_ERROR_BOOL):
+    //  - the primitive carries a border_type value with no kernel selector counterpart;
+    //  - the kernel selector returns no kernels.
+    static primitive_impl* create(const border_node& arg) {
+        auto b_params = get_default_params<kernel_selector::border_params>(arg, 1);
+        auto b_optional_params =
+            get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());
+
+        auto desc = arg.get_primitive();
+
+        b_params.lt_sizes = convert_dim_vector(desc->left_top_sizes);
+        b_params.rb_sizes = convert_dim_vector(desc->right_bottom_sizes);
+        b_params.border_value = desc->border_value;
+
+        switch (desc->type) {
+            case border_type::constant:
+                b_params.b_type = kernel_selector::border_type::CONSTANT;
+                break;
+            case border_type::edge:
+                b_params.b_type = kernel_selector::border_type::EDGE;
+                break;
+            case border_type::mirror:
+                b_params.b_type = kernel_selector::border_type::MIRROR;
+                break;
+            case border_type::mirror_101:
+                b_params.b_type = kernel_selector::border_type::MIRROR_101;
+                break;
+            default:
+                // Report the problem in every build type. A bare assert() is compiled out when
+                // NDEBUG is defined, which would let kernel selection continue without b_type
+                // having been assigned here. CLDNN_ERROR_BOOL is the same mechanism used below
+                // to reject an empty kernel list.
+                CLDNN_ERROR_BOOL(arg.id(),
+                                 "border_type",
+                                 true,
+                                 "Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
+        }
+
+        auto& kernel_selector = kernel_selector::border_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(b_params, b_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new border_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Constructor of the attach helper: registers border_impl::create with
+// implementation_map<border> under impl_types::ocl for every
+// (data type, format) pair listed below.
+attach_border_impl::attach_border_impl() {
+    implementation_map<border>::add(impl_types::ocl, border_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/broadcast.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/broadcast.cpp
@ -4,22 +4,22 @@

 #include "broadcast_inst.h"

-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "broadcast/broadcast_kernel_selector.h"
 #include "broadcast/broadcast_kernel_base.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
-    using parent = typed_primitive_gpu_impl<broadcast>;
+struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
+    using parent = typed_primitive_impl_ocl<broadcast>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<broadcast_gpu>(*this);
+        return make_unique<broadcast_impl>(*this);
    }

    static primitive_impl* create(const broadcast_node& arg) {
@ -57,29 +57,29 @@ struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        return new broadcast_gpu(arg, best_kernels[0]);
+        return new broadcast_impl(arg, best_kernels[0]);
    }
 };

 namespace detail {

-attach_broadcast_gpu::attach_broadcast_gpu() {
-    auto val_fw = broadcast_gpu::create;
-
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
+// Constructor of the attach helper: registers broadcast_impl::create with
+// implementation_map<broadcast> under impl_types::ocl for every
+// (data type, format) pair listed below.
+attach_broadcast_impl::attach_broadcast_impl() {
+    implementation_map<broadcast>::add(impl_types::ocl, broadcast_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i64, format::bfzyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp
@ -0,0 +1,159 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "concatenation_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "concatenation/concatenation_kernel_selector.h"
+#include "concatenation/concatenation_kernel_base.h"
+
+#include <initializer_list>
+
+namespace cldnn {
+namespace ocl {
+
+namespace {
+// Translates a concatenation primitive axis into the kernel selector axis enumeration.
+// along_x, and any value without a dedicated case, yields concat_axis::X.
+kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
+    using ks_axis = kernel_selector::concat_axis;
+
+    ks_axis converted = ks_axis::X;
+    switch (axis) {
+        case concatenation::along_y:
+            converted = ks_axis::Y;
+            break;
+        case concatenation::along_z:
+            converted = ks_axis::Z;
+            break;
+        case concatenation::along_w:
+            converted = ks_axis::W;
+            break;
+        case concatenation::along_f:
+            converted = ks_axis::FEATURE;
+            break;
+        case concatenation::along_b:
+            converted = ks_axis::BATCH;
+            break;
+        case concatenation::along_x:
+        default:
+            // Keep the X fallback assigned above.
+            break;
+    }
+    return converted;
+}
+}  // namespace
+
+// OCL implementation of the concatenation primitive.
+struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
+    using parent = typed_primitive_impl_ocl<concatenation>;
+
+    // Returns a copy of this implementation created through its copy constructor.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<concatenation_impl>(*this);
+    }
+
+    // Forwards the node and kernel data to the parent. Unless the node can be optimized,
+    // additionally checks that the number of kernels in `kd` equals the node's input count.
+    concatenation_impl(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
+        if (_outer.can_be_optimized()) {
+            return;
+        }
+
+        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
+                              "Input count",
+                              _outer.inputs_count(),
+                              "kds size",
+                              kd.kernels.size(),
+                              "Error - not enough kernels for concatenation");
+    }
+
+protected:
+    // True when the parent reports the instance as optimized out, or when the node itself
+    // can be optimized.
+    bool optimized_out(concatenation_inst& instance) const override {
+        if (parent::optimized_out(instance)) {
+            return true;
+        }
+        return _outer.can_be_optimized();
+    }
+
+public:
+    // Builds the implementation for `arg`. A node that can be optimized gets an implementation
+    // with empty kernel data; otherwise one input tensor per node input is described to the
+    // kernel selector (with kernelPerInput enabled) and the first of the best kernels is wrapped.
+    // An empty selection is reported through CLDNN_ERROR_BOOL.
+    static primitive_impl* create(const concatenation_node& arg) {
+        if (arg.can_be_optimized()) {
+            return new concatenation_impl(arg, {});
+        }
+
+        auto params = get_default_params<kernel_selector::concatenation_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
+        const auto axis = arg.get_primitive()->axis;
+
+        const size_t input_count = arg.inputs_count();
+        params.inputs.resize(input_count);
+        for (size_t idx = 0; idx < input_count; ++idx) {
+            params.inputs[idx] = convert_data_tensor(arg.input(idx).get_output_layout());
+        }
+
+        params.axis = convert_axis(axis);
+        optional_params.kernelPerInput = true;
+
+        auto best_kernels =
+            kernel_selector::concatenation_kernel_selector::Instance().GetBestKernels(params, optional_params);
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new concatenation_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Constructor of the attach helper: registers concatenation_impl::create with
+// implementation_map<concatenation> under impl_types::ocl for every
+// (data type, format) pair listed below.
+attach_concatenation_impl::attach_concatenation_impl() {
+    implementation_map<concatenation>::add(impl_types::ocl, concatenation_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::i32, format::yxfb),
+        std::make_tuple(data_types::i64, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::i64, format::byxf),
+        std::make_tuple(data_types::f32, format::fyxb),
+        std::make_tuple(data_types::f16, format::fyxb),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i64, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::i64, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp
@ -4,8 +4,8 @@

 #include "convolution_inst.h"
 #include "eltwise_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "kernel_runner.h"
@ -15,14 +15,14 @@
 #include <memory>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct convolution_gpu : typed_primitive_gpu_impl<convolution> {
-    using parent = typed_primitive_gpu_impl<convolution>;
+struct convolution_impl : typed_primitive_impl_ocl<convolution> {
+    using parent = typed_primitive_impl_ocl<convolution>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<convolution_gpu>(*this);
+        return make_unique<convolution_impl>(*this);
    }

 protected:
@ -151,7 +151,7 @@ public:
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");
-        auto conv = new convolution_gpu(arg, best_kernels[0]);
+        auto conv = new convolution_impl(arg, best_kernels[0]);

        return conv;
    }
@ -159,55 +159,49 @@ public:

 namespace detail {

-attach_convolution_gpu::attach_convolution_gpu() {
-    auto val_fw = convolution_gpu::create;
-
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::winograd_2x3_s1_data), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::winograd_2x3_s1_data), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-    // block f16 format
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    // block i8 format
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
-    // MMAD
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
-
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
-
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
-    implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
+// Constructor of the attach helper: registers convolution_impl::create with
+// implementation_map<convolution> under impl_types::ocl for every
+// (data type, format) pair listed below. Each pair is listed exactly once.
+// NOTE(review): assumes implementation_map::add treats a repeated key as redundant, so that
+// listing (i8|u8, b_fs_yx_fsv4) only once registers the same set - confirm against
+// impls/implementation_map.hpp.
+attach_convolution_impl::attach_convolution_impl() {
+    implementation_map<convolution>::add(impl_types::ocl, convolution_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::winograd_2x3_s1_data),
+        std::make_tuple(data_types::f16, format::winograd_2x3_s1_data),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        // block f16 format
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        // block i8 format
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        // MMAD
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp
@ -0,0 +1,118 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "crop_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "eltwise/eltwise_kernel_selector.h"
+#include "eltwise/eltwise_kernel_base.h"
+#include "cldnn/runtime/error_handler.hpp"
+
+namespace cldnn {
+namespace ocl {
+
+// OCL implementation of the crop primitive, built on the eltwise kernel selector.
+struct crop_impl : typed_primitive_impl_ocl<crop> {
+    using parent = typed_primitive_impl_ocl<crop>;
+    using parent::parent;
+
+    // Returns a copy of this implementation created through its copy constructor.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<crop_impl>(*this);
+    }
+
+protected:
+    // True when the parent reports the instance as optimized out, or when the node itself
+    // can be optimized.
+    bool optimized_out(crop_inst& instance) const override {
+        if (parent::optimized_out(instance)) {
+            return true;
+        }
+        return _outer.can_be_optimized();
+    }
+
+public:
+    // Builds the implementation for `arg`: sets up eltwise parameters with a single ASSIGN
+    // operation reading buffer 0, describes input 0 using the primitive's offsets, asks the
+    // eltwise kernel selector for the best kernels and wraps the first one.
+    // An empty selection is reported through CLDNN_ERROR_BOOL.
+    static primitive_impl* create(const crop_node& arg) {
+        auto params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
+
+        params.operations.push_back(
+            {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});
+
+        const auto& in_layout = arg.input().get_output_layout();
+        params.inputs[0] = convert_data_tensor(in_layout, 1, arg.get_primitive()->offsets);
+
+        auto best_kernels =
+            kernel_selector::eltwise_kernel_selector::Instance().GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new crop_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Constructor of the attach helper: registers crop_impl::create with
+// implementation_map<crop> under impl_types::ocl for every
+// (data type, format) pair listed below.
+attach_crop_impl::attach_crop_impl() {
+    implementation_map<crop>::add(impl_types::ocl, crop_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i64, format::yxfb),
+        std::make_tuple(data_types::i32, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i64, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::f32, format::fyxb),
+        std::make_tuple(data_types::f16, format::fyxb),
+        std::make_tuple(data_types::i64, format::fyxb),
+        std::make_tuple(data_types::i32, format::fyxb),
+        std::make_tuple(data_types::i8, format::fyxb),
+        std::make_tuple(data_types::u8, format::fyxb),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i64, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i64, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/ctc_greedy_decoder.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/ctc_greedy_decoder.cpp
@ -3,8 +3,8 @@
 //

 #include "ctc_greedy_decoder_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "ctc_greedy_decoder/ctc_greedy_decoder_kernel_selector.h"
@ -15,14 +15,14 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct ctc_greedy_decoder_gpu : typed_primitive_gpu_impl<ctc_greedy_decoder> {
-    using parent = typed_primitive_gpu_impl<ctc_greedy_decoder>;
+struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl<ctc_greedy_decoder> {
+    using parent = typed_primitive_impl_ocl<ctc_greedy_decoder>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<ctc_greedy_decoder_gpu>(*this);
+        return make_unique<ctc_greedy_decoder_impl>(*this);
    }

 public:
@ -51,7 +51,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto grn = new ctc_greedy_decoder_gpu(arg, best_kernels[0]);
+        auto grn = new ctc_greedy_decoder_impl(arg, best_kernels[0]);

        return grn;
    }
@ -59,13 +59,15 @@ public:

 namespace detail {

-attach_ctc_greedy_decoder_gpu::attach_ctc_greedy_decoder_gpu() {
-    implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), ctc_greedy_decoder_gpu::create);
-    implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), ctc_greedy_decoder_gpu::create);
-    implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), ctc_greedy_decoder_gpu::create);
-    implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), ctc_greedy_decoder_gpu::create);
+// Adds ctc_greedy_decoder_impl::create to implementation_map<ctc_greedy_decoder> under
+// impl_types::ocl for each (data type, format) pair listed below.
+attach_ctc_greedy_decoder_impl::attach_ctc_greedy_decoder_impl() {
+    implementation_map<ctc_greedy_decoder>::add(impl_types::ocl, ctc_greedy_decoder_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/cum_sum.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/cum_sum.cpp
@ -3,8 +3,8 @@
 //

 #include "cum_sum_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "cum_sum/cum_sum_kernel_selector.h"
 #include "cum_sum/cum_sum_kernel_ref.h"
@ -13,7 +13,7 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 namespace {
 kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
@ -36,12 +36,12 @@ kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
 }
 }  // namespace

-struct cum_sum_gpu : typed_primitive_gpu_impl<cum_sum> {
-    using parent = typed_primitive_gpu_impl<cum_sum>;
+struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
+    using parent = typed_primitive_impl_ocl<cum_sum>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<cum_sum_gpu>(*this);
+        return make_unique<cum_sum_impl>(*this);
    }

 public:
@ -62,7 +62,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto cum_sum = new cum_sum_gpu(arg, best_kernels[0]);
+        auto cum_sum = new cum_sum_impl(arg, best_kernels[0]);

        return cum_sum;
    }
@ -70,16 +70,17 @@ public:

 namespace detail {

-attach_cum_sum_gpu::attach_cum_sum_gpu() {
-    auto val_fw = cum_sum_gpu::create;
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+// Adds cum_sum_impl::create to implementation_map<cum_sum> under impl_types::ocl
+// for each (data type, format) pair listed below.
+attach_cum_sum_impl::attach_cum_sum_impl() {
+    implementation_map<cum_sum>::add(impl_types::ocl, cum_sum_impl::create, {
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
@ -4,12 +4,12 @@

 #include "custom_gpu_primitive_inst.h"
 #include "cldnn/runtime/engine.hpp"
-#include "implementation_map.h"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "network_impl.h"
 #include "jitter.h"
 #include "cldnn/runtime/error_handler.hpp"
-#include "register_gpu.hpp"
+#include "register.hpp"

 #include <map>
 #include <sstream>
@ -17,24 +17,24 @@
 #include <memory>
 #include <string>

-using namespace cldnn;
 namespace kernel_selector {
 using jit_constants = kernel_selector::JitConstants;
 }

-namespace neural {
+namespace cldnn {
+namespace ocl {

-struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
+struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
    const custom_gpu_primitive_node& outer;
    std::shared_ptr<kernel_selector::cl_kernel_data> cl_kernel;
    std::vector<kernel::ptr> _kernels;
    kernel_id _kernel_id;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<custom_gpu_primitive_gpu>(*this);
+        return make_unique<custom_gpu_primitive_impl>(*this);
    }

-    custom_gpu_primitive_gpu(const custom_gpu_primitive_gpu& other)
+    custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other)
    : outer(other.outer)
    , cl_kernel(other.cl_kernel)
    , _kernels({})
@ -42,7 +42,7 @@ struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
        _kernels.emplace_back(std::move(outer.get_program().get_kernel(_kernel_id)->clone()));
    }

-    custom_gpu_primitive_gpu(const custom_gpu_primitive_node& arg,
+    custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg,
                             std::shared_ptr<kernel_selector::cl_kernel_data>& cl_kernel)
        : outer(arg)
        , cl_kernel(cl_kernel)
@ -224,14 +224,15 @@ static primitive_impl* create(const custom_gpu_primitive_node& arg) {
        cl_kernel->params.arguments.push_back(get_arg(p));
    }

-    return new custom_gpu_primitive_gpu(arg, cl_kernel);
-}
-}  // namespace neural
-
-namespace cldnn { namespace gpu { namespace detail {
-
-attach_custom_gpu_primitive_gpu::attach_custom_gpu_primitive_gpu() {
-    implementation_map<custom_gpu_primitive>::add({{cldnn::engine_types::ocl, neural::create}});
+    return new custom_gpu_primitive_impl(arg, cl_kernel);
 }

-} } }  // namespace cldnn::gpu::detail
+namespace detail {
+
+// Adds the file-local create() function to implementation_map<custom_gpu_primitive> under
+// impl_types::ocl. The list of (data type, format) pairs is passed empty.
+// NOTE(review): presumably an empty list means "no type/format restriction" - confirm
+// against implementation_map::add.
+attach_custom_gpu_primitive_impl::attach_custom_gpu_primitive_impl() {
+    implementation_map<custom_gpu_primitive>::add(cldnn::impl_types::ocl, create, {});
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp
@ -0,0 +1,148 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "deconvolution_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "deconvolution/deconvolution_kernel_selector.h"
+#include "deconvolution/deconvolution_kernel_base.h"
+#include <algorithm>
+
+namespace cldnn {
+namespace ocl {
+
+/// @brief Implementation of the deconvolution primitive in the cldnn::ocl namespace,
+///        derived from typed_primitive_impl_ocl<deconvolution>.
+struct deconvolution_impl : typed_primitive_impl_ocl<deconvolution> {
+    using parent = typed_primitive_impl_ocl<deconvolution>;
+    using parent::parent;
+
+    /// @brief Returns a copy of this implementation wrapped in a std::unique_ptr.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<deconvolution_impl>(*this);
+    }
+
+protected:
+    // TODO: share it with convolution and fully connected
+    /// @brief Checks that the output layout's padding filling value equals 0.0f.
+    /// @return Always true when control reaches the return statement (`res` is never modified);
+    ///         a mismatch is presumably reported by CLDNN_ERROR_NOT_EQUAL itself - confirm.
+    bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
+        bool res = true;
+
+        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
+                              "deconvolution filling value",
+                              _outer.get_output_layout().data_padding.filling_value(),
+                              "padding mode",
+                              0.0f,
+                              "Unknown padding mode in deconvolution.");
+
+        return res;
+    }
+
+    /// @brief Extends the parent's kernel arguments with the weights and (optional) bias memory.
+    /// @param instance Primitive instance the memories are read from.
+    /// @param split    Split index forwarded to the parent and to weights_memory()/bias_memory().
+    /// @return Arguments from parent::get_arguments with `weights` and `bias` filled in;
+    ///         `bias` is nullptr when instance.bias_term() is false.
+    kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
+        kernel_arguments_data args = parent::get_arguments(instance, split);
+
+        args.weights = instance.weights_memory(split);
+        args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr;
+
+        return args;
+    }
+
+    /// @brief Forwards the split count reported by the node (_outer).
+    int32_t get_split() const override { return _outer.get_split(); }
+
+    /// @brief Forwards the group count reported by the node (_outer).
+    uint32_t get_groups() const override { return _outer.get_groups(); }
+
+public:
+    /// @brief Builds kernel-selector parameters for the given node and creates an implementation
+    ///        from the first kernel returned by the deconvolution kernel selector.
+    /// @param arg Deconvolution program node to create the implementation for.
+    /// @return Raw pointer to a deconvolution_impl allocated with `new`
+    ///         (presumably owned by the caller - confirm).
+    static primitive_impl* create(const deconvolution_node& arg) {
+        const auto& primitive = arg.get_primitive();
+        const auto& weights_layout = arg.weights(0).get_output_layout();
+
+        const auto& weights_size = weights_layout.size;
+
+        const auto& split = primitive->split();
+        const auto& stride = primitive->stride;
+        // The primitive's dilation is not read (branch disabled); a fixed tensor is used instead.
+#if 0  // TODO: support dilation
+        const auto& dilation = primitive->dilation;
+#else
+        const tensor dilation = {0, 0, 1, 1, 1};
+#endif
+        const auto actual_split = split;
+
+        const auto& input_offset = primitive->input_offset;
+        const auto& groups = primitive->groups;
+
+        // When groups > 1 the params helper is given a split of 1 instead of the primitive's split.
+        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
+            arg,
+            (groups > 1) ? 1 : actual_split,
+            1,
+            primitive->grouped_weights_shape);
+        auto deconv_optional_params =
+            get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
+
+        deconv_params.split = split;
+        deconv_params.groups = groups;
+
+        // Format dimension count minus 2 (presumably excludes the batch and feature axes - confirm).
+        auto spatial_size = arg.get_output_layout().format.dimension() - 2;
+        uint32_t kx = weights_size.spatial[0];
+        uint32_t ky = weights_size.spatial[1];
+        // With exactly two spatial dimensions the filter depth is fixed to 1.
+        uint32_t kz = spatial_size == 2 ? 1 : weights_size.spatial[2];
+        deconv_params.filterSize = { kx, ky, kz };
+
+        // Padding is the negated input offset, clamped so it is never below zero.
+        deconv_params.padding = {(uint32_t)std::max(-input_offset.spatial[0], 0),
+                                 (uint32_t)std::max(-input_offset.spatial[1], 0),
+                                 (uint32_t)std::max(-input_offset.spatial[2], 0)};
+
+        deconv_params.stride = {(uint32_t)stride.spatial[0], (uint32_t)stride.spatial[1], (uint32_t)stride.spatial[2]};
+
+        deconv_params.dilation = {(uint32_t)dilation.spatial[0],
+                                  (uint32_t)dilation.spatial[1],
+                                  (uint32_t)dilation.spatial[2]};
+
+        auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
+
+        // best_kernels[0] is used below, so an empty result is checked first.
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with these arguments");
+        auto deconv = new deconvolution_impl(arg, best_kernels[0]);
+
+        return deconv;
+    }
+};
+
+namespace detail {
+
+// Adds deconvolution_impl::create to implementation_map<deconvolution> under impl_types::ocl
+// for each (data type, format) pair listed below.
+attach_deconvolution_impl::attach_deconvolution_impl() {
+    implementation_map<deconvolution>::add(impl_types::ocl, deconvolution_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/deformable_convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/deformable_convolution.cpp
@ -3,8 +3,8 @@
 //

 #include "deformable_convolution_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "kernel_runner.h"
@ -13,14 +13,14 @@
 #include <algorithm>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct deformable_conv_gpu : typed_primitive_gpu_impl<deformable_conv> {
-    using parent = typed_primitive_gpu_impl<deformable_conv>;
+struct deformable_conv_impl : typed_primitive_impl_ocl<deformable_conv> {
+    using parent = typed_primitive_impl_ocl<deformable_conv>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<deformable_conv_gpu>(*this);
+        return make_unique<deformable_conv_impl>(*this);
    }

 protected:
@ -71,18 +71,18 @@ public:
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");
-        auto conv = new deformable_conv_gpu(arg, best_kernels[0]);
+        auto conv = new deformable_conv_impl(arg, best_kernels[0]);

        return conv;
    }
 };

-struct deformable_interp_gpu : typed_primitive_gpu_impl<deformable_interp> {
-    using parent = typed_primitive_gpu_impl<deformable_interp>;
+struct deformable_interp_impl : typed_primitive_impl_ocl<deformable_interp> {
+    using parent = typed_primitive_impl_ocl<deformable_interp>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<deformable_interp_gpu>(*this);
+        return make_unique<deformable_interp_impl>(*this);
    }

 protected:
@ -139,7 +139,7 @@ public:
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");
-        auto conv = new deformable_interp_gpu(arg, best_kernels[0]);
+        auto conv = new deformable_interp_impl(arg, best_kernels[0]);

        return conv;
    }
@ -147,20 +147,20 @@ public:

 namespace detail {

-attach_deformable_conv_gpu::attach_deformable_conv_gpu() {
-    implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                             deformable_conv_gpu::create);
-    implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                             deformable_conv_gpu::create);
+// Adds deformable_conv_impl::create to implementation_map<deformable_conv> under
+// impl_types::ocl for f32 and f16 data in bfyx format.
+attach_deformable_conv_impl::attach_deformable_conv_impl() {
+    implementation_map<deformable_conv>::add(impl_types::ocl, deformable_conv_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+    });
 }

-attach_deformable_interp_gpu::attach_deformable_interp_gpu() {
-    implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                               deformable_interp_gpu::create);
-    implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                               deformable_interp_gpu::create);
+// Adds deformable_interp_impl::create to implementation_map<deformable_interp> under
+// impl_types::ocl for f32 and f16 data in bfyx format.
+attach_deformable_interp_impl::attach_deformable_interp_impl() {
+    implementation_map<deformable_interp>::add(impl_types::ocl, deformable_interp_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp
@ -0,0 +1,71 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "depth_to_space_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "depth_to_space/depth_to_space_kernel_selector.h"
+#include "depth_to_space/depth_to_space_kernel_ref.h"
+#include "cldnn/runtime/error_handler.hpp"
+#include "common_types.h"
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+/// @brief Implementation of the depth_to_space primitive in the cldnn::ocl namespace,
+///        derived from typed_primitive_impl_ocl<depth_to_space>.
+struct depth_to_space_impl : typed_primitive_impl_ocl<depth_to_space> {
+    using parent = typed_primitive_impl_ocl<depth_to_space>;
+    using parent::parent;
+
+    /// @brief Returns a copy of this implementation wrapped in a std::unique_ptr.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<depth_to_space_impl>(*this);
+    }
+
+public:
+    /// @brief Builds kernel-selector parameters for the given node and creates an implementation
+    ///        from the first kernel returned by the depth_to_space kernel selector.
+    /// @param arg depth_to_space program node to create the implementation for.
+    /// @return Raw pointer to a depth_to_space_impl allocated with `new`
+    ///         (presumably owned by the caller - confirm).
+    static primitive_impl* create(const depth_to_space_node& arg) {
+        auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
+        auto depth_to_space_optional_params =
+            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
+
+        depth_to_space_params.block_size = arg.get_primitive()->block_size;
+        // blocks_first maps to BLOCKS_FIRST; every other primitive mode value maps to DEPTH_FIRST.
+        depth_to_space_params.mode = arg.get_primitive()->mode == depth_to_space_mode::blocks_first ? kernel_selector::depth_to_space_mode::BLOCKS_FIRST
+                                                                                                    : kernel_selector::depth_to_space_mode::DEPTH_FIRST;
+
+        auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
+
+        // best_kernels[0] is used below, so an empty result is checked first.
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto depth_to_space = new depth_to_space_impl(arg, best_kernels[0]);
+
+        return depth_to_space;
+    }
+};
+
+namespace detail {
+
+// Adds depth_to_space_impl::create to implementation_map<depth_to_space> under impl_types::ocl
+// for each (data type, format) pair listed below.
+attach_depth_to_space_impl::attach_depth_to_space_impl() {
+    implementation_map<depth_to_space>::add(impl_types::ocl, depth_to_space_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp
@ -0,0 +1,188 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "eltwise_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "eltwise/eltwise_kernel_selector.h"
+#include "eltwise/eltwise_kernel_base.h"
+#include <vector>
+
+namespace cldnn {
+namespace ocl {
+
+/// @brief Implementation of the eltwise primitive in the cldnn::ocl namespace,
+///        derived from typed_primitive_impl_ocl<eltwise>.
+struct eltwise_impl : typed_primitive_impl_ocl<eltwise> {
+    using parent = typed_primitive_impl_ocl<eltwise>;
+    using parent::parent;
+
+    /// @brief Returns a copy of this implementation wrapped in a std::unique_ptr.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<eltwise_impl>(*this);
+    }
+
+protected:
+    /// @brief Returns the parent's kernel arguments unchanged.
+    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
+        kernel_arguments_data args = parent::get_arguments(instance, split);
+        return args;
+    }
+
+public:
+    /// @brief Builds kernel-selector parameters for the given node and creates an implementation
+    ///        from the first kernel returned by the eltwise kernel selector.
+    /// @param arg Eltwise program node to create the implementation for.
+    /// @return Raw pointer to an eltwise_impl allocated with `new`
+    ///         (presumably owned by the caller - confirm).
+    static primitive_impl* create(const eltwise_node& arg) {
+        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
+        auto ew_optional_params =
+            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
+
+        // Inputs 1..N-1 are appended here (input 0 is presumably already set by
+        // get_default_params - confirm).
+        for (size_t i = 1; i < arg.inputs_count(); i++) {
+            ew_params.inputs.push_back(convert_data_tensor(arg.input(i).get_output_layout()));
+        }
+
+        const auto& primitive = arg.get_primitive();
+
+        // First operation: Buffer(0) and Buffer(1) combined with the primitive's mode.
+        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
+                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
+                                        convert_to_eltwise_mode(primitive->mode)});
+
+        // Each further input i is paired with Intermediate(i - 2), using the same mode.
+        for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++) {
+            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(i - 2),
+                                             kernel_selector::eltwise_params::InputType::Buffer(i)},
+                                            convert_to_eltwise_mode(primitive->mode)});
+        }
+
+        // Coefficients are copied only for the sum mode.
+        if (primitive->mode == eltwise_mode::sum) {
+            ew_params.coefficients = primitive->coefficients;
+        }
+
+        // Only the first input whose dims differ from the output is inspected (both branches break):
+        // broadcast is set if some axis has output size != 1 and input size == 1,
+        // otherwise layoutBased is set.
+        for (size_t i = 0; i < ew_params.inputs.size(); i++) {
+            if (!ew_params.inputs[i].SameDims(ew_params.output)) {
+                std::vector<int32_t> input_size = arg.input(i).get_output_layout().size.raw.vector();
+                std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
+                bool broadcast = false;
+                for (size_t d = 0; d < output_size.size(); d++) {
+                    if (output_size[d] != 1 && input_size[d] == 1)
+                        broadcast = true;
+                }
+                if (broadcast) {
+                    ew_params.broadcast = true;
+                    break;
+                } else {
+                    ew_params.layoutBased = true;
+                    break;
+                }
+            }
+        }
+
+        // stride
+        // Copies the first three spatial components of every stride tensor of the primitive.
+        if (!primitive->stride.empty()) {
+            const auto& stride = primitive->stride;
+            ew_params.stride.resize(stride.size());
+            for (size_t i = 0; i < primitive->stride.size(); i++) {
+                ew_params.stride[i] = {(uint32_t)stride[i].spatial[0],
+                                       (uint32_t)stride[i].spatial[1],
+                                       (uint32_t)stride[i].spatial[2]};
+            }
+        }
+
+        // check if strides are the same
+        // Only the x and y components are compared against the first stride.
+        // Without strides, inputs 0 and 1 are compared with SameDimsSizes and a mismatch sets broadcast.
+        if (!ew_params.stride.empty()) {
+            const auto& stride = ew_params.stride[0];
+            for (size_t i = 1; i < ew_params.stride.size(); i++) {
+                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
+                    ew_params.layoutBased = true;
+            }
+        } else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
+            ew_params.broadcast = true;
+        }
+
+        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
+        // int8_quantization stays true only if every input's data type is u8 or i8.
+        bool quantization = true;
+        for (size_t i = 0; i < arg.inputs_count(); i++) {
+            if (arg.input(i).get_output_layout().data_type != data_types::u8 &&
+                arg.input(i).get_output_layout().data_type != data_types::i8) {
+                quantization = false;
+            }
+        }
+        ew_params.int8_quantization = quantization;
+
+        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
+
+        // best_kernels[0] is used below, so an empty result is checked first.
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto eltwise = new eltwise_impl(arg, best_kernels[0]);
+
+        return eltwise;
+    }
+};
+
+namespace detail {
+
+// Adds eltwise_impl::create to implementation_map<eltwise> under impl_types::ocl
+// for each (data type, format) pair listed below.
+attach_eltwise_impl::attach_eltwise_impl() {
+    implementation_map<eltwise>::add(impl_types::ocl, eltwise_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),
+        // NOTE(review): u8/bfyx sits among the yxfb entries, and u8 is not listed for yxfb
+        // or byxf - confirm that this is intentional.
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::i32, format::yxfb),
+        std::make_tuple(data_types::i64, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::i64, format::byxf),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i64, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::i64, format::bfwzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/embedding_bag.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/embedding_bag.cpp
@ -3,8 +3,8 @@
 //

 #include "embedding_bag_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "embedding_bag/embedding_bag_kernel_selector.h"
 #include "embedding_bag/embedding_bag_kernel_ref.h"
@ -14,13 +14,13 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
-struct embedding_bag_gpu : typed_primitive_gpu_impl<embedding_bag> {
-    using parent = typed_primitive_gpu_impl<embedding_bag>;
+namespace ocl {
+struct embedding_bag_impl : typed_primitive_impl_ocl<embedding_bag> {
+    using parent = typed_primitive_impl_ocl<embedding_bag>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<embedding_bag_gpu>(*this);
+        return make_unique<embedding_bag_impl>(*this);
    }

 public:
@ -58,7 +58,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto embedding_bag = new embedding_bag_gpu(arg, best_kernels[0]);
+        auto embedding_bag = new embedding_bag_impl(arg, best_kernels[0]);

        return embedding_bag;
    }
@ -66,12 +66,13 @@ public:

 namespace detail {

-attach_embedding_bag_gpu::attach_embedding_bag_gpu() {
-    auto val_fw = embedding_bag_gpu::create;
-    implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+attach_embedding_bag_impl::attach_embedding_bag_impl() {  // adds the OCL embedding_bag implementation to implementation_map
+    implementation_map<embedding_bag>::add(impl_types::ocl, embedding_bag_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::bfyx),  // each entry is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/extract_image_patches.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/extract_image_patches.cpp
@ -3,8 +3,8 @@
 //

 #include "extract_image_patches_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"

@ -12,14 +12,14 @@
 #include "extract_image_patches/extract_image_patches_kernel_ref.h"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct extract_image_patches_gpu : typed_primitive_gpu_impl<extract_image_patches> {
-    using parent = typed_primitive_gpu_impl<extract_image_patches>;
+struct extract_image_patches_impl : typed_primitive_impl_ocl<extract_image_patches> {
+    using parent = typed_primitive_impl_ocl<extract_image_patches>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<extract_image_patches_gpu>(*this);
+        return make_unique<extract_image_patches_impl>(*this);
    }

 public:
@ -41,7 +41,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto extract_image_patches = new extract_image_patches_gpu(arg, best_kernels[0]);
+        auto extract_image_patches = new extract_image_patches_impl(arg, best_kernels[0]);

        return extract_image_patches;
    }
@ -49,16 +49,17 @@ public:

 namespace detail {

-attach_extract_image_patches_gpu::attach_extract_image_patches_gpu() {
-    implementation_map<extract_image_patches>::add(
-        {{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), extract_image_patches_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), extract_image_patches_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), extract_image_patches_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), extract_image_patches_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), extract_image_patches_gpu::create},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), extract_image_patches_gpu::create}});
+attach_extract_image_patches_impl::attach_extract_image_patches_impl() {  // adds the OCL extract_image_patches implementation
+    implementation_map<extract_image_patches>::add(impl_types::ocl, extract_image_patches_impl::create, {  // one factory, many keys
+        std::make_tuple(data_types::i32, format::bfyx),  // every listed pair uses format::bfyx; only the data type varies
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/fully_connected.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/fully_connected.cpp
@ -5,8 +5,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "fully_connected_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "fully_connected/fully_connected_kernel_selector.h"
 #include "fully_connected/fully_connected_params.h"
@ -20,14 +20,14 @@
 #include <memory>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct fully_connected_gpu : typed_primitive_gpu_impl<fully_connected> {
-    using parent = typed_primitive_gpu_impl<fully_connected>;
+struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
+    using parent = typed_primitive_impl_ocl<fully_connected>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<fully_connected_gpu>(*this);
+        return make_unique<fully_connected_impl>(*this);
    }

 protected:
@ -71,7 +71,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto fc = new fully_connected_gpu(arg, best_kernels[0]);
+        auto fc = new fully_connected_impl(arg, best_kernels[0]);

        return fc;
    }
@ -79,34 +79,29 @@ public:

 namespace detail {

-attach_fully_connected_gpu::attach_fully_connected_gpu() {
-    auto val_fw = fully_connected_gpu::create;
-
-    implementation_map<fully_connected>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
-        // MMAD
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw},
-        // IMAD
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw},
-        // fs_b_yx_fsv32
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
+attach_fully_connected_impl::attach_fully_connected_impl() {  // adds the OCL fully_connected implementation to implementation_map
+    implementation_map<fully_connected>::add(impl_types::ocl, fully_connected_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::yxfb),  // each entry is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),  // MMAD (this entry and the next)
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),  // IMAD (from here through bs_fs_yx_bsv16_fsv16)
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),  // fs_b_yx_fsv32
     });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/fused_conv_eltwise.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/fused_conv_eltwise.cpp
@ -3,8 +3,8 @@
 //

 #include "fused_conv_eltwise_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "kernel_runner.h"
@ -14,14 +14,14 @@
 #include <memory>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct fused_conv_eltwise_gpu : typed_primitive_gpu_impl<fused_conv_eltwise> {
-    using parent = typed_primitive_gpu_impl<fused_conv_eltwise>;
+struct fused_conv_eltwise_impl : typed_primitive_impl_ocl<fused_conv_eltwise> {
+    using parent = typed_primitive_impl_ocl<fused_conv_eltwise>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<fused_conv_eltwise_gpu>(*this);
+        return make_unique<fused_conv_eltwise_impl>(*this);
    }

 protected:
@ -139,7 +139,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto conv = new fused_conv_eltwise_gpu(arg, best_kernels[0]);
+        auto conv = new fused_conv_eltwise_impl(arg, best_kernels[0]);

        return conv;
    }
@ -147,42 +147,27 @@ public:

 namespace detail {

-attach_fused_conv_eltwise_gpu::attach_fused_conv_eltwise_gpu() {
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
-                                                fused_conv_eltwise_gpu::create);
-    // IMAD
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
-                                                fused_conv_eltwise_gpu::create);
-    implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::image_2d_rgba),
-                                                fused_conv_eltwise_gpu::create);
+attach_fused_conv_eltwise_impl::attach_fused_conv_eltwise_impl() {  // adds the OCL fused_conv_eltwise implementation
+    implementation_map<fused_conv_eltwise>::add(impl_types::ocl, fused_conv_eltwise_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::bfyx),  // each entry is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),  // IMAD (label carried over from the previous registration list)
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::image_2d_rgba),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/gather.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/gather.cpp
@ -3,8 +3,8 @@
 //

 #include "gather_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "gather/gather_kernel_selector.h"
 #include "gather/gather_kernel_ref.h"
@ -13,7 +13,7 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {
 kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
    switch (axis) {
        case gather::along_x:
@ -33,12 +33,12 @@ kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
    }
 }

-struct gather_gpu : typed_primitive_gpu_impl<gather> {
-    using parent = typed_primitive_gpu_impl<gather>;
+struct gather_impl : typed_primitive_impl_ocl<gather> {
+    using parent = typed_primitive_impl_ocl<gather>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<gather_gpu>(*this);
+        return make_unique<gather_impl>(*this);
    }

 public:
@ -61,7 +61,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto gather = new gather_gpu(arg, best_kernels[0]);
+        auto gather = new gather_impl(arg, best_kernels[0]);

        return gather;
    }
@ -69,21 +69,20 @@ public:

 namespace detail {

-attach_gather_gpu::attach_gather_gpu() {
-    auto val_fw = gather_gpu::create;
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
+attach_gather_impl::attach_gather_impl() {  // adds the OCL gather implementation to implementation_map
+    implementation_map<gather>::add(impl_types::ocl, gather_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::bfyx),  // f32/f16/i32 for format::bfyx
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),  // same three data types for format::bfzyx
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),  // same three data types for format::bfwzyx
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp
@ -0,0 +1,67 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "gather_nd_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "gather/gather_nd_kernel_selector.h"
+#include "gather/gather_nd_kernel_ref.h"
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+
+struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {  // OCL implementation object for the gather_nd primitive
+    using parent = typed_primitive_impl_ocl<gather_nd>;
+    using parent::parent;  // inherit the base-class constructors
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<gather_nd_impl>(*this);  // new impl copy-constructed from this one
+    }
+
+    static primitive_impl* create(const gather_nd_node& arg) {  // factory: returns a `new`-allocated impl built for `arg`
+        auto gather_nd_params = get_default_params<kernel_selector::gather_nd_params>(arg);
+        auto gather_nd_optional_params =
+            get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());
+
+        gather_nd_params.indices_rank = arg.get_primitive()->indices_rank;  // copied unchanged from the primitive
+        gather_nd_params.batch_dims = arg.get_primitive()->batch_dims;  // copied unchanged from the primitive
+
+        gather_nd_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));  // appends input(1)'s layout; presumably the indices tensor — confirm
+
+        auto& kernel_selector = kernel_selector::gather_nd_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(gather_nd_params, gather_nd_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),  // checks best_kernels.empty() before best_kernels[0] is read; presumably raises — confirm macro semantics
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto gather_nd = new gather_nd_impl(arg, best_kernels[0]);  // only the first returned kernel is used
+
+        return gather_nd;  // raw owning pointer; NOTE(review): caller presumably takes ownership — confirm
+    }
+};
+
+namespace detail {
+
+attach_gather_nd_impl::attach_gather_nd_impl() {  // adds the OCL gather_nd implementation to implementation_map
+    implementation_map<gather_nd>::add(impl_types::ocl, gather_nd_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::bfyx),  // f32/f16/i32 for format::bfyx
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),  // same three data types for format::bfzyx
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),  // same three data types for format::bfwzyx
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/gather_tree.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/gather_tree.cpp
@ -4,22 +4,22 @@

 #include "gather_tree_inst.h"

-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "gather_tree/gather_tree_kernel_selector.h"
 #include "gather_tree/gather_tree_kernel_base.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
-    using parent = typed_primitive_gpu_impl<gather_tree>;
+struct gather_tree_impl : typed_primitive_impl_ocl<gather_tree> {
+    using parent = typed_primitive_impl_ocl<gather_tree>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<gather_tree_gpu>(*this);
+        return make_unique<gather_tree_impl>(*this);
    }

    static primitive_impl* create(const gather_tree_node& arg) {
@ -39,22 +39,21 @@ struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
            best_kernels.empty(),
            "Cannot find a proper kernel with this arguments");

-        return new gather_tree_gpu(arg, best_kernels[0]);
+        return new gather_tree_impl(arg, best_kernels[0]);
    }
 };
 namespace detail {
-    attach_gather_tree_gpu::attach_gather_tree_gpu() {
-            auto val_fw = gather_tree_gpu::create;
-
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
-
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-            implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-        }
+attach_gather_tree_impl::attach_gather_tree_impl() {  // adds the OCL gather_tree implementation to implementation_map
+    implementation_map<gather_tree>::add(impl_types::ocl, gather_tree_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::i32, format::yxfb),  // i32 with yxfb / bfyx / byxf
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::f32, format::yxfb),  // f32 with the same three formats
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+    });
+}

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/gemm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/gemm.cpp
@ -4,22 +4,22 @@

 #include "gemm_inst.h"

-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "gemm/gemm_kernel_selector.h"
 #include "gemm/gemm_kernel_base.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct gemm_gpu : typed_primitive_gpu_impl<gemm> {
-    using parent = typed_primitive_gpu_impl<gemm>;
+struct gemm_impl : typed_primitive_impl_ocl<gemm> {
+    using parent = typed_primitive_impl_ocl<gemm>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<gemm_gpu>(*this);
+        return make_unique<gemm_impl>(*this);
    }

 public:
@ -53,28 +53,29 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        return new gemm_gpu(arg, best_kernels[0]);
+        return new gemm_impl(arg, best_kernels[0]);
    }
 };

 namespace detail {

-attach_gemm_gpu::attach_gemm_gpu() {
-    auto val_fw = gemm_gpu::create;
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+attach_gemm_impl::attach_gemm_impl() {  // adds the OCL gemm implementation to implementation_map
+    implementation_map<gemm>::add(impl_types::ocl, gemm_impl::create, {  // one factory for every pair below
+        std::make_tuple(data_types::f32, format::bfyx),  // f32/f16/i8/u8 for format::bfyx
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),  // same four data types for format::bfzyx
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),  // same four data types for format::bfwzyx
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/generic_layer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/generic_layer.cpp
@ -4,38 +4,37 @@

 #include "generic_layer_inst.h"
 #include "cldnn/runtime/engine.hpp"
-#include "implementation_map.h"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "network_impl.h"
-#include "register_gpu.hpp"
+#include "register.hpp"
 #include <vector>

-using namespace cldnn;
+namespace cldnn {
+namespace ocl {

-namespace neural {
-
-struct generic_layer_gpu : typed_primitive_impl<generic_layer> {
+struct generic_layer_impl : typed_primitive_impl<generic_layer> {
    const generic_layer_node& outer;
    const kernel_selector::cl_kernel_data& _cl_kernel_data;
    std::vector<kernel::ptr> _kernels;
    kernel_id _kernel_id;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<generic_layer_gpu>(*this);
+        return make_unique<generic_layer_impl>(*this);
    }

-    generic_layer_gpu(const generic_layer_gpu& other)
+    generic_layer_impl(const generic_layer_impl& other)
    : outer(other.outer)
    , _cl_kernel_data(other._cl_kernel_data)
    , _kernels({})
    , _kernel_id(other._kernel_id) {
        if (other._kernels.empty()) {
-            throw std::runtime_error("Can't copy generic_layer_gpu node: kernels vector is empty");
+            throw std::runtime_error("Can't copy generic_layer_impl node: kernels vector is empty");
        }
        _kernels.push_back(other._kernels.front()->clone());
    }

-    generic_layer_gpu(const generic_layer_node& arg)
+    generic_layer_impl(const generic_layer_node& arg)
        : outer(arg)
        , _cl_kernel_data(*outer.get_primitive()->generic_params.clKernel.get())
        , _kernels() {
@ -109,17 +108,17 @@ struct generic_layer_cpu : typed_primitive_impl<generic_layer> {

 static primitive_impl* create(const generic_layer_node& arg) {
     if (arg.get_primitive()->generic_params.engine == kernel_selector::generic_kernel_params::Engine::GPU) {
-        return new generic_layer_gpu(arg);
+        return new generic_layer_impl(arg);  // GPU engine requested: build the kernel-backed impl (renamed from generic_layer_gpu)
     } else {
         return new generic_layer_cpu(arg);
     }
 }

-}  // namespace neural
+namespace detail {
+attach_generic_layer_impl::attach_generic_layer_impl() {  // adds the generic_layer implementation to implementation_map
+    implementation_map<generic_layer>::add(cldnn::impl_types::ocl, create, {});  // file-local create(); empty key list — presumably "no type/format restriction", confirm against add()
+}

-namespace cldnn { namespace gpu { namespace detail {
-    attach_generic_layer_gpu::attach_generic_layer_gpu() {
-        implementation_map<generic_layer>::add({ {cldnn::engine_types::ocl, neural::create} });
-    }
-
-} } }  // namespace cldnn::gpu::detail
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/grn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/grn.cpp
@ -3,8 +3,8 @@
 //

 #include "grn_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "grn/grn_kernel_selector.h"
@ -15,14 +15,14 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct grn_gpu : typed_primitive_gpu_impl<grn> {
-    using parent = typed_primitive_gpu_impl<grn>;
+struct grn_impl : typed_primitive_impl_ocl<grn> {
+    using parent = typed_primitive_impl_ocl<grn>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<grn_gpu>(*this);
+        return make_unique<grn_impl>(*this);
    }

 public:
@ -40,7 +40,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto grn = new grn_gpu(arg, best_kernels[0]);
+        auto grn = new grn_impl(arg, best_kernels[0]);

        return grn;
    }
@ -48,11 +48,13 @@ public:

 namespace detail {

-attach_grn_gpu::attach_grn_gpu() {
-    implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), grn_gpu::create);
-    implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), grn_gpu::create);
+attach_grn_impl::attach_grn_impl() {  // adds the OCL grn implementation to implementation_map
+    implementation_map<grn>::add(impl_types::ocl, grn_impl::create, {  // one factory for both pairs below
+        std::make_tuple(data_types::f32, format::bfyx),  // each entry is a (data type, format) pair
+        std::make_tuple(data_types::f16, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp
@ -0,0 +1,82 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "lrn_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "lrn/lrn_kernel_selector.h"
+#include "lrn/lrn_kernel_base.h"
+
+namespace cldnn {
+namespace ocl {
+
+/// OCL-side implementation of the lrn primitive, layered on typed_primitive_impl_ocl<lrn>.
+struct lrn_impl : typed_primitive_impl_ocl<lrn> {
+    using parent = typed_primitive_impl_ocl<lrn>;
+    using parent::parent;
+
+    /// Returns a copy of this object, copy-constructed through make_unique.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<lrn_impl>(*this);
+    }
+
+    /// Creates the implementation object for @p arg.
+    ///
+    /// Copies alpha, beta, k, size and norm_region from the node's primitive into the
+    /// kernel-selector parameters, asks lrn_kernel_selector for its best kernels and builds
+    /// an lrn_impl from the first one. CLDNN_ERROR_BOOL receives the "no kernels" condition
+    /// before that first kernel is read.
+    /// @return Pointer obtained from `new`; presumably owned by the caller (confirm at the call site).
+    static primitive_impl* create(const lrn_node& arg) {
+        auto params = get_default_params<kernel_selector::lrn_params>(arg);
+        auto optional_params = get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());
+
+        const auto& prim = arg.get_primitive();
+
+        params.alpha = prim->alpha;
+        params.beta = prim->beta;
+        params.k = prim->k;
+        params.localSize = prim->size;
+        params.divMode = kernel_selector::kernel_divider_mode::FIXED;
+
+        // Any region other than "within channel" is mapped to ACROSS_CHANNEL.
+        if (prim->norm_region == lrn_norm_region_within_channel) {
+            params.normMode = kernel_selector::lrn_mode::WITHIN_CHANNEL;
+        } else {
+            params.normMode = kernel_selector::lrn_mode::ACROSS_CHANNEL;
+        }
+
+        auto candidates =
+            kernel_selector::lrn_kernel_selector::Instance().GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         candidates.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new lrn_impl(arg, candidates[0]);
+    }
+};
+
+namespace detail {
+
+attach_lrn_impl::attach_lrn_impl() {  // Constructor: passes lrn_impl::create to implementation_map<lrn>::add, tagged impl_types::ocl.
+    implementation_map<lrn>::add(impl_types::ocl, lrn_impl::create, {
+        std::make_tuple(data_types::f32, format::yxfb),  // (data type, format) tuples given to add(): f32/f16/u8/i8 for each of five formats
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+    });  // NOTE(review): presumably the combinations lrn_impl may be picked for - confirm in impls/implementation_map.hpp
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_dynamic_input.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_dynamic_input.cpp
@ -5,8 +5,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "lstm_dynamic_input_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "lstm_dynamic/lstm_dynamic_input_kernel_selector.h"
 #include "lstm_dynamic/lstm_dynamic_input_kernel_base.h"
@ -14,14 +14,14 @@
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct lstm_dynamic_input_gpu : typed_primitive_gpu_impl<lstm_dynamic_input> {
-    using parent = typed_primitive_gpu_impl<lstm_dynamic_input>;
+struct lstm_dynamic_input_impl : typed_primitive_impl_ocl<lstm_dynamic_input> {
+    using parent = typed_primitive_impl_ocl<lstm_dynamic_input>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<lstm_dynamic_input_gpu>(*this);
+        return make_unique<lstm_dynamic_input_impl>(*this);
    }

 protected:
@ -64,7 +64,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto lstm_dynamic = new lstm_dynamic_input_gpu(arg, best_kernels[0]);
+        auto lstm_dynamic = new lstm_dynamic_input_impl(arg, best_kernels[0]);

        return lstm_dynamic;
    }
@ -72,15 +72,13 @@ public:

 namespace detail {

-attach_lstm_dynamic_input_gpu::attach_lstm_dynamic_input_gpu() {
-    auto val_fw = lstm_dynamic_input_gpu::create;
-
-    implementation_map<lstm_dynamic_input>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+attach_lstm_dynamic_input_impl::attach_lstm_dynamic_input_impl() {  // Constructor: passes lstm_dynamic_input_impl::create to implementation_map<lstm_dynamic_input>::add.
+    implementation_map<lstm_dynamic_input>::add(impl_types::ocl, lstm_dynamic_input_impl::create, {  // tagged impl_types::ocl
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(): f32 and f16, bfyx only
+        std::make_tuple(data_types::f16, format::bfyx),
     });  // NOTE(review): presumably the combinations this impl may be picked for - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_dynamic_timeloop.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_dynamic_timeloop.cpp
@ -5,8 +5,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "lstm_dynamic_timeloop_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "lstm_dynamic/lstm_dynamic_timeloop_kernel_selector.h"
 #include "lstm_dynamic/lstm_dynamic_timeloop_kernel_base.h"
@ -14,14 +14,14 @@
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct lstm_dynamic_timeloop_gpu : typed_primitive_gpu_impl<lstm_dynamic_timeloop> {
-    using parent = typed_primitive_gpu_impl<lstm_dynamic_timeloop>;
+struct lstm_dynamic_timeloop_impl : typed_primitive_impl_ocl<lstm_dynamic_timeloop> {
+    using parent = typed_primitive_impl_ocl<lstm_dynamic_timeloop>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<lstm_dynamic_timeloop_gpu>(*this);
+        return make_unique<lstm_dynamic_timeloop_impl>(*this);
    }

 protected:
@ -85,7 +85,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto lstm_dynamic = new lstm_dynamic_timeloop_gpu(arg, best_kernels[0]);
+        auto lstm_dynamic = new lstm_dynamic_timeloop_impl(arg, best_kernels[0]);

        return lstm_dynamic;
    }
@ -93,15 +93,13 @@ public:

 namespace detail {

-attach_lstm_dynamic_timeloop_gpu::attach_lstm_dynamic_timeloop_gpu() {
-    auto val_fw = lstm_dynamic_timeloop_gpu::create;
-
-    implementation_map<lstm_dynamic_timeloop>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
+attach_lstm_dynamic_timeloop_impl::attach_lstm_dynamic_timeloop_impl() {  // Constructor: passes lstm_dynamic_timeloop_impl::create to implementation_map<lstm_dynamic_timeloop>::add.
+    implementation_map<lstm_dynamic_timeloop>::add(impl_types::ocl, lstm_dynamic_timeloop_impl::create, {  // tagged impl_types::ocl
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(): f32 and f16, bfyx only
+        std::make_tuple(data_types::f16, format::bfyx),
     });  // NOTE(review): presumably the combinations this impl may be picked for - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_elt.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_elt.cpp
@ -5,8 +5,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "lstm_elt_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "lstm/lstm_elt_kernel_selector.h"
 #include "lstm/lstm_elt_kernel_base.h"
@ -14,14 +14,14 @@
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct lstm_elt_gpu : typed_primitive_gpu_impl<lstm_elt> {
-    using parent = typed_primitive_gpu_impl<lstm_elt>;
+struct lstm_elt_impl : typed_primitive_impl_ocl<lstm_elt> {
+    using parent = typed_primitive_impl_ocl<lstm_elt>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<lstm_elt_gpu>(*this);
+        return make_unique<lstm_elt_impl>(*this);
    }

 protected:
@ -85,7 +85,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto lstm_elt = new lstm_elt_gpu(arg, best_kernels[0]);
+        auto lstm_elt = new lstm_elt_impl(arg, best_kernels[0]);

        return lstm_elt;
    }
@ -93,17 +93,15 @@ public:

 namespace detail {

-attach_lstm_elt_gpu::attach_lstm_elt_gpu() {
-    auto val_fw = lstm_elt_gpu::create;
-
-    implementation_map<lstm_elt>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
+attach_lstm_elt_impl::attach_lstm_elt_impl() {  // Constructor: passes lstm_elt_impl::create to implementation_map<lstm_elt>::add, tagged impl_types::ocl.
+    implementation_map<lstm_elt>::add(impl_types::ocl, lstm_elt_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(): f32 and f16 for bfyx and fyxb
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::fyxb),
+        std::make_tuple(data_types::f16, format::fyxb),
     });  // NOTE(review): presumably the combinations this impl may be picked for - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_gemm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/lstm_gemm.cpp
@ -5,8 +5,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////

 #include "lstm_gemm_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "lstm/lstm_gemm_kernel_selector.h"
 #include "lstm/lstm_gemm_kernel_base.h"
@ -14,14 +14,14 @@
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct lstm_gemm_gpu : typed_primitive_gpu_impl<lstm_gemm> {
-    using parent = typed_primitive_gpu_impl<lstm_gemm>;
+struct lstm_gemm_impl : typed_primitive_impl_ocl<lstm_gemm> {
+    using parent = typed_primitive_impl_ocl<lstm_gemm>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<lstm_gemm_gpu>(*this);
+        return make_unique<lstm_gemm_impl>(*this);
    }

 protected:
@ -82,7 +82,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto lstm_gemm = new lstm_gemm_gpu(arg, best_kernels[0]);
+        auto lstm_gemm = new lstm_gemm_impl(arg, best_kernels[0]);

        return lstm_gemm;
    }
@ -90,17 +90,15 @@ public:

 namespace detail {

-attach_lstm_gemm_gpu::attach_lstm_gemm_gpu() {
-    auto val_fw = lstm_gemm_gpu::create;
-
-    implementation_map<lstm_gemm>::add({
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
-        {std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
+attach_lstm_gemm_impl::attach_lstm_gemm_impl() {  // Constructor: passes lstm_gemm_impl::create to implementation_map<lstm_gemm>::add, tagged impl_types::ocl.
+    implementation_map<lstm_gemm>::add(impl_types::ocl, lstm_gemm_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(): f32 and f16 for bfyx and fyxb
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::fyxb),
+        std::make_tuple(data_types::f16, format::fyxb),
     });  // NOTE(review): presumably the combinations this impl may be picked for - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/max_unpooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/max_unpooling.cpp
@ -3,8 +3,8 @@
 //

 #include "max_unpooling_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "network_impl.h"
 #include "kernel_selector_helper.h"
@ -13,14 +13,14 @@
 #include <vector>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct max_unpooling_gpu : typed_primitive_gpu_impl<max_unpooling> {
-    using parent = typed_primitive_gpu_impl<max_unpooling>;
+struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
+    using parent = typed_primitive_impl_ocl<max_unpooling>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<max_unpooling_gpu>(*this);
+        return make_unique<max_unpooling_impl>(*this);
    }

 protected:
@ -55,7 +55,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto max_unpool = new max_unpooling_gpu(arg, best_kernels[0]);
+        auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);

        return max_unpool;
    }
@ -63,27 +63,20 @@ public:

 namespace detail {

-attach_max_unpooling_gpu::attach_max_unpooling_gpu() {
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                           max_unpooling_gpu::create);
-    implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
-                                           max_unpooling_gpu::create);
+attach_max_unpooling_impl::attach_max_unpooling_impl() {  // Constructor: passes max_unpooling_impl::create to implementation_map<max_unpooling>::add.
+    implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {  // tagged impl_types::ocl
+        std::make_tuple(data_types::f32, format::yxfb),  // (data type, format) tuples given to add(): f32/f16/i8 for yxfb, bfyx and byxf
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+    });  // NOTE(review): presumably the combinations this impl may be picked for - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp
@ -0,0 +1,32 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mutable_data_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+
+namespace cldnn {
+namespace ocl {
+
+/// OCL-side implementation wrapper for the mutable_data primitive.
+struct mutable_data_impl : typed_primitive_impl_ocl<mutable_data> {
+    using parent = typed_primitive_impl_ocl<mutable_data>;
+    using parent::parent;
+
+    /// Returns a copy of this object, copy-constructed through make_unique.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<mutable_data_impl>(*this);
+    }
+
+    /// Allocates a mutable_data_impl for @p arg. The second constructor argument is an
+    /// empty braced initializer; no kernel selector is consulted here.
+    /// @return Pointer obtained from `new`; presumably owned by the caller (confirm at the call site).
+    static primitive_impl* create(const mutable_data_node& arg) {
+        return new mutable_data_impl(arg, {});
+    }
+};
+
+namespace detail {
+
+attach_mutable_data_impl::attach_mutable_data_impl() {  // Constructor: passes mutable_data_impl::create to implementation_map<mutable_data>::add.
+    implementation_map<mutable_data>::add(impl_types::ocl, mutable_data_impl::create, {});  // empty tuple list; NOTE(review): presumably no per-type/format restriction - confirm in impls/implementation_map.hpp
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp
@ -0,0 +1,86 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mvn_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "mvn/mvn_kernel_selector.h"
+#include "mvn/mvn_kernel_base.h"
+
+#include <algorithm>
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+
+/// OCL-side implementation of the mvn primitive, layered on typed_primitive_impl_ocl<mvn>.
+struct mvn_impl : typed_primitive_impl_ocl<mvn> {
+    using parent = typed_primitive_impl_ocl<mvn>;
+    using parent::parent;
+
+    /// Returns a copy of this object, copy-constructed through make_unique.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<mvn_impl>(*this);
+    }
+
+    /// Creates the implementation object for @p arg.
+    ///
+    /// Translates across_channels, normalize_variance, epsilon and eps_inside_sqrt from the
+    /// node's primitive into kernel-selector parameters, asks mvn_kernel_selector for its best
+    /// kernels and builds an mvn_impl from the first one. CLDNN_ERROR_BOOL receives the
+    /// "no kernels" condition before that first kernel is read.
+    /// @return Pointer obtained from `new`; presumably owned by the caller (confirm at the call site).
+    static primitive_impl* create(const mvn_node& arg) {
+        auto params = get_default_params<kernel_selector::mvn_params>(arg);
+        auto optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());
+
+        const auto& prim = arg.get_primitive();
+
+        if (prim->across_channels) {
+            params.mvnMode = kernel_selector::mvn_mode::ACROSS_CHANNELS;
+        } else {
+            params.mvnMode = kernel_selector::mvn_mode::WITHIN_CHANNELS;
+        }
+        params.mvnNormalizeVariance = prim->normalize_variance;
+        params.epsilon = prim->epsilon;
+
+        if (prim->eps_inside_sqrt) {
+            params.mvnEpsMode = kernel_selector::mvn_eps_mode::INSIDE_SQRT;
+        } else {
+            params.mvnEpsMode = kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;
+        }
+
+        auto candidates =
+            kernel_selector::mvn_kernel_selector::Instance().GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         candidates.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new mvn_impl(arg, candidates[0]);
+    }
+};
+
+namespace detail {
+
+attach_mvn_impl::attach_mvn_impl() {  // Constructor: passes mvn_impl::create to implementation_map<mvn>::add, tagged impl_types::ocl.
+    implementation_map<mvn>::add(impl_types::ocl, mvn_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(); f32/f16 appear for every format listed
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),  // u8/i8 appear only for bfyx, bfzyx, b_fs_zyx_fsv16 and b_fs_yx_fsv16
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+    });  // NOTE(review): presumably the combinations mvn_impl may be picked for - confirm in impls/implementation_map.hpp
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp
@ -0,0 +1,83 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "normalize_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "cldnn/runtime/error_handler.hpp"
+#include "kernel_selector_helper.h"
+#include "normalize/normalize_kernel_selector.h"
+#include "normalize/normalize_kernel_base.h"
+
+#include <algorithm>
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+
+/// OCL-side implementation of the normalize primitive, layered on typed_primitive_impl_ocl<normalize>.
+struct normalize_impl : typed_primitive_impl_ocl<normalize> {
+    using parent = typed_primitive_impl_ocl<normalize>;
+    using parent::parent;
+
+    /// Returns a copy of this object, copy-constructed through make_unique.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<normalize_impl>(*this);
+    }
+
+protected:
+    /// Starts from the parent's kernel arguments and additionally sets scale_table to the
+    /// instance's scale memory.
+    kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
+        kernel_arguments_data args = parent::get_arguments(instance, split);
+        args.scale_table = instance.scale_memory();
+        return args;
+    }
+
+public:
+    /// Creates the implementation object for @p arg.
+    ///
+    /// Translates across_spatial and epsilon from the node's primitive into kernel-selector
+    /// parameters, derives scaleTable from the scale input's output layout, asks
+    /// normalize_kernel_selector for its best kernels and builds a normalize_impl from the
+    /// first one. CLDNN_ERROR_BOOL receives best_kernels.empty() before that kernel is read.
+    /// @return Pointer obtained from `new`; presumably owned by the caller (confirm at the call site).
+    static primitive_impl* create(const normalize_node& arg) {
+        auto norm_params = get_default_params<kernel_selector::normalize_params>(arg);
+        auto norm_optional_params =
+            get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());
+
+        const auto& scale_layout = arg.scale().get_output_layout();
+
+        norm_params.normMode = arg.get_primitive()->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL
+                                                                   : kernel_selector::normalize_mode::WITHIN_SPATIAL;
+        norm_params.epsilon = arg.get_primitive()->epsilon;
+        // The scale tensor is converted and then flattened over its feature and spatial dims.
+        norm_params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials();
+
+        auto& kernel_selector = kernel_selector::normalize_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(norm_params, norm_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto normalize = new normalize_impl(arg, best_kernels[0]);
+
+        return normalize;
+    }
+};
+
+namespace detail {
+
+attach_normalize_impl::attach_normalize_impl() {  // Constructor: passes normalize_impl::create to implementation_map<normalize>::add, tagged impl_types::ocl.
+    implementation_map<normalize>::add(impl_types::ocl, normalize_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),  // (data type, format) tuples given to add(): f32/f16/i8/u8 for bfyx, yxfb and byxf
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+    });  // NOTE(review): presumably the combinations normalize_impl may be picked for - confirm in impls/implementation_map.hpp
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp
@ -0,0 +1,74 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "one_hot_inst.h"
+
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "one_hot/one_hot_kernel_selector.h"
+#include "one_hot/one_hot_kernel_base.h"
+#include "cldnn/runtime/error_handler.hpp"
+#include <vector>
+
+namespace cldnn {
+namespace ocl {
+
+/// OCL-side implementation of the one_hot primitive, layered on typed_primitive_impl_ocl<one_hot>.
+struct one_hot_impl : typed_primitive_impl_ocl<one_hot> {
+    using parent = typed_primitive_impl_ocl<one_hot>;
+    using parent::parent;
+
+    /// Returns a copy of this object, copy-constructed through make_unique.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<one_hot_impl>(*this);
+    }
+
+    /// Creates the implementation object for @p arg.
+    ///
+    /// Copies one_hot_axis, on_value and off_value from the node's primitive, sets
+    /// one_hot_limit to the output size at one_hot_axis, asks one_hot_kernel_selector for its
+    /// best kernels and builds a one_hot_impl from the first one. CLDNN_ERROR_BOOL receives
+    /// the "no kernels" condition before that first kernel is read.
+    /// @return Pointer obtained from `new`; presumably owned by the caller (confirm at the call site).
+    static primitive_impl* create(const one_hot_node& arg) {
+        auto params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());
+
+        const auto& prim = arg.get_primitive();
+        params.one_hot_axis = prim->one_hot_axis;
+        params.on_value = prim->on_value;
+        params.off_value = prim->off_value;
+
+        // Sizes are read in bfzyx order for bfzyx outputs and in bfyx order for everything else.
+        auto out_layout = arg.get_output_layout();
+        auto dims = out_layout.format == format::bfzyx ? out_layout.size.sizes(format::bfzyx)
+                                                       : out_layout.size.sizes(format::bfyx);
+
+        params.one_hot_limit = dims[params.one_hot_axis];
+
+        auto candidates =
+            kernel_selector::one_hot_kernel_selector::Instance().GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         candidates.empty(),
+                         "Cannot find a proper kernel with these arguments");
+
+        return new one_hot_impl(arg, candidates[0]);
+    }
+};
+
+namespace detail {
+
+attach_one_hot_impl::attach_one_hot_impl() {  // Constructor: passes one_hot_impl::create to implementation_map<one_hot>::add, tagged impl_types::ocl.
+    implementation_map<one_hot>::add(impl_types::ocl, one_hot_impl::create, {
+        std::make_tuple(data_types::i8, format::bfyx),  // (data type, format) tuples given to add(): i8/u8/i32/i64/f32/f16 for bfyx and bfzyx
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i64, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i64, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+    });  // NOTE(review): presumably the combinations one_hot_impl may be picked for - confirm in impls/implementation_map.hpp
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/permute.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/permute.cpp
@ -3,8 +3,8 @@
 //

 #include "permute_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "permute/permute_kernel_selector.h"
@ -13,14 +13,14 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct permute_gpu : typed_primitive_gpu_impl<permute> {
-    using parent = typed_primitive_gpu_impl<permute>;
+struct permute_impl : typed_primitive_impl_ocl<permute> {
+    using parent = typed_primitive_impl_ocl<permute>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<permute_gpu>(*this);
+        return make_unique<permute_impl>(*this);
    }

    static primitive_impl* create(const permute_node& arg) {
@ -38,7 +38,7 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto permute = new permute_gpu(arg, best_kernels[0]);
+        auto permute = new permute_impl(arg, best_kernels[0]);

        return permute;
    }
@ -46,12 +46,10 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {

 namespace detail {

-attach_permute_gpu::attach_permute_gpu() {
-    implementation_map<permute>::add({
-        {engine_types::ocl, permute_gpu::create},
-    });
+attach_permute_impl::attach_permute_impl() {  // Constructor: passes permute_impl::create to implementation_map<permute>::add, tagged impl_types::ocl.
+    implementation_map<permute>::add(impl_types::ocl, permute_impl::create, {});  // empty tuple list; NOTE(review): presumably no per-type/format restriction - confirm in impls/implementation_map.hpp
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/pooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/pooling.cpp
@ -3,8 +3,8 @@
 //

 #include "pooling_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "pooling/pooling_kernel_selector.h"
@ -12,7 +12,7 @@
 #include <algorithm>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 namespace {
 void validate_args(const pooling_node& arg) {
@ -64,12 +64,12 @@ kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mo
 }
 }  // namespace

-struct pooling_gpu : typed_primitive_gpu_impl<pooling> {
-    using parent = typed_primitive_gpu_impl<pooling>;
+struct pooling_impl : typed_primitive_impl_ocl<pooling> {
+    using parent = typed_primitive_impl_ocl<pooling>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<pooling_gpu>(*this);
+        return make_unique<pooling_impl>(*this);
    }

 protected:
@ -148,7 +148,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto pool = new pooling_gpu(arg, best_kernels[0]);
+        auto pool = new pooling_impl(arg, best_kernels[0]);

        return pool;
    }
@ -156,66 +156,57 @@ public:

 namespace detail {

-attach_pooling_gpu::attach_pooling_gpu() {
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), pooling_gpu::create);
-
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), pooling_gpu::create);
-    implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), pooling_gpu::create);
+attach_pooling_impl::attach_pooling_impl() {
+    implementation_map<pooling>::add(impl_types::ocl, pooling_impl::create, {  // registers pooling_impl::create for impl_types::ocl with the (data type, format) pairs listed below
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i8, format::yxfb),
+        std::make_tuple(data_types::u8, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),  // fs_b_yx_fsv32 is listed for f16/f32 only
+        std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/primitive_base.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/primitive_base.cpp
@ -2,11 +2,11 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include "primitive_gpu_base.h"
+#include "primitive_base.hpp"
 #include <list>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 bool is_user_cpu(const program_node* user) {
    if (user->can_be_optimized()) {
@ -28,5 +28,5 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
    }
    return false;
 }
-}  // namespace gpu
-}  // namespace cldnn
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/primitive_base.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/primitive_base.hpp
@ -11,30 +11,30 @@
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "network_impl.h"
-#include "register_gpu.hpp"
+#include "register.hpp"
 #include <vector>
 #include <list>
 #include <utility>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 // checks if any user in a list is a cpu primitive
 bool is_any_user_cpu(const std::list<const program_node*>& users);

 /*
 Base class for all GPU implementations of the specified primitive type.
-For example, all gpu convolution implementations should derive from typed_primitive_gpu_impl<convolution>.
+For example, all gpu convolution implementations should derive from typed_primitive_impl_ocl<convolution>.
 */
 template <class PType>
-struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
+struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
    const typed_program_node<PType>& _outer;
    kernel_selector::kernel_data _kernel_data;
    std::vector<kernel_id> _kernel_ids;
    std::vector<kernel::ptr> _kernels;
    std::vector<memory::cptr> _intermediates_memory;

-    typed_primitive_gpu_impl(const typed_primitive_gpu_impl<PType>& other)
+    typed_primitive_impl_ocl(const typed_primitive_impl_ocl<PType>& other)
    : typed_primitive_impl<PType>(other._weights_reorder_params, other._kernel_name)
    , _outer(other._outer)
    , _kernel_data(other._kernel_data)
@ -52,7 +52,7 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
        }
    }

-    typed_primitive_gpu_impl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
+    typed_primitive_impl_ocl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
        : typed_primitive_impl<PType>(kd.weightsReorderParams, kd.kernelName),
          _outer(arg),
          _kernel_data(kd) {
@ -199,5 +199,5 @@ protected:
    }
 };

-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/pyramid_roi_align.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/pyramid_roi_align.cpp
@ -2,8 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "pyramid_roi_align/pyramid_roi_align_kernel_selector.h"
 #include "pyramid_roi_align/pyramid_roi_align_kernel_base.h"
@ -14,14 +14,14 @@
 #include <cmath>

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
-    using parent = typed_primitive_gpu_impl<pyramid_roi_align>;
+struct pyramid_roi_align_impl : typed_primitive_impl_ocl<pyramid_roi_align> {
+    using parent = typed_primitive_impl_ocl<pyramid_roi_align>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<pyramid_roi_align_gpu>(*this);
+        return make_unique<pyramid_roi_align_impl>(*this);
    }

    static primitive_impl* create(const pyramid_roi_align_node& arg) {
@ -54,28 +54,23 @@ struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        return new pyramid_roi_align_gpu(arg, best_kernels[0]);
+        return new pyramid_roi_align_impl(arg, best_kernels[0]);
    }
 };

 namespace detail {

-attach_pyramid_roi_align_gpu::attach_pyramid_roi_align_gpu() {
-    auto val_fw = pyramid_roi_align_gpu::create;
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                               val_fw);
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
-                                               val_fw);
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
-                                               val_fw);
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                               val_fw);
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
-                                               val_fw);
-    implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
-                                               val_fw);
+attach_pyramid_roi_align_impl::attach_pyramid_roi_align_impl() {
+    implementation_map<pyramid_roi_align>::add(impl_types::ocl, pyramid_roi_align_impl::create, {  // registers pyramid_roi_align_impl::create for impl_types::ocl with the (data type, format) pairs listed below
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f16, format::byxf),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp
@ -0,0 +1,160 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "quantize_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "quantize/quantize_kernel_selector.h"
+#include "quantize/quantize_kernel_ref.h"
+#include "cldnn/runtime/error_handler.hpp"
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+
+// OCL implementation of the quantize primitive. Instances are produced by create(),
+// which picks a kernel through the quantize kernel selector.
+struct quantize_impl : typed_primitive_impl_ocl<quantize> {
+    using parent = typed_primitive_impl_ocl<quantize>;
+    using parent::parent;
+
+    // Returns a copy of this implementation object.
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<quantize_impl>(*this);
+    }
+
+protected:
+    // Assembles the kernel arguments for one instance: all input memories of the
+    // instance, optionally followed by the memories of dependencies 5..8, and the
+    // output memory.
+    kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
+        kernel_arguments_data kernel_args;
+
+        for (size_t idx = 0; idx < instance.inputs_memory_count(); ++idx) {
+            kernel_args.inputs.push_back(instance.input_memory_ptr(idx));
+        }
+
+        // Dependencies 5..8 are appended only when the scale/shift optimization is
+        // enabled and the node has exactly 9 dependencies.
+        // NOTE(review): presumably these are the extra scale/shift buffers - confirm.
+        const bool append_extra_deps =
+            instance.node.get_scale_shift_opt() && instance.node.get_dependencies().size() == 9;
+        if (append_extra_deps) {
+            for (size_t dep = 5; dep <= 8; ++dep) {
+                kernel_args.inputs.push_back(instance.dep_memory_ptr(dep));
+            }
+        }
+
+        kernel_args.output = instance.output_memory_ptr();
+        return kernel_args;
+    }
+
+public:
+    // Creates a quantize_impl for the given node.
+    // Copies the quantization settings of the node into kernel selector params, asks the
+    // quantize kernel selector for the best kernels and wraps the first one.
+    // CLDNN_ERROR_BOOL is invoked with best_kernels.empty() before that kernel is used.
+    // The caller receives a raw pointer to a heap-allocated object.
+    static primitive_impl* create(const quantize_node& arg) {
+        auto params = get_default_params<kernel_selector::quantize_params>(arg);
+        auto optional_params =
+            get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());
+
+        // General quantization settings.
+        params.levels = arg.get_levels();
+        params.packed_binary_output = arg.get_packed_binary_output();
+        params.scale_shift_opt = arg.get_scale_shift_opt();
+        params.has_post_scale = arg.get_need_post_scale();
+        params.has_post_shift = arg.get_need_post_shift();
+        params.has_pre_shift = arg.get_need_pre_shift();
+        params.has_clamp = arg.get_need_clamp();
+
+        // Per-tensor flags.
+        params.per_tensor_input_range = arg.get_per_tensor_input_range();
+        params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
+        params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
+        params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
+        params.per_tensor_output_shift = arg.get_per_tensor_output_shift();
+
+        // Range, scale and shift values taken from the node.
+        params.in_lo = arg.get_input_lo_val();
+        params.in_hi = arg.get_input_hi_val();
+        params.in_scale = arg.get_input_scale_val();
+        params.in_shift = arg.get_input_shift_val();
+        params.out_scale = arg.get_output_scale_val();
+        params.out_shift = arg.get_output_shift_val();
+
+        // Inputs are appended starting from index 1.
+        // NOTE(review): input 0 is presumably already filled in by get_default_params - confirm.
+        for (size_t idx = 1; idx < arg.inputs_count(); ++idx) {
+            params.inputs.push_back(convert_data_tensor(arg.input(idx).get_output_layout()));
+        }
+        params.output = convert_data_tensor(arg.get_output_layout());
+
+        auto& selector = kernel_selector::quantize_kernel_selector::Instance();
+        auto best_kernels = selector.GetBestKernels(params, optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        return new quantize_impl(arg, best_kernels[0]);
+    }
+};
+
+namespace detail {
+
+// Registers quantize_impl::create for impl_types::ocl with the (data type, format)
+// pairs below. Every pair is listed exactly once, grouped by format.
+attach_quantize_impl::attach_quantize_impl() {
+    implementation_map<quantize>::add(impl_types::ocl, quantize_impl::create, {
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
+        std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
+        std::make_tuple(data_types::i8, format::fs_b_yx_fsv32),
+        std::make_tuple(data_types::u8, format::fs_b_yx_fsv32),
+
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::u8, format::byxf),
+        std::make_tuple(data_types::i8, format::byxf),
+
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
+
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
+
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
+
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/reduce.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/reduce.cpp
@ -3,8 +3,8 @@
 //

 #include "reduce_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "reduce/reduce_kernel_selector.h"
 #include "reduce/reduce_kernel_ref.h"
@ -15,7 +15,7 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {
 namespace {
 kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
    switch (mode) {
@ -49,12 +49,12 @@ kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
    }
 }
 }  // namespace
-struct reduce_gpu : typed_primitive_gpu_impl<reduce> {
-    using parent = typed_primitive_gpu_impl<reduce>;
+struct reduce_impl : typed_primitive_impl_ocl<reduce> {
+    using parent = typed_primitive_impl_ocl<reduce>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<reduce_gpu>(*this);
+        return make_unique<reduce_impl>(*this);
    }

 public:
@ -71,7 +71,7 @@ public:

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");

-        auto reduce = new reduce_gpu(arg, best_kernels[0]);
+        auto reduce = new reduce_impl(arg, best_kernels[0]);

        return reduce;
    }
@ -79,30 +79,31 @@ public:

 namespace detail {

-attach_reduce_gpu::attach_reduce_gpu() {
-    auto val_fw = reduce_gpu::create;
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
-    implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
+attach_reduce_impl::attach_reduce_impl() {
+    implementation_map<reduce>::add(impl_types::ocl, reduce_impl::create, {  // registers reduce_impl::create for impl_types::ocl; keys are f32/f16/i32/i8/u8 for each of bfyx, bfzyx, bfwzyx and b_fs_yx_fsv16
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/region_yolo.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/region_yolo.cpp
@ -3,22 +3,22 @@
 //

 #include "region_yolo_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "region_yolo/region_yolo_kernel_selector.h"
 #include "region_yolo/region_yolo_kernel_ref.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
-    using parent = typed_primitive_gpu_impl<region_yolo>;
+struct region_yolo_impl : typed_primitive_impl_ocl<region_yolo> {
+    using parent = typed_primitive_impl_ocl<region_yolo>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<region_yolo_gpu>(*this);
+        return make_unique<region_yolo_impl>(*this);
    }

    static primitive_impl* create(const region_yolo_node& arg) {
@ -41,7 +41,7 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto region_yolo_node = new region_yolo_gpu(arg, best_kernels[0]);
+        auto region_yolo_node = new region_yolo_impl(arg, best_kernels[0]);

        return region_yolo_node;
    }
@ -49,14 +49,15 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {

 namespace detail {

-attach_region_yolo_gpu::attach_region_yolo_gpu() {
-    implementation_map<region_yolo>::add(
-        {{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), region_yolo_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), region_yolo_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), region_yolo_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), region_yolo_gpu::create}});
+attach_region_yolo_impl::attach_region_yolo_impl() {
+    implementation_map<region_yolo>::add(impl_types::ocl, region_yolo_impl::create, {  // registers region_yolo_impl::create for impl_types::ocl; keys are f32/f16 for bfyx and byxf
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
@ -0,0 +1,79 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "register.hpp"
+
+namespace cldnn {
+namespace ocl {
+
+#define REGISTER_OCL(prim)                      \
+    static detail::attach_##prim##_impl attach_##prim  // expands to a static detail::attach_<prim>_impl object named attach_<prim>
+
+void register_implementations() {  // each REGISTER_OCL below defines a function-local static, so each attach_<prim>_impl constructor runs once, on the first call
+    REGISTER_OCL(activation);
+    REGISTER_OCL(arg_max_min);
+    REGISTER_OCL(average_unpooling);
+    REGISTER_OCL(binary_convolution);
+    REGISTER_OCL(border);
+    REGISTER_OCL(broadcast);
+    REGISTER_OCL(concatenation);
+    REGISTER_OCL(convolution);
+    REGISTER_OCL(crop);
+    REGISTER_OCL(custom_gpu_primitive);
+    REGISTER_OCL(deconvolution);
+    REGISTER_OCL(deformable_conv);
+    REGISTER_OCL(deformable_interp);
+    REGISTER_OCL(depth_to_space);
+    REGISTER_OCL(batch_to_space);
+    REGISTER_OCL(eltwise);
+    REGISTER_OCL(fully_connected);
+    REGISTER_OCL(gather);
+    REGISTER_OCL(gather_nd);
+    REGISTER_OCL(gemm);
+    REGISTER_OCL(lrn);
+    REGISTER_OCL(lstm_gemm);
+    REGISTER_OCL(lstm_elt);
+    REGISTER_OCL(max_unpooling);
+    REGISTER_OCL(mutable_data);
+    REGISTER_OCL(mvn);
+    REGISTER_OCL(normalize);
+    REGISTER_OCL(one_hot);
+    REGISTER_OCL(permute);
+    REGISTER_OCL(pooling);
+    REGISTER_OCL(pyramid_roi_align);
+    REGISTER_OCL(quantize);
+    REGISTER_OCL(reduce);
+    REGISTER_OCL(region_yolo);
+    REGISTER_OCL(reorder);
+    REGISTER_OCL(reorg_yolo);
+    REGISTER_OCL(reshape);
+    REGISTER_OCL(reverse_sequence);
+    REGISTER_OCL(roi_pooling);
+    REGISTER_OCL(scale);
+    REGISTER_OCL(scatter_update);
+    REGISTER_OCL(scatter_nd_update);
+    REGISTER_OCL(scatter_elements_update);
+    REGISTER_OCL(select);
+    REGISTER_OCL(shuffle_channels);
+    REGISTER_OCL(softmax);
+    REGISTER_OCL(space_to_batch);
+    REGISTER_OCL(space_to_depth);
+    REGISTER_OCL(strided_slice);
+    REGISTER_OCL(tile);
+    REGISTER_OCL(fused_conv_eltwise);
+    REGISTER_OCL(lstm_dynamic_input);
+    REGISTER_OCL(lstm_dynamic_timeloop);
+    REGISTER_OCL(generic_layer);
+    REGISTER_OCL(gather_tree);
+    REGISTER_OCL(resample);
+    REGISTER_OCL(grn);
+    REGISTER_OCL(ctc_greedy_decoder);
+    REGISTER_OCL(cum_sum);
+    REGISTER_OCL(embedding_bag);
+    REGISTER_OCL(extract_image_patches);
+}
+
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/register.hpp
@ -13,20 +13,16 @@
 #include "cldnn/primitives/border.hpp"
 #include "cldnn/primitives/broadcast.hpp"
 #include "cldnn/primitives/concatenation.hpp"
-#include "cldnn/primitives/condition.hpp"
 #include "cldnn/primitives/convolution.hpp"
 #include "cldnn/primitives/crop.hpp"
 #include "cldnn/primitives/custom_gpu_primitive.hpp"
-#include "cldnn/primitives/data.hpp"
 #include "cldnn/primitives/deconvolution.hpp"
 #include "cldnn/primitives/depth_to_space.hpp"
-#include "cldnn/primitives/detection_output.hpp"
 #include "cldnn/primitives/eltwise.hpp"
 #include "cldnn/primitives/fully_connected.hpp"
 #include "cldnn/primitives/gather.hpp"
 #include "cldnn/primitives/gather_nd.hpp"
 #include "cldnn/primitives/gemm.hpp"
-#include "cldnn/primitives/input_layout.hpp"
 #include "cldnn/primitives/lrn.hpp"
 #include "cldnn/primitives/lstm.hpp"
 #include "cldnn/primitives/lstm_dynamic.hpp"
@ -37,8 +33,6 @@
 #include "cldnn/primitives/one_hot.hpp"
 #include "cldnn/primitives/permute.hpp"
 #include "cldnn/primitives/pooling.hpp"
-#include "cldnn/primitives/prior_box.hpp"
-#include "cldnn/primitives/proposal.hpp"
 #include "cldnn/primitives/pyramid_roi_align.hpp"
 #include "cldnn/primitives/quantize.hpp"
 #include "cldnn/primitives/reduce.hpp"
@ -63,97 +57,88 @@
 #include "cldnn/primitives/fused_conv_eltwise.hpp"
 #include "cldnn/primitives/lstm_dynamic_input.hpp"
 #include "cldnn/primitives/lstm_dynamic_timeloop.hpp"
-#include "cldnn/primitives/non_max_suppression.hpp"
 #include "cldnn/primitives/grn.hpp"
 #include "cldnn/primitives/ctc_greedy_decoder.hpp"
-#include "cldnn/primitives/loop.hpp"
 #include "generic_layer.hpp"


-namespace cldnn { namespace gpu {
-void register_implementations_gpu();
+namespace cldnn {
+namespace ocl {
+void register_implementations();

 namespace detail {

-#define REGISTER_GPU(prim)              \
-    struct attach_##prim##_gpu {        \
-        attach_##prim##_gpu();          \
+#define REGISTER_OCL(prim)              \
+    struct attach_##prim##_impl {        \
+        attach_##prim##_impl();          \
    }

-REGISTER_GPU(activation);
-REGISTER_GPU(arg_max_min);
-REGISTER_GPU(average_unpooling);
-REGISTER_GPU(batch_to_space);
-REGISTER_GPU(binary_convolution);
-REGISTER_GPU(border);
-REGISTER_GPU(broadcast);
-REGISTER_GPU(concatenation);
-REGISTER_GPU(condition);
-REGISTER_GPU(convolution);
-REGISTER_GPU(crop);
-REGISTER_GPU(custom_gpu_primitive);
-REGISTER_GPU(data);
-REGISTER_GPU(deconvolution);
-REGISTER_GPU(deformable_conv);
-REGISTER_GPU(deformable_interp);
-REGISTER_GPU(depth_to_space);
-REGISTER_GPU(detection_output);
-REGISTER_GPU(eltwise);
-REGISTER_GPU(embed);
-REGISTER_GPU(fully_connected);
-REGISTER_GPU(gather);
-REGISTER_GPU(gather_nd);
-REGISTER_GPU(gemm);
-REGISTER_GPU(input_layout);
-REGISTER_GPU(lookup_table);
-REGISTER_GPU(lrn);
-REGISTER_GPU(lstm_gemm);
-REGISTER_GPU(lstm_elt);
-REGISTER_GPU(max_unpooling);
-REGISTER_GPU(mutable_data);
-REGISTER_GPU(mvn);
-REGISTER_GPU(normalize);
-REGISTER_GPU(one_hot);
-REGISTER_GPU(permute);
-REGISTER_GPU(pooling);
-REGISTER_GPU(prior_box);
-REGISTER_GPU(proposal);
-REGISTER_GPU(pyramid_roi_align);
-REGISTER_GPU(quantize);
-REGISTER_GPU(reduce);
-REGISTER_GPU(region_yolo);
-REGISTER_GPU(reorder);
-REGISTER_GPU(reorg_yolo);
-REGISTER_GPU(reshape);
-REGISTER_GPU(reverse_sequence);
-REGISTER_GPU(roi_pooling);
-REGISTER_GPU(scale);
-REGISTER_GPU(scatter_update);
-REGISTER_GPU(scatter_elements_update);
-REGISTER_GPU(scatter_nd_update);
-REGISTER_GPU(select);
-REGISTER_GPU(shuffle_channels);
-REGISTER_GPU(softmax);
-REGISTER_GPU(space_to_batch);
-REGISTER_GPU(space_to_depth);
-REGISTER_GPU(strided_slice);
-REGISTER_GPU(tile);
-REGISTER_GPU(fused_conv_eltwise);
-REGISTER_GPU(lstm_dynamic_input);
-REGISTER_GPU(lstm_dynamic_timeloop);
-REGISTER_GPU(generic_layer);
-REGISTER_GPU(gather_tree);
-REGISTER_GPU(resample);
-REGISTER_GPU(non_max_suppression);
-REGISTER_GPU(grn);
-REGISTER_GPU(ctc_greedy_decoder);
-REGISTER_GPU(cum_sum);
-REGISTER_GPU(embedding_bag);
-REGISTER_GPU(extract_image_patches);
-REGISTER_GPU(loop);
+REGISTER_OCL(activation);  // in this header REGISTER_OCL(prim) only declares struct attach_<prim>_impl and its default constructor
+REGISTER_OCL(arg_max_min);
+REGISTER_OCL(average_unpooling);
+REGISTER_OCL(batch_to_space);
+REGISTER_OCL(binary_convolution);
+REGISTER_OCL(border);
+REGISTER_OCL(broadcast);
+REGISTER_OCL(concatenation);
+REGISTER_OCL(convolution);
+REGISTER_OCL(crop);
+REGISTER_OCL(custom_gpu_primitive);
+REGISTER_OCL(data);  // NOTE(review): no matching REGISTER_OCL(data) in register.cpp's register_implementations() - confirm this declaration is still needed
+REGISTER_OCL(deconvolution);
+REGISTER_OCL(deformable_conv);
+REGISTER_OCL(deformable_interp);
+REGISTER_OCL(depth_to_space);
+REGISTER_OCL(eltwise);
+REGISTER_OCL(embed);  // NOTE(review): no matching REGISTER_OCL(embed) in register.cpp's register_implementations() - confirm this declaration is still needed
+REGISTER_OCL(fully_connected);
+REGISTER_OCL(gather);
+REGISTER_OCL(gather_nd);
+REGISTER_OCL(gemm);
+REGISTER_OCL(lrn);
+REGISTER_OCL(lstm_gemm);
+REGISTER_OCL(lstm_elt);
+REGISTER_OCL(max_unpooling);
+REGISTER_OCL(mutable_data);
+REGISTER_OCL(mvn);
+REGISTER_OCL(normalize);
+REGISTER_OCL(one_hot);
+REGISTER_OCL(permute);
+REGISTER_OCL(pooling);
+REGISTER_OCL(pyramid_roi_align);
+REGISTER_OCL(quantize);
+REGISTER_OCL(reduce);
+REGISTER_OCL(region_yolo);
+REGISTER_OCL(reorder);
+REGISTER_OCL(reorg_yolo);
+REGISTER_OCL(reshape);
+REGISTER_OCL(reverse_sequence);
+REGISTER_OCL(roi_pooling);
+REGISTER_OCL(scale);
+REGISTER_OCL(scatter_update);
+REGISTER_OCL(scatter_elements_update);
+REGISTER_OCL(scatter_nd_update);
+REGISTER_OCL(select);
+REGISTER_OCL(shuffle_channels);
+REGISTER_OCL(softmax);
+REGISTER_OCL(space_to_batch);
+REGISTER_OCL(space_to_depth);
+REGISTER_OCL(strided_slice);
+REGISTER_OCL(tile);
+REGISTER_OCL(fused_conv_eltwise);
+REGISTER_OCL(lstm_dynamic_input);
+REGISTER_OCL(lstm_dynamic_timeloop);
+REGISTER_OCL(generic_layer);
+REGISTER_OCL(gather_tree);
+REGISTER_OCL(resample);
+REGISTER_OCL(grn);
+REGISTER_OCL(ctc_greedy_decoder);
+REGISTER_OCL(cum_sum);
+REGISTER_OCL(embedding_bag);
+REGISTER_OCL(extract_image_patches);

-#undef REGISTER_GPU
+#undef REGISTER_OCL

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/reorder.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/reorder.cpp
@ -3,22 +3,22 @@
 //

 #include "reorder_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "reorder/reorder_kernel_selector.h"
 #include "reorder/reorder_kernel_base.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct reorder_gpu : typed_primitive_gpu_impl<reorder> {
-    using parent = typed_primitive_gpu_impl<reorder>;
+struct reorder_impl : typed_primitive_impl_ocl<reorder> {
+    using parent = typed_primitive_impl_ocl<reorder>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<reorder_gpu>(*this);
+        return make_unique<reorder_impl>(*this);
    }

 protected:
@ -108,7 +108,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto reorder = new reorder_gpu(arg, best_kernels[0]);
+        auto reorder = new reorder_impl(arg, best_kernels[0]);

        return reorder;
    }
@ -116,10 +116,10 @@ public:

 namespace detail {

-attach_reorder_gpu::attach_reorder_gpu() {
-    implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}});
+attach_reorder_impl::attach_reorder_impl() {  // adds reorder_impl::create to implementation_map<reorder> under impl_types::ocl
+    implementation_map<reorder>::add(impl_types::ocl, reorder_impl::create, {});  // empty (data type, format) list - presumably "no restriction", like the engine-only entry it replaces; confirm in implementation_map::add
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/reorg_yolo.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/reorg_yolo.cpp
@ -3,22 +3,22 @@
 //

 #include "reorg_yolo_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "reorg_yolo/reorg_yolo_kernel_selector.h"
 #include "reorg_yolo/reorg_yolo_kernel_ref.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
-    using parent = typed_primitive_gpu_impl<reorg_yolo>;
+struct reorg_yolo_impl : typed_primitive_impl_ocl<reorg_yolo> {
+    using parent = typed_primitive_impl_ocl<reorg_yolo>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<reorg_yolo_gpu>(*this);
+        return make_unique<reorg_yolo_impl>(*this);
    }

    static primitive_impl* create(const reorg_yolo_node& arg) {
@ -38,7 +38,7 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto reorg_yolo_node = new reorg_yolo_gpu(arg, best_kernels[0]);
+        auto reorg_yolo_node = new reorg_yolo_impl(arg, best_kernels[0]);

        return reorg_yolo_node;
    }
@ -46,16 +46,17 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {

 namespace detail {

-attach_reorg_yolo_gpu::attach_reorg_yolo_gpu() {
-    auto val_fw = reorg_yolo_gpu::create;
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-    implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+attach_reorg_yolo_impl::attach_reorg_yolo_impl() {  // adds reorg_yolo_impl::create to implementation_map<reorg_yolo> under impl_types::ocl
+    implementation_map<reorg_yolo>::add(impl_types::ocl, reorg_yolo_impl::create, {  // one add() call with the full (data type, format) list
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/resample.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/resample.cpp
@ -3,15 +3,15 @@
 //

 #include "resample_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "kernel_selector/core/actual_kernels/resample/resample_kernel_selector.h"
 #include "kernel_selector/core/actual_kernels/resample/resample_kernel_base.h"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 namespace {
 inline kernel_selector::sample_type convert_to_sample_type(resample_type type) {
@ -96,12 +96,12 @@ inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis ax
 }
 }  // namespace

-struct resample_gpu : typed_primitive_gpu_impl<resample> {
-    using parent = typed_primitive_gpu_impl<resample>;
+struct resample_impl : typed_primitive_impl_ocl<resample> {
+    using parent = typed_primitive_impl_ocl<resample>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<resample_gpu>(*this);
+        return make_unique<resample_impl>(*this);
    }

    static primitive_impl* create(const resample_node& arg) {
@ -135,7 +135,7 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto resample = new resample_gpu(arg, best_kernels[0]);
+        auto resample = new resample_impl(arg, best_kernels[0]);

        return resample;
    }
@ -143,31 +143,32 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {

 namespace detail {

-attach_resample_gpu::attach_resample_gpu() {
-    implementation_map<resample>::add(
-        {{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), resample_gpu::create},
-         {std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), resample_gpu::create}});
+attach_resample_impl::attach_resample_impl() {  // adds resample_impl::create to implementation_map<resample> under impl_types::ocl
+    implementation_map<resample>::add(impl_types::ocl, resample_impl::create, {  // list of (data type, format) pairs passed with the factory
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),  // only f16 is listed for fs_b_yx_fsv32
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/reshape.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/reshape.cpp
@ -3,28 +3,28 @@
 //

 #include "reshape_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "reshape/reshape_kernel_ref.h"
 #include "reshape/reshape_kernel_selector.h"
 #include "cldnn/runtime/error_handler.hpp"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

-struct reshape_gpu : public typed_primitive_gpu_impl<reshape> {
-    using parent = typed_primitive_gpu_impl<reshape>;
+struct reshape_impl : public typed_primitive_impl_ocl<reshape> {
+    using parent = typed_primitive_impl_ocl<reshape>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<reshape_gpu>(*this);
+        return make_unique<reshape_impl>(*this);
    }

 public:
    static primitive_impl* create(reshape_node const& arg) {
        if (arg.can_be_optimized()) {
-            return new reshape_gpu(arg, {});
+            return new reshape_impl(arg, {});
        }

        auto reorder_params = get_default_params<kernel_selector::reshape_params>(arg);
@ -39,7 +39,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto reshape = new reshape_gpu(arg, best_kernels[0]);
+        auto reshape = new reshape_impl(arg, best_kernels[0]);

        return reshape;
    }
@ -47,10 +47,10 @@ public:

 namespace detail {

-attach_reshape_gpu::attach_reshape_gpu() {
-    implementation_map<reshape>::add({{engine_types::ocl, reshape_gpu::create}});
+attach_reshape_impl::attach_reshape_impl() {  // adds reshape_impl::create to implementation_map<reshape> under impl_types::ocl
+    implementation_map<reshape>::add(impl_types::ocl, reshape_impl::create, {});  // empty (data type, format) list - presumably "no restriction", like the engine-only entry it replaces; confirm in implementation_map::add
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/reverse_sequence.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/reverse_sequence.cpp
@ -3,8 +3,8 @@
 //

 #include "reverse_sequence_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "reverse_sequence/reverse_sequence_kernel_selector.h"
 #include "reverse_sequence/reverse_sequence_kernel_ref.h"
@ -13,13 +13,13 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
-struct reverse_sequence_gpu : typed_primitive_gpu_impl<reverse_sequence> {
-    using parent = typed_primitive_gpu_impl<reverse_sequence>;
+namespace ocl {
+struct reverse_sequence_impl : typed_primitive_impl_ocl<reverse_sequence> {
+    using parent = typed_primitive_impl_ocl<reverse_sequence>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<reverse_sequence_gpu>(*this);
+        return make_unique<reverse_sequence_impl>(*this);
    }

 public:
@ -41,7 +41,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto reverse_sequence = new reverse_sequence_gpu(arg, best_kernels[0]);
+        auto reverse_sequence = new reverse_sequence_impl(arg, best_kernels[0]);

        return reverse_sequence;
    }
@ -49,15 +49,16 @@ public:

 namespace detail {

-attach_reverse_sequence_gpu::attach_reverse_sequence_gpu() {
-    auto val_fw = reverse_sequence_gpu::create;
-    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
-    implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+attach_reverse_sequence_impl::attach_reverse_sequence_impl() {  // adds reverse_sequence_impl::create to implementation_map<reverse_sequence> under impl_types::ocl
+    implementation_map<reverse_sequence>::add(impl_types::ocl, reverse_sequence_impl::create, {  // every listed pair uses format::bfyx
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/roi_pooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/roi_pooling.cpp
@ -3,15 +3,15 @@
 //

 #include "roi_pooling_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "cldnn/runtime/error_handler.hpp"
 #include "kernel_selector_helper.h"
 #include "roi_pooling/roi_pooling_kernel_selector.h"
 #include "roi_pooling/roi_pooling_kernel_ref.h"

 namespace cldnn {
-namespace gpu {
+namespace ocl {

 namespace {
 kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
@ -33,12 +33,12 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
 }
 }  // namespace

-struct roi_pooling_gpu : typed_primitive_gpu_impl<roi_pooling> {
-    using parent = typed_primitive_gpu_impl<roi_pooling>;
+struct roi_pooling_impl : typed_primitive_impl_ocl<roi_pooling> {
+    using parent = typed_primitive_impl_ocl<roi_pooling>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<roi_pooling_gpu>(*this);
+        return make_unique<roi_pooling_impl>(*this);
    }

 protected:
@ -108,7 +108,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto roi_pool = new roi_pooling_gpu(arg, best_kernels[0]);
+        auto roi_pool = new roi_pooling_impl(arg, best_kernels[0]);

        return roi_pool;
    }
@ -116,13 +116,13 @@ public:

 namespace detail {

-attach_roi_pooling_gpu::attach_roi_pooling_gpu() {
-    implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                         roi_pooling_gpu::create);
-    implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                         roi_pooling_gpu::create);
+attach_roi_pooling_impl::attach_roi_pooling_impl() {  // adds roi_pooling_impl::create to implementation_map<roi_pooling> under impl_types::ocl
+    implementation_map<roi_pooling>::add(impl_types::ocl, roi_pooling_impl::create, {  // f16 and f32, both with format::bfyx
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp
@ -0,0 +1,136 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "scale_inst.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
+#include "kernel_selector_helper.h"
+#include "eltwise/eltwise_kernel_selector.h"
+#include "eltwise/eltwise_kernel_base.h"
+#include "cldnn/runtime/error_handler.hpp"
+
+using namespace cldnn;
+
+namespace cldnn {
+namespace ocl {
+
+struct scale_impl : typed_primitive_impl_ocl<scale> {  // impl for the scale primitive; create() picks its kernel through the eltwise kernel selector
+    using parent = typed_primitive_impl_ocl<scale>;
+    using parent::parent;  // inherit the base-class constructors
+
+    std::unique_ptr<primitive_impl> clone() const override {  // returns a copy-constructed scale_impl
+        return make_unique<scale_impl>(*this);
+    }
+
+protected:
+    kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {  // starts from parent::get_arguments, then sets inputs/output
+        kernel_arguments_data args = parent::get_arguments(instance, split);
+        args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};  // overwrites args.inputs with {input, scale}
+        args.output = instance.output_memory_ptr();
+
+        if (_outer.bias_term()) {  // bias memory is appended as a third input only when the node reports a bias term
+            args.inputs.push_back(instance.bias_memory());
+        }
+        return args;
+    }
+
+public:
+    static primitive_impl* create(const scale_node& arg) {  // builds eltwise params for the node and returns a new scale_impl (raw pointer from new)
+        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
+        auto ew_optional_params =
+            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
+
+        ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));  // presumably becomes Buffer(1); assumes get_default_params already added input 0 - TODO confirm
+
+        ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),  // first operation: MUL over Buffer(0) and Buffer(1)
+                                         kernel_selector::eltwise_params::InputType::Buffer(1)},
+                                        kernel_selector::eltwise_mode::MUL});
+
+        if (arg.bias_term()) {  // optional second operation: ADD over Intermediate(0) and Buffer(2)
+            ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
+            ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0),  // presumably the MUL result - confirm InputType::Intermediate semantics
+                                             kernel_selector::eltwise_params::InputType::Buffer(2)},
+                                            kernel_selector::eltwise_mode::ADD});
+        }
+
+        ew_params.layoutBased = true;
+
+        auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(),  // presumably reports an error when best_kernels is empty - confirm against error_handler.hpp
+                         "Best_kernel.empty()",
+                         best_kernels.empty(),
+                         "Cannot find a proper kernel with this arguments");
+
+        auto scale = new scale_impl(arg, best_kernels[0]);  // uses the first returned kernel; the local 'scale' hides the primitive type name from here on
+
+        return scale;
+    }
+};
+
+namespace detail {
+
+attach_scale_impl::attach_scale_impl() {  // adds scale_impl::create to implementation_map<scale> under impl_types::ocl
+    implementation_map<scale>::add(impl_types::ocl, scale_impl::create, {  // list of (data type, format) pairs passed with the factory
+        std::make_tuple(data_types::f32, format::yxfb),
+        std::make_tuple(data_types::f16, format::yxfb),
+        std::make_tuple(data_types::i32, format::yxfb),
+        std::make_tuple(data_types::f32, format::byxf),
+        std::make_tuple(data_types::f16, format::byxf),
+        std::make_tuple(data_types::i32, format::byxf),
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::u8, format::bfyx),
+        std::make_tuple(data_types::i8, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::u8, format::bfzyx),
+        std::make_tuple(data_types::i8, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+        std::make_tuple(data_types::u8, format::bfwzyx),
+        std::make_tuple(data_types::i8, format::bfwzyx),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
+        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),  // fs_b_yx_fsv32 is listed for f16 and i32 only
+        std::make_tuple(data_types::i32, format::fs_b_yx_fsv32),
+        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::i32, format::b_fs_yx_fsv4),
+        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
+        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
+        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv32),
+    });
+}
+
+}  // namespace detail
+}  // namespace ocl
+}  // namespace cldnn
--- a/inference-engine/thirdparty/clDNN/src/impls/ocl/scatter_elements_update.cpp
+++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/scatter_elements_update.cpp
@ -3,8 +3,8 @@
 //

 #include "scatter_elements_update_inst.h"
-#include "primitive_gpu_base.h"
-#include "implementation_map.h"
+#include "primitive_base.hpp"
+#include "impls/implementation_map.hpp"
 #include "kernel_selector_helper.h"
 #include "scatter_update/scatter_elements_update_kernel_selector.h"
 #include "scatter_update/scatter_elements_update_kernel_ref.h"
@ -13,7 +13,7 @@
 using namespace cldnn;

 namespace cldnn {
-namespace gpu {
+namespace ocl {
 kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatter_elements_update_axis axis, const scatter_elements_update_node& arg) {
    switch (axis) {
        case scatter_elements_update::along_x:
@ -34,12 +34,12 @@ kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatt
    return kernel_selector::scatter_update_axis::X;
 }

-struct scatter_elements_update_gpu : typed_primitive_gpu_impl<scatter_elements_update> {
-    using parent = typed_primitive_gpu_impl<scatter_elements_update>;
+struct scatter_elements_update_impl : typed_primitive_impl_ocl<scatter_elements_update> {
+    using parent = typed_primitive_impl_ocl<scatter_elements_update>;
    using parent::parent;

    std::unique_ptr<primitive_impl> clone() const override {
-        return make_unique<scatter_elements_update_gpu>(*this);
+        return make_unique<scatter_elements_update_impl>(*this);
    }

 public:
@ -61,7 +61,7 @@ public:
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

-        auto scatter_elements_update = new scatter_elements_update_gpu(arg, best_kernels[0]);
+        auto scatter_elements_update = new scatter_elements_update_impl(arg, best_kernels[0]);

        return scatter_elements_update;
    }
@ -69,21 +69,20 @@ public:

 namespace detail {

-attach_scatter_elements_update_gpu::attach_scatter_elements_update_gpu() {
-    auto val_fw = scatter_elements_update_gpu::create;
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
-
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
-
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
-    implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
+attach_scatter_elements_update_impl::attach_scatter_elements_update_impl() {
+    implementation_map<scatter_elements_update>::add(impl_types::ocl, scatter_elements_update_impl::create, {
+        std::make_tuple(data_types::f32, format::bfyx),
+        std::make_tuple(data_types::f16, format::bfyx),
+        std::make_tuple(data_types::i32, format::bfyx),
+        std::make_tuple(data_types::f32, format::bfzyx),
+        std::make_tuple(data_types::f16, format::bfzyx),
+        std::make_tuple(data_types::i32, format::bfzyx),
+        std::make_tuple(data_types::f32, format::bfwzyx),
+        std::make_tuple(data_types::f16, format::bfwzyx),
+        std::make_tuple(data_types::i32, format::bfwzyx),
+    });
 }

 }  // namespace detail
-}  // namespace gpu
+}  // namespace ocl
 }  // namespace cldnn
--- a/Show More
+++ b/Show More