[IE CLDNN] NGraph integration into cldnn plugin (#2506)

Co-authored-by: Roman Lyamin <roman.lyamin@intel.com> Co-authored-by: Mikhail Letavin <mikhail.letavin@intel.com>
2020-12-23 13:35:44 +03:00 · 2020-12-23 13:35:44 +03:00 · 241b0faea1
commit 241b0faea1
parent bd9bbe09c3
159 changed files with 8828 additions and 9773 deletions
--- a/inference-engine/src/cldnn_engine/CMakeLists.txt
+++ b/inference-engine/src/cldnn_engine/CMakeLists.txt
@ -11,7 +11,7 @@ if (LINUX)
    endif()
 endif()

-file(GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file(GLOB_RECURSE MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
 file(GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)

 addVersionDefines(cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION)
@ -22,9 +22,10 @@ ie_add_plugin(NAME ${TARGET_NAME}
              VERSION_DEFINES_FOR cldnn_engine.cpp)

 target_link_libraries(${TARGET_NAME} PRIVATE clDNN_lib pugixml
-                                             inference_engine inference_engine_legacy
+                                             inference_engine
                                             inference_engine_transformations
-                                             inference_engine_lp_transformations)
+                                             inference_engine_lp_transformations
+                                             ${NGRAPH_LIBRARIES})

 set(CLDNN_TOP_FOLDER "${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN")
 target_include_directories(${TARGET_NAME} PRIVATE
--- a/inference-engine/src/cldnn_engine/cldnn_common_utils.h
+++ b/inference-engine/src/cldnn_engine/cldnn_common_utils.h
@ -9,20 +9,10 @@
 #include <cpp_interfaces/exception2status.hpp>
 #include <api/layout.hpp>

-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
+#include "ngraph/type/element_type.hpp"

 namespace CLDNNPlugin {

-#ifndef NDEBUG
-#define THROW_CLDNN_EXCEPTION(desc)\
-do { \
-InferenceEngineException ex(__FILE__, __LINE__);\
-std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
-__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0);
-#else
-#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc;
-#endif  // NDEBUG
 #define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)

 const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) {
@ -34,33 +24,57 @@ const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, i
    case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
    case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
    case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
-    default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") for clDNN tensor");
+    default: THROW_IE_EXCEPTION << "Invalid dimensions size(" << dims.size() << ") for clDNN tensor";
    }
 };

 inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
    switch (p) {
-    case Precision::I16:
-    case Precision::U16:
-    case Precision::FP32:
+    case InferenceEngine::Precision::I16:
+    case InferenceEngine::Precision::U16:
+    case InferenceEngine::Precision::FP32:
        return cldnn::data_types::f32;
-    case Precision::FP16:
+    case InferenceEngine::Precision::FP16:
        return cldnn::data_types::f16;
-    case Precision::U8:
+    case InferenceEngine::Precision::U8:
        return cldnn::data_types::u8;
-    case Precision::I8:
+    case InferenceEngine::Precision::I8:
        return cldnn::data_types::i8;
-    case Precision::I32:
+    case InferenceEngine::Precision::I32:
        return cldnn::data_types::i32;
-    case Precision::I64:
+    case InferenceEngine::Precision::I64:
        return cldnn::data_types::i64;
-    case Precision::BIN:
+    case InferenceEngine::Precision::BIN:
        return cldnn::data_types::bin;
-    case Precision::BOOL:
+    case InferenceEngine::Precision::BOOL:
        return cldnn::data_types::i8;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
-        break;
+    }
+}
+
+inline cldnn::data_types DataTypeFromPrecision(ngraph::element::Type t) {  // nGraph overload: maps an ngraph element type to a cldnn::data_types value; throws for any type not listed below
+    switch (t) {
+    case ngraph::element::Type_t::i16:
+    case ngraph::element::Type_t::u16:
+    case ngraph::element::Type_t::f32:
+        return cldnn::data_types::f32;  // i16 and u16 share the f32 mapping, mirroring the InferenceEngine::Precision overload
+    case ngraph::element::Type_t::f16:
+        return cldnn::data_types::f16;
+    case ngraph::element::Type_t::u8:
+        return cldnn::data_types::u8;
+    case ngraph::element::Type_t::i8:
+        return cldnn::data_types::i8;
+    case ngraph::element::Type_t::i32:
+        return cldnn::data_types::i32;
+    case ngraph::element::Type_t::i64:
+        return cldnn::data_types::i64;
+    case ngraph::element::Type_t::boolean:
+        return cldnn::data_types::i8;  // boolean is mapped to i8, like Precision::BOOL
+    case ngraph::element::Type_t::u1:
+        return cldnn::data_types::bin;  // u1 is mapped to the binary data type, where the Precision overload uses Precision::BIN
+    default:
+        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << t.get_type_name()<< " precision";  // every other element type is rejected with a parameter-mismatch error
    }
 }

@ -81,7 +95,6 @@ inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) {
        return cldnn::format::byxf;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
-        break;
    }
 }

@ -107,7 +120,6 @@ inline cldnn::format FormatFromTensorDesc(InferenceEngine::TensorDesc desc) {
        return cldnn::format::byxf;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << desc.getLayout() << " layout";
-        break;
    }
 }

@ -124,12 +136,11 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
        return cldnn::format::nv12;
    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " image layout";
-        break;
    }
 }


-inline cldnn::format defaultFormatForDims(size_t dimensions) {
+inline cldnn::format DefaultFormatForDims(size_t dimensions) {
    switch (dimensions) {
    case 0:
    case 1:
@ -142,7 +153,7 @@ inline cldnn::format defaultFormatForDims(size_t dimensions) {
    case 6:
        return cldnn::format::bfwzyx;
    default:
-        THROW_CLDNN_EXCEPTION("Unsupported number of dimensions: " << dimensions);
+        THROW_IE_EXCEPTION << "Unsupported number of dimensions: " << dimensions;
    }

    return cldnn::format::bfyx;  // Should not get here
--- a/inference-engine/src/cldnn_engine/cldnn_config.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp
@ -7,6 +7,7 @@
 #include <cldnn/cldnn_config.hpp>
 #include "cldnn_config.h"
 #include "cpp_interfaces/exception2status.hpp"
+#include "details/ie_exception.hpp"
 #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "ie_api.h"
 #include "file_utils.h"
--- a/inference-engine/src/cldnn_engine/cldnn_config.h
+++ b/inference-engine/src/cldnn_engine/cldnn_config.h
@ -6,11 +6,6 @@

 #include <map>
 #include <string>
-#include <vector>
-
-#include "ie_blob.h"
-#include "cpp/ie_cnn_network.h"
-#include "debug_options.h"

 #include "cldnn_custom_layer.h"

--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@ -4,7 +4,6 @@

 #include <limits>
 #include <algorithm>
-
 #include <string>
 #include <map>
 #include <vector>
@ -12,62 +11,86 @@
 #include <cmath>
 #include <tuple>
 #include <cctype>
+#include <memory>

 #include "ie_metric_helpers.hpp"
-#include <ie_data.h>
-#include <cpp/ie_cnn_network.h>
-#include <description_buffer.hpp>
-#include <memory>
 #include "ie_plugin_config.hpp"
-#include "caseless.hpp"
-#include <legacy/details/ie_cnn_network_tools.h>
 #include <ngraph/opsets/opset2.hpp>
 #include <ngraph/opsets/opset3.hpp>
 #include <ngraph/opsets/opset4.hpp>
 #include <ngraph/opsets/opset5.hpp>
 #include <ngraph/pass/manager.hpp>
+#include <ngraph/pass/constant_folding.hpp>
 #include <generic_ie.hpp>
-#include <transformations/control_flow/unroll_tensor_iterator.hpp>
-#include <transformations/common_optimizations/common_optimizations.hpp>
-#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
+#include <ie_ngraph_utils.hpp>
+
 #include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
+#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
+
+#include <transformations/control_flow/unroll_tensor_iterator.hpp>
+
+#include <transformations/common_optimizations/common_optimizations.hpp>
+#include <transformations/op_conversions/convert_depth_to_space.hpp>
+#include <transformations/op_conversions/convert_space_to_depth.hpp>
+#include <transformations/op_conversions/convert_gelu.hpp>
+#include <transformations/op_conversions/convert_mod.hpp>
+#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
+#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
+#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
+#include <transformations/op_conversions/softplus_decomposition.hpp>
+#include <transformations/op_conversions/convert_space_to_batch.hpp>
+#include <transformations/op_conversions/convert_batch_to_space.hpp>
+#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
+#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
+#include <transformations/op_conversions/hswish_decomposition.hpp>
+#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
+#include <transformations/op_conversions/log_softmax_decomposition.hpp>
 #include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
 #include <transformations/op_conversions/convert_ti_to_sequences.hpp>
 #include <transformations/op_conversions/gru_cell_decomposition.hpp>
 #include <transformations/op_conversions/lstm_cell_decomposition.hpp>
 #include <transformations/op_conversions/rnn_cell_decomposition.hpp>
-#include <transformations/init_node_info.hpp>
+#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp>
+#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
+#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
+#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
 #include <transformations/convert_precision.hpp>
+#include <transformations/init_node_info.hpp>
 #include <transformations/rt_info/fused_names_attribute.hpp>

-#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
-#include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
-#include <legacy/transformations/convert_opset1_to_legacy/convert_nms_5_to_legacy.hpp>
-#include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
-#include <legacy/convert_function_to_cnn_network.hpp>
-#include <legacy/ie_util_internal.hpp>
-#include <legacy/graph_transformer.h>
+#include <low_precision/transformer.hpp>
+#include <low_precision/mat_mul.hpp>

 #include "cldnn_engine.h"
 #include "cldnn_executable_network.h"
 #include "cldnn_custom_layer.h"

-#include <low_precision/transformer.hpp>
-#include <low_precision/mat_mul.hpp>
-
 #ifdef __linux__
 # include <dlfcn.h>
 #endif

-using InferenceEngine::DescriptionBuffer;
-using InferenceEngine::TBlob;
-using InferenceEngine::Blob;
 using namespace InferenceEngine;
 using namespace InferenceEngine::gpu;
 using namespace InferenceEngine::details;

 namespace CLDNNPlugin {

+#define FACTORY_DECLARATION(op_version, op_name) \
+    void __register ## _ ## op_name ## _ ## op_version();  // expands to a forward declaration: void __register_<op_name>_<op_version>();
+// FACTORY_CALL expands to a call of the same __register_<op_name>_<op_version>() function.
+#define FACTORY_CALL(op_version, op_name) \
+    __register ## _ ## op_name ## _ ## op_version();
+// First pass over the list header: REGISTER_FACTORY is mapped to FACTORY_DECLARATION, so every REGISTER_FACTORY(...) use in the header becomes a declaration (header contents are not visible here).
+#define REGISTER_FACTORY(op_version, op_name) FACTORY_DECLARATION(op_version, op_name)
+#include "cldnn_primitives_list.hpp"
+#undef REGISTER_FACTORY
+// Second pass: the header is re-included inside RegisterPrimitives with REGISTER_FACTORY mapped to FACTORY_CALL, so each declared registration function is called. NOTE(review): names containing a double underscore are reserved in C++ - consider renaming together with the definitions.
+void clDNNEngine::RegisterPrimitives() {
+    #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name)
+    #include "cldnn_primitives_list.hpp"
+    #undef REGISTER_FACTORY
+}
+
 struct clDNNEngine::impl {
    CLDNNPlugin::Config m_config;
 };
@ -85,205 +108,197 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
    return device_info;
 }

-InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network, CLDNNPlugin::Config config) const {
-    std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
-    bool baselineIsFP16 = false;
+template<typename T>  // T is the reduce op class to test for; this file instantiates it with opset1::ReduceSum, ReduceMean and ReduceMax
+static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node> node) {  // callback helper for the ConvertReduce*ToPooling passes; returns true when node is a T that should stay a Reduce op (presumably a true result makes the pass skip the node - confirm against the pass_config callback semantics)
+    if (auto op = std::dynamic_pointer_cast<const T>(node)) {
+        auto reduction_axes = op->get_reduction_axes().to_vector();
+        bool reduce_along_f = op->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;  // exactly one reduction axis and it is axis 1 (the feature axis, going by the variable name)
+        bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_shape()[0] != 1;  // f16 op whose first input dimension is not 1; NOTE(review): get_shape()[0] is read without a rank check
+        bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;  // the Reshape->Pooling->Reshape form is preferred in both cases above, so Reduce is kept only when neither holds
+        return can_use_reduce;
+    }
+    return false;  // node is not a T
+}

-    if (clonedNetwork->getFunction()) {
-        const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
-            // Reshape->Permute->Reshape pattern in theory can change output rank, so this check is added to be sure
-            // that the following primitives will be handled correctly
-            // DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
-            if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
-                return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
-            }
+InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
+                                                                  const CLDNNPlugin::Config& config) const {
+    CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);

-            // SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
-            if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
-                return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
-            }
-
-            // Reduce node implementation with reduce along features performs better with Reshape->Pooling->Reshape pattern
-            // Reshape->Pooling->Reshape scenario is also more optimal in case when batch > 1 and network precission is FP16
-            if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceMean>(node)) {
-                auto reduction_axes = redOp->get_reduction_axes().to_vector();
-                bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
-                bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
-                bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
-                return can_use_reduce;
-            }
-            if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceMax>(node)) {
-                auto reduction_axes = redOp->get_reduction_axes().to_vector();
-                bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
-                bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
-                bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
-                return can_use_reduce;
-            }
-            if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceSum>(node)) {
-                auto reduction_axes = redOp->get_reduction_axes().to_vector();
-                bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
-                bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
-                bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
-                return can_use_reduce;
-            }
-
-            if (auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(node)) {
-                return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
-                       ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
-                       ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
-            }
-
-            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset5::HSigmoid>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset4::HSwish>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset4::ReduceL1>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset4::ReduceL2>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset4::SoftPlus>(node) ||
-                   std::dynamic_pointer_cast<const ::ngraph::opset5::LogSoftmax>(node);
-        };
-        auto nGraphFunc = clonedNetwork->getFunction();
+    if (clonedNetwork.getFunction()) {
+        auto nGraphFunc = clonedNetwork.getFunction();
        // Disable shape inference (WA for generic operations)
-        ::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
+        ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

-        bool enableInt8;
        {
-            // Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
            ngraph::pass::Manager manager;
-            using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
-            const auto& pass_config = manager.get_pass_config();
            manager.register_pass<ngraph::pass::InitNodeInfo>();
-            // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
-            manager.register_pass<ngraph::pass::ConvertPriorBox>();
-            manager.register_pass<ngraph::pass::ConvertNMS5ToLegacyMatcher>();
            manager.register_pass<ngraph::pass::CommonOptimizations>();
            manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
            manager.register_pass<ngraph::pass::ConvertGRUSequenceToTensorIterator>();
            manager.register_pass<ngraph::pass::ConvertLSTMSequenceToTensorIterator>();
            manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
            manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
+
            manager.register_pass<ngraph::pass::ConvertTensorIteratorToGRUSequence>();
            manager.register_pass<ngraph::pass::ConvertTensorIteratorToLSTMSequence>();
            manager.register_pass<ngraph::pass::ConvertTensorIteratorToRNNSequence>();
            manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
            manager.register_pass<ngraph::pass::GRUCellDecomposition>();
            manager.register_pass<ngraph::pass::RNNCellDecomposition>();
+            manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
+            manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
+            manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
+            manager.register_pass<ngraph::pass::ConvertNMS1ToNMS5>();
+            manager.register_pass<ngraph::pass::ConvertNMS3ToNMS5>();
+            manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
+            manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();

-            manager.set_callback(transformations_callback);
+            std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
+                    {ngraph::element::i64, ngraph::element::i32},
+                    {ngraph::element::u64, ngraph::element::i32},
+                    {ngraph::element::u16, ngraph::element::i32},
+                    {ngraph::element::u32, ngraph::element::i32},
+                    {ngraph::element::boolean, ngraph::element::u8},
+            };
+
+            for (auto & precision : convert_precision_list) {
+                manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
+            }
+
+            auto pass_config = manager.get_pass_config();
+
+            using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
+
+            // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
+            pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
+                                      ngraph::pass::ConvertDepthToSpace>(
+                    [](const_node_ptr &node) -> bool {
+                        return node->input_value(0).get_shape().size() <= 5lu &&
+                            node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
+                    });
+
+            pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
+                                      ngraph::pass::ConvertSpaceToBatch>(
+                    [](const_node_ptr &node) -> bool {
+                        const auto & rank = node->input(0).get_partial_shape().rank().get_length();
+                        return rank <= 5lu;
+                    });
+
+            pass_config->set_callback<ngraph::pass::ConvertReduceSumToPooling>(
+                [](const_node_ptr &node) -> bool {
+                    return disableReduceDecomposition<ngraph::opset1::ReduceSum>(node);
+                });
+
+            pass_config->set_callback<ngraph::pass::ConvertReduceMeanToPooling>(
+                [](const_node_ptr &node) -> bool {
+                    return disableReduceDecomposition<ngraph::opset1::ReduceMean>(node);
+                });
+
+            pass_config->set_callback<ngraph::pass::ConvertReduceMaxToPooling>(
+                [](const_node_ptr &node) -> bool {
+                    return disableReduceDecomposition<ngraph::opset1::ReduceMax>(node);
+                });

            auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
-                if (const auto &rnn_cell = std::dynamic_pointer_cast<const ngraph::opset4::RNNCell>(node)) {
+                if (std::dynamic_pointer_cast<const ngraph::op::v0::RNNCell>(node) || std::dynamic_pointer_cast<const ngraph::op::v5::RNNSequence>(node)) {
                    return false;
-                } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ngraph::opset4::GRUCell>(
-                        node)) {
+                } else if (std::dynamic_pointer_cast<const ngraph::op::v3::GRUCell>(node) ||
+                           std::dynamic_pointer_cast<const ngraph::op::v5::GRUSequence>(node)) {
                    return false;
-                } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::opset4::LSTMCell>(
-                        node)) {
-                    return lstm_cell->get_clip() == 0.0f &&
-                           lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
-                } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::opset1::LSTMCell>(
-                        node)) {
-                    return lstm_cell_v1->get_clip() == 0.0f &&
-                           lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
+                } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::op::v4::LSTMCell>(node)) {
+                    return lstm_cell->get_clip() == 0.0f && lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
+                } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::op::v0::LSTMCell>(node)) {
+                    return lstm_cell_v1->get_clip() == 0.0f && lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
+                } else if (const auto &lstm_sequence = std::dynamic_pointer_cast<const ngraph::op::v5::LSTMSequence>(node)) {
+                    return lstm_sequence->get_clip() == 0.0f && lstm_sequence->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
                }
                return false;
            };

-            pass_config->set_callback<ngraph::pass::RNNCellDecomposition, ngraph::pass::GRUCellDecomposition,
-                    ngraph::pass::LSTMCellDecomposition>(
-                    [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
-                        return isCellPrimitiveSupported(node);
-                    });
+            pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
+                                      ngraph::pass::ConvertGRUSequenceToTensorIterator,
+                                      ngraph::pass::ConvertLSTMSequenceToTensorIterator,
+                                      ngraph::pass::RNNCellDecomposition,
+                                      ngraph::pass::GRUCellDecomposition,
+                                      ngraph::pass::LSTMCellDecomposition>(
+                [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
+                    return isCellPrimitiveSupported(node);
+                });

            pass_config->set_callback<ngraph::pass::ConvertTensorIteratorToRNNSequence,
-                    ngraph::pass::ConvertTensorIteratorToLSTMSequence,
-                    ngraph::pass::ConvertTensorIteratorToGRUSequence>(
-                    [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
-                        if (const auto& ti_op = std::dynamic_pointer_cast<const ngraph::op::TensorIterator>(node)) {
-                            size_t count_rnn = 0;
-                            for (const auto &op : ti_op->get_body()->get_ops())
-                                count_rnn += isCellPrimitiveSupported(op);
-                            return count_rnn != 1;
-                        }
-                        return true;
-                    });
-            manager.run_passes(nGraphFunc);
-
-            enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
-            if (enableInt8) {
-                const auto fp16_callback = [&baselineIsFP16](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
-                    if (!baselineIsFP16 && node->get_output_element_type(0) == ngraph::element::f16) {
-                        baselineIsFP16 = true;
+                                      ngraph::pass::ConvertTensorIteratorToLSTMSequence,
+                                      ngraph::pass::ConvertTensorIteratorToGRUSequence>(
+                [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
+                    if (const auto& ti_op = std::dynamic_pointer_cast<const ngraph::op::TensorIterator>(node)) {
+                        size_t count_rnn = 0;
+                        for (const auto &op : ti_op->get_body()->get_ops())
+                            count_rnn += isCellPrimitiveSupported(op);
+                        return count_rnn != 1;
                    }
-
                    return true;
-                };
+                });

-                ngraph::pass::Manager conversion_manager;
-                // [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
-                conversion_manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
-                conversion_manager.set_callback(fp16_callback);
-                conversion_manager.run_passes(nGraphFunc);
-            }
+            pass_config->set_callback<ngraph::pass::ConvertNMS1ToNMS5,
+                                      ngraph::pass::ConvertNMS3ToNMS5,
+                                      ngraph::pass::ConvertNMS4ToNMS5,
+                                      ngraph::pass::ConvertNMSToNMSIEInternal>(
+                    [](const_node_ptr &node) -> bool {
+                        return node->input_value(0).get_shape().back() == 4lu &&
+                               node->input_value(0).get_shape().front() == node->input_value(1).get_shape().front() &&
+                               node->input_value(0).get_shape()[1] == node->input_value(1).get_shape().back() &&
+                               node->input_value(0).get_shape().size() == 3lu &&
+                               node->input_value(1).get_shape().size() == 3lu;
+                    });
+
+            // List of enabled/disabled transformations
+            pass_config->disable<ngraph::pass::ConvertGELU>();
+            pass_config->disable<ngraph::pass::ConvertMod>();
+            pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
+            pass_config->disable<ngraph::pass::HSwishDecomposition>();
+            pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
+            pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
+            pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
+            pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
+            pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();
+
+            pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
+
+            manager.run_passes(nGraphFunc);
        }

-        using namespace ngraph::pass::low_precision;
+        bool enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
        if (enableInt8) {
-            auto params = LayerTransformation::Params(
-                true,  // updatePrecisions
-                LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
-                LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
-                true);  // supportAsymmetricQuantization
+            using namespace ngraph::pass::low_precision;
+            ngraph::pass::Manager conversion_manager;
+            // [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
+            conversion_manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+            conversion_manager.run_passes(nGraphFunc);
+            auto params = LayerTransformation::Params(true,                                                        // updatePrecisions
+                                                      LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
+                                                      LayerTransformation::QuantizedTensorAlignment::None,         // quantizedTensorAlignmentOnWeights
+                                                      true);                                                       // supportAsymmetricQuantization
            LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
                .add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false)));

            transformer.transform(nGraphFunc);
        }

-        const auto reshape_fc_callback = [](const std::shared_ptr<const ::ngraph::Node>& node) -> bool {
-            return node->input_value(0).get_shape().size() <= 3lu;
-        };
-
        {
-            ngraph::pass::Manager manager = ngraph::pass::Manager();
-            manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
+            ngraph::pass::Manager manager;
+            // This ConstantFolding pass folds the reshapes inserted on the constant inputs of the internal NMS operation; left unfolded, they prevent upper-bound calculation
+            // TODO: check why we have these reshapes
+            manager.register_pass<ngraph::pass::ConstantFolding>();
            manager.register_pass<ngraph::pass::UnrollTensorIterator>();
-            manager.set_callback(transformations_callback);
-            auto pass_config = manager.get_pass_config();
-            pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(reshape_fc_callback);
            manager.run_passes(nGraphFunc);
        }
-
-        clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
    }
-
-    auto implNetwork = std::dynamic_pointer_cast<InferenceEngine::details::CNNNetworkImpl>(clonedNetwork);
-    if (implNetwork) {
-        // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
-        ConstTransformer transformator(implNetwork.get());
-        transformator.fullTrim();
-    }
-
-    if (baselineIsFP16) {
-        // [WA part1] Store 'lpt_back_to_fp16' flag to convert FP32 operations to original FP16 after LPT
-        InputsDataMap inputsMap;
-        clonedNetwork->getInputsInfo(inputsMap);
-
-        if (!inputsMap.empty()) {
-            auto input0 = getInputTo(inputsMap.begin()->second->getInputData());
-            input0.begin()->second->params["lpt_back_to_fp16"];
-        }
-    }
-
    return clonedNetwork;
 }

 clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
    _pluginName = "GPU";
    _impl = std::make_shared<impl>();
-
+    RegisterPrimitives();
    // try loading clDNN engine and get info from it
    {
        cldnn::device_query device_query;
@ -333,6 +348,15 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
    }
 };

+void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
+    auto device_info = GetDeviceInfo(params);  // device description looked up from 'params' (NOTE(review): presumably selected by device id - confirm in GetDeviceInfo)
+    conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;  // int8 default: on when the device reports imad or immad support
+    conf.UpdateFromMap(params);  // runs after the int8 default above; NOTE(review): presumably lets explicit entries in 'params' override it - confirm in Config::UpdateFromMap
+    if (conf.enableDynamicBatch) {
+        conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());  // with dynamic batch enabled, the network's current batch size becomes the upper bound
+    }
+}
+
 ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                               const std::map<std::string, std::string> &config) {
    // verification of supported input
@ -340,13 +364,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
    check_inputs(_networkInputs);

    CLDNNPlugin::Config conf = _impl->m_config;
-    auto device_info = GetDeviceInfo(config);
-    conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
-    conf.UpdateFromMap(config);
-
-    if (conf.enableDynamicBatch) {
-        conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
-    }
+    UpdateConfig(conf, network, config);

    CLDNNRemoteCLContext::Ptr context;

@ -379,7 +397,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn

    context = m_defaultContext;

-    InferenceEngine::CNNNetwork transformedNetwork(CloneAndTransformNetwork(network, conf));
+    auto transformedNetwork = CloneAndTransformNetwork(network, conf);
    return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
 }

@ -395,15 +413,9 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
    }

    CLDNNPlugin::Config conf = getContextImpl(casted)->GetConfig();
-    auto device_info = GetDeviceInfo(config);
-    conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
-    conf.UpdateFromMap(config);
+    UpdateConfig(conf, network, config);

-    if (conf.enableDynamicBatch) {
-        conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
-    }
-
-    InferenceEngine::CNNNetwork transformedNetwork(CloneAndTransformNetwork(network, conf));
+    auto transformedNetwork = CloneAndTransformNetwork(network, conf);
    return std::make_shared<CLDNNExecNetwork>(transformedNetwork, casted, conf);
 }

@ -440,85 +452,101 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
 QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
                                             const std::map<std::string, std::string>& config) const {
    QueryNetworkResult res;
-    GetDeviceInfo(config);      // Verify device id
+    CLDNNPlugin::Config conf = _impl->m_config;
+    UpdateConfig(conf, network, config);
+
+    Program prog;
    auto function = network.getFunction();
-    if (function != nullptr) {
-        std::unordered_set<std::string> originalOps;
-        for (auto&& node : function->get_ops()) {
-            originalOps.emplace(node->get_friendly_name());
+    if (function == nullptr) {
+        THROW_IE_EXCEPTION << "CNNetworkImpl representation is not supported anymore";
+    }
+
+    std::unordered_set<std::string> originalOpNames;
+    auto originalOps = function->get_ops();
+    for (auto&& node : originalOps) {
+        originalOpNames.emplace(node->get_friendly_name());
+    }
+
+    auto clonedNetwork = CloneAndTransformNetwork(network, conf);
+    auto ops = clonedNetwork.getFunction()->get_ordered_ops();
+    std::unordered_set<std::string> supported;
+    std::unordered_set<std::string> unsupported;
+
+    std::unordered_set<std::string> splitNames;
+    std::unordered_set<std::string> concatNames;
+    std::unordered_set<std::string> constantsNames;
+    std::unordered_set<std::string> depLayerNames;
+
+    std::vector<std::shared_ptr<ngraph::Node>> splits;
+    std::vector<std::shared_ptr<ngraph::Node>> concats;
+    std::vector<std::shared_ptr<ngraph::Node>> constants;
+    std::vector<std::shared_ptr<ngraph::Node>> nextLayerDependent;
+
+    auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {  // true = accepted right away; false = rejected, or recorded in one of the lists above for a later decision
+        if (ngraph::is_type<const ngraph::op::v0::DetectionOutput>(node) ||
+            ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
+            ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
+            ngraph::is_type<const ngraph::op::v0::Proposal>(node)) {
+            return false;  // rejected outright: not recorded in any of the deferred lists
+        } else if (ngraph::is_type<const ngraph::op::v1::Split>(node)) {
+            splitNames.emplace(node->get_friendly_name());
+            splits.push_back(node);
+            return false;  // deferred: remembered in splits/splitNames
+        } else if (ngraph::is_type<const ngraph::op::v0::Concat>(node)) {
+            concatNames.emplace(node->get_friendly_name());
+            concats.push_back(node);
+            return false;  // deferred: remembered in concats/concatNames
+        } else if (ngraph::is_type<const ngraph::op::v1::Reshape>(node) ||
+                   ngraph::is_type<const ngraph::op::v0::Squeeze>(node) ||
+                   ngraph::is_type<const ngraph::op::v0::Unsqueeze>(node) ||
+                   ngraph::is_type<const ngraph::op::v1::Transpose>(node)) {
+            depLayerNames.emplace(node->get_friendly_name());
+            nextLayerDependent.push_back(node);
+            return false;  // deferred: remembered in nextLayerDependent/depLayerNames
+        } else if (ngraph::is_type<const ngraph::op::v0::Constant>(node)) {
+            constantsNames.emplace(node->get_friendly_name());
+            constants.push_back(node);
+            return false;  // deferred: remembered in constants/constantsNames
+        } else if (prog.IsOpSupported(network, node) &&  // everything else: ask Program, but never accept Parameter or Result nodes here
+                   !ngraph::op::is_parameter(node) &&
+                   !ngraph::op::is_output(node)) {
+            return true;
+        } else {
+            return false;
        }
-        auto clonedNetwork = CloneAndTransformNetwork(network, _impl->m_config);
-        std::unordered_set<std::string> supported;
-        std::unordered_set<std::string> unsupported;
+    };

-        std::unordered_set<std::string> splitNames;
-        std::unordered_set<std::string> concatNames;
-        std::unordered_set<std::string> depLayerNames;
-
-        std::vector<std::shared_ptr<ngraph::Node>> splits;
-        std::vector<std::shared_ptr<ngraph::Node>> concats;
-        std::vector<std::shared_ptr<ngraph::Node>> nextLayerDependent;
-
-        for (InferenceEngine::details::CNNNetworkIterator itLayer{clonedNetwork.get()};
-             itLayer != InferenceEngine::details::CNNNetworkIterator();
-             itLayer++) {
-            auto layerIsSupported = [&] {
-                auto node = (*itLayer)->getNode();
-                if (std::dynamic_pointer_cast<const ::ngraph::opset3::DetectionOutput>(node) != nullptr ||
-                    std::dynamic_pointer_cast<const ::ngraph::opset3::PriorBox>(node) != nullptr ||
-                    std::dynamic_pointer_cast<const ::ngraph::opset3::PriorBoxClustered>(node) != nullptr ||
-                    std::dynamic_pointer_cast<const ::ngraph::opset3::Proposal>(node) != nullptr) {
-                    return false;
-                } else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Split>(node) != nullptr) {
-                    splitNames.emplace(node->get_friendly_name());
-                    splits.push_back(node);
-                    return false;
-                } else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Concat>(node) != nullptr) {
-                    concatNames.emplace(node->get_friendly_name());
-                    concats.push_back(node);
-                    return false;
-                } else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Reshape>(node) != nullptr ||
-                           std::dynamic_pointer_cast<const ::ngraph::opset3::Squeeze>(node) != nullptr ||
-                           std::dynamic_pointer_cast<const ::ngraph::opset3::Unsqueeze>(node) != nullptr ||
-                           std::dynamic_pointer_cast<const ::ngraph::opset3::Transpose>(node) != nullptr ||
-                           ngraph::op::is_constant(node)) {
-                    depLayerNames.emplace(node->get_friendly_name());
-                    nextLayerDependent.push_back(node);
-                    return false;
-                } else if (CLDNNGraph::IsLayerSupported((*itLayer)->type)) {
-                    return true;
+    // Get ops after transformations and check if it's supported
+    // Transformations might lead to the situation when single node is merged to multiple operations,
+    // so we mark original op as supported only if all nodes that it was merged into are supported
+    for (auto&& op : ops) {
+        for (auto&& fusedLayerName : ngraph::getFusedNamesVector(op)) {
+            if (InferenceEngine::details::contains(originalOpNames, fusedLayerName)) {
+                if (layerIsSupported(op)) {
+                    supported.emplace(fusedLayerName);
                } else {
-                    return false;
-                }
-            }();
-            const auto fusedNode = (*itLayer)->getNode();
-            if (fusedNode == nullptr) {
-                // skip layers completely generated by IR transformation
-                continue;
-            }
-            for (auto&& fusedLayerName : ngraph::getFusedNamesVector(fusedNode)) {
-                if (InferenceEngine::details::contains(originalOps, fusedLayerName)) {
-                    if (layerIsSupported) {
-                        supported.emplace(fusedLayerName);
-                    } else {
-                        unsupported.emplace(fusedLayerName);
-                    }
+                    unsupported.emplace(fusedLayerName);
                }
            }
        }
+    }

-        for (auto&& layerName : supported) {
-            if (InferenceEngine::details::contains(unsupported, layerName)) {
-                supported.erase(layerName);
-            }
-        }
-        unsupported.clear();
+    // An original op stays supported only if none of the transformed nodes it was fused into
+    // reported it as unsupported. Walk 'unsupported' and erase by key: erasing from 'supported'
+    // while range-iterating 'supported' itself invalidates the loop iterator (undefined behaviour).
+    // erase(key) is a no-op for names that are not present, so the resulting set is the same.
+    for (const auto& layerName : unsupported) {
+        supported.erase(layerName);
+    }
+    unsupported.clear();

-        for (const auto & split : splits) {
-            bool is_supported = true;
-            const auto outputs = split->outputs();
-            for (const auto& output : outputs) {
-                const auto& name = output.get_node()->get_friendly_name();
+    // Check set of heuristics to produce more efficient hetero sub-graph. Note: checks order is important.
+    // 1. Split is marked as supported when all output ops can be offloaded to GPU
+    for (const auto & op : splits) {
+        bool is_supported = true;
+        for (size_t i = 0; i < op->get_output_size(); i++) {
+            auto outTensors = op->get_output_target_inputs(i);
+            for (auto& t : outTensors) {
+                auto output = t.get_node();
+                const auto& name = output->get_friendly_name();
                if (!InferenceEngine::details::contains(supported, name) &&
                    !InferenceEngine::details::contains(depLayerNames, name) &&
                    !InferenceEngine::details::contains(concatNames, name) &&
@ -527,69 +555,97 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
                    break;
                }
            }
-            if (is_supported) {
-                supported.emplace(split->get_friendly_name());
+        }
+        if (is_supported) {
+            supported.emplace(op->get_friendly_name());
+        }
+    }
+
+    // 2. Concat is marked as supported when all inputs can be offloaded to GPU
+    for (const auto& op : concats) {
+        bool is_supported = true;
+        for (size_t i = 0; i < op->get_input_size(); i++) {
+            auto input = op->get_input_node_shared_ptr(i);
+            const auto& name = input->get_friendly_name();
+            if (!InferenceEngine::details::contains(supported, name) &&  // producer is not accepted so far,
+                !InferenceEngine::details::contains(depLayerNames, name) &&  // is not a Reshape/Squeeze/Unsqueeze/Transpose (whose own verdict is only made in step 3 below),
+                !InferenceEngine::details::contains(concatNames, name)) {  // and is not another Concat (accepted by name, whether or not that Concat itself was accepted)
+                is_supported = false;
+                break;
            }
        }
+        if (is_supported) {
+            supported.emplace(op->get_friendly_name());  // later steps treat this Concat like any other accepted op
+        }
+    }

-        for (const auto& concat : concats) {
-            bool is_supported = true;
-            const auto inputs = concat->inputs();
-            for (const auto& input : inputs) {
-                const auto& name = input.get_node()->get_friendly_name();
-                if (!InferenceEngine::details::contains(supported, name) &&
-                    !InferenceEngine::details::contains(depLayerNames, name) &&
-                    !InferenceEngine::details::contains(concatNames, name)) {
-                    is_supported = false;
-                    break;
-                }
-            }
-            if (is_supported) {
-                supported.emplace(concat->get_friendly_name());
+    // 3. Some layers are marked as supported when all inputs and outputs can be offloaded to GPU
+    for (const auto& op : nextLayerDependent) {
+        bool is_supported = true;
+        // both inputs and output should be GPU to remain on GPU
+        for (size_t i = 0; i < op->get_input_size(); i++) {
+            auto input = op->get_input_node_shared_ptr(i);
+            const auto& name = input->get_friendly_name();
+            // All inputs must be supported or be a constant
+            if (!InferenceEngine::details::contains(supported, name) && !InferenceEngine::details::contains(constantsNames, name)) {
+                is_supported = false;
+                break;
            }
        }
-
-        for (const auto& cnl : nextLayerDependent) {
-            bool is_supported = true;
-            // both inputs and output should be GPU to remain on GPU
-            const auto inputs = cnl->inputs();
-            for (const auto& input : inputs) {
-                const auto& name = input.get_node()->get_friendly_name();
+        for (size_t i = 0; i < op->get_output_size(); i++) {
+            auto outTensors = op->get_output_target_inputs(i);
+            for (auto& t : outTensors) {
+                auto output = t.get_node();
+                const auto& name = output->get_friendly_name();
                if (!InferenceEngine::details::contains(supported, name)) {
                    is_supported = false;
                    break;
                }
            }
-            const auto outputs = cnl->outputs();
-            for (const auto& output : outputs) {
-                const auto& name = output.get_node()->get_friendly_name();
+        }
+        if (is_supported) {
+            supported.emplace(op->get_friendly_name());
+        }
+    }
+
+    // 4. Constants are marked as supported when all outputs can be offloaded to GPU
+    for (const auto& op : constants) {
+        bool is_supported = true;
+        for (size_t i = 0; i < op->get_output_size(); i++) {
+            auto outTensors = op->get_output_target_inputs(i);
+            for (auto& t : outTensors) {
+                auto output = t.get_node();
+                const auto& name = output->get_friendly_name();
                if (!InferenceEngine::details::contains(supported, name)) {
                    is_supported = false;
                    break;
                }
            }
-            if (is_supported) {
-                supported.emplace(cnl->get_friendly_name());
+        }
+        if (is_supported) {
+            supported.emplace(op->get_friendly_name());
+        }
+    }
+
+    // Mark original constants/parameters/results ops as supported for each supported operation
+    // since rt_info doesn't contain names of constant that are removed during constant folding
+    for (auto&& node : originalOps) {
+        if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
+            for (auto&& inputNodeOutput : node->input_values()) {
+                if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
+                    supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
+                }
+            }
+            for (auto&& outputs : node->outputs()) {
+                for (auto&& outputNodeInput : outputs.get_target_inputs()) {
+                    if (ngraph::op::is_output(outputNodeInput.get_node())) {
+                        supported.emplace(outputNodeInput.get_node()->get_friendly_name());
+                    }
+                }
            }
        }

-        for (auto&& node : function->get_ops()) {
-            if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
-                for (auto&& inputNodeOutput : node->input_values()) {
-                    if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
-                        supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
-                    }
-                }
-                for (auto&& outputs : node->outputs()) {
-                    for (auto&& outputNodeInput : outputs.get_target_inputs()) {
-                        if (ngraph::op::is_output(outputNodeInput.get_node())) {
-                            supported.emplace(outputNodeInput.get_node()->get_friendly_name());
-                        }
-                    }
-                }
-            }
-
-            if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
+        if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
                if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
                    supported.erase(node->get_friendly_name());
                }
@ -598,69 +654,10 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
                    supported.erase(node->get_friendly_name());
                }
            }
-        }
+    }

-        for (auto&& layerName : supported) {
-            res.supportedLayersMap.emplace(layerName, GetName());
-        }
-    } else {
-        std::vector<CNNLayer::Ptr> concats;
-        std::vector<CNNLayer::Ptr> nextLayerDependent;
-        std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
-        for (auto layer : sortedLayers) {
-            if (CaselessEq<std::string>()(layer->type, "DetectionOutput")) {
-            } else if (CaselessEq<std::string>()(layer->type, "PriorBox")) {
-            } else if (CaselessEq<std::string>()(layer->type, "Proposal")) {
-            } else if (CaselessEq<std::string>()(layer->type, "SimplerNMS")) {
-            } else if (CaselessEq<std::string>()(layer->type, "Concat")) {
-                concats.push_back(layer);
-            } else if (CaselessEq<std::string>()(layer->type, "reshape")) {
-                nextLayerDependent.push_back(layer);
-            } else if (CaselessEq<std::string>()(layer->type, "permute")) {
-                nextLayerDependent.push_back(layer);
-            } else if (CaselessEq<std::string>()(layer->type, "Const")) {
-                nextLayerDependent.push_back(layer);
-            } else if (CLDNNGraph::IsLayerSupported(layer->type)) {
-                res.supportedLayersMap.insert({ layer->name, GetName() });
-            }
-        }
-        // evaluation of concats - if all parent layers are supported, only in this case we
-        // will mark concat as a supported for GPU
-        for (const auto& concat : concats) {
-            // take all parrents.
-            bool supported = true;
-            for (DataWeakPtr insData : concat->insData) {
-                CNNLayerPtr prev = getCreatorLayer(insData.lock()).lock();
-                // verify if previous layer is not supported or if it in the list of not defined layers yet
-                // not defined layers are treated as layers which will be assigned to GPU if next layer is assigned to GPU
-                if (res.supportedLayersMap.find(prev->name) == res.supportedLayersMap.end()
-                    && std::find(nextLayerDependent.begin(), nextLayerDependent.end(), prev) == nextLayerDependent.end()) {
-                    supported = false;
-                }
-            }
-            if (supported) {
-                res.supportedLayersMap.insert({ concat->name, GetName() });
-            }
-        }
-
-        // evaluation of constant blobs - if all consumers are on GPU,
-        // then leave it on GPU, else - move to other device
-        for (auto cnl = nextLayerDependent.rbegin();
-            cnl != nextLayerDependent.rend();
-            cnl++) {
-            bool supported = true;
-            for (DataPtr out : (*cnl)->outData) {
-                for (auto ol : getInputTo(out)) {
-                    if (res.supportedLayersMap.find(ol.second->name) == res.supportedLayersMap.end()) {
-                        supported = false;
-                    }
-                }
-            }
-
-            if (supported) {
-                res.supportedLayersMap.insert({ (*cnl)->name, GetName() });
-            }
-        }
+    for (auto&& layerName : supported) {
+        res.supportedLayersMap.emplace(layerName, GetName());
    }

    return res;
--- a/inference-engine/src/cldnn_engine/cldnn_engine.h
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.h
@ -16,7 +16,7 @@ namespace CLDNNPlugin {
 using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;

 class clDNNEngine : public InferenceEngine::InferencePluginInternal,
-    public gpu::details::param_map_obj_getter {
+                    public InferenceEngine::gpu::details::param_map_obj_getter {
    struct impl;
    std::shared_ptr<impl> _impl;

@ -27,8 +27,11 @@ class clDNNEngine : public InferenceEngine::InferencePluginInternal,
    CLDNNRemoteCLContext::Ptr m_defaultContext;

    cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
-    InferenceEngine::ICNNNetwork::Ptr CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network,
-                                                               CLDNNPlugin::Config config) const;
+    InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
+                                                         const CLDNNPlugin::Config& config) const;
+
+    void RegisterPrimitives();
+    void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const;
 public:
    clDNNEngine();

@ -46,7 +49,7 @@ public:
                                                     const std::map<std::string, std::string>& config) const override;

    InferenceEngine::RemoteContext::Ptr CreateContext(const InferenceEngine::ParamMap& params) override;
-    InferenceEngine::RemoteContext::Ptr GetDefaultContext(const ParamMap& params) override;
+    InferenceEngine::RemoteContext::Ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override;
 };

 };  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
@ -16,8 +16,6 @@

 #include <description_buffer.hpp>
 #include <cldnn/cldnn_config.hpp>
-#include <legacy/graph_tools.hpp>
-#include <legacy/net_pass.h>
 #include "cldnn_infer_request.h"
 #include <threading/ie_executor_manager.hpp>
 #include "cldnn_async_infer_request.h"
--- a/inference-engine/src/cldnn_engine/cldnn_executable_network.h
+++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.h
@ -12,7 +12,6 @@
 #include <utility>
 #include "ie_blob.h"
 #include "cpp/ie_cnn_network.h"
-#include "debug_options.h"
 #include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
 #include "cldnn_graph.h"
 #include "cldnn_config.h"
@ -24,7 +23,7 @@ class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefa
 public:
    typedef std::shared_ptr<CLDNNExecNetwork> Ptr;

-    CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, RemoteContext::Ptr context, Config config);
+    CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, Config config);

    InferenceEngine::CNNNetwork GetExecGraphInfo() override;
    InferenceEngine::IInferRequest::Ptr CreateInferRequest() override;
@ -33,11 +32,10 @@ public:

    InferenceEngine::Parameter GetMetric(const std::string &name) const override;
    InferenceEngine::Parameter GetConfig(const std::string &name) const override;
-    RemoteContext::Ptr GetContext() const override;
-
+    InferenceEngine::RemoteContext::Ptr GetContext() const override;

    std::vector<std::shared_ptr<CLDNNGraph>> m_graphs;
-    gpu::ClContext::Ptr m_context;
+    InferenceEngine::gpu::ClContext::Ptr m_context;
    Config m_config;
    InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
 };
--- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp
@ -17,8 +17,6 @@
 #include "simple_math.h"
 #include <description_buffer.hpp>
 #include <cldnn/cldnn_config.hpp>
-#include <legacy/graph_tools.hpp>
-#include <legacy/net_pass.h>
 #include "cldnn_infer_request.h"
 #include <threading/ie_executor_manager.hpp>
 #include <fstream>
@ -69,12 +67,12 @@ void CLDNNGraph::Build() {
    if (GetMaxDynamicBatchSize() > 1) {
        int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram();
        for (int b = m_bv_sz - 1; b >= 0; b--) {
-            auto network = BuildNetwork(m_program->getCompiledProgram(b));
+            auto network = BuildNetwork(m_program->GetCompiledProgram(b));
            m_networks.insert(m_networks.begin(), network);
            GetEngine()->release_pending_memory(network->get_id());
        }
    } else {
-        auto network = BuildNetwork(m_program->getCompiledProgram());
+        auto network = BuildNetwork(m_program->GetCompiledProgram());
        m_networks.emplace_back(network);
        GetEngine()->release_pending_memory(network->get_id());
    }
@ -131,6 +129,7 @@ InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::ve
        }
    };

+    // TODO: Adjust output layer names to be aligned with ngraph and add new ops
    auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{
        static std::map<std::string, std::string> type_n2l {
                { "activation", "Activation" },
@ -748,6 +747,9 @@ std::string CLDNNGraph::MapOutputName(std::string outName) const {
    auto allPrimitiveIds = GetNetwork()->get_all_primitives();

    // Find correct output ID. Start with name stored in IR.
+    if (primitiveIDs.find(outName) == primitiveIDs.end()) {
+        THROW_IE_EXCEPTION << "output with name " << outName << " was not found in primitiveIDs";
+    }
    std::string outputID = primitiveIDs.at(outName);
    while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
        // If current ID isn't found in cldnn network outputs, get previous primitive id and try again.
--- a/inference-engine/src/cldnn_engine/cldnn_graph.h
+++ b/inference-engine/src/cldnn_engine/cldnn_graph.h
@ -16,17 +16,10 @@
 #include <utility>
 #include "ie_blob.h"
 #include "cpp/ie_cnn_network.h"
-#include "debug_options.h"
+
 #include <api/network.hpp>
-#include <api/memory.hpp>
-#include <api/primitive.hpp>
 #include <api/topology.hpp>
-#include <api/pooling.hpp>
-#include <api/eltwise.hpp>
-#include <api/concatenation.hpp>
-#include <api/detection_output.hpp>
-#include <api/softmax.hpp>
-#include <api/resample.hpp>
+
 #include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
 #include "cldnn_custom_layer.h"
 #include "cldnn_config.h"
@ -39,24 +32,20 @@ class CLDNNGraph {
 public:
    typedef std::shared_ptr<CLDNNGraph> Ptr;

-    CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
+    CLDNNGraph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
    explicit CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id = 0);
    InferenceEngine::CNNNetwork GetExecGraphInfo();

    bool IsLoaded() const;

-    static bool IsLayerSupported(const std::string& type) {
-        return Program::LayerTypeFromStr(type) != Program::NO_TYPE;
-    }
-
    void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfMap) const;
    void UpdatePerfStatistics();

    const Config& getConfig() const { return m_config; }
-    gpu::ClContext::Ptr GetContext() { return m_context; }
+    InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
    std::shared_ptr<const cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
    int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
-    const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->getInputLayouts(); }
+    const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
    size_t GetNetworksCount() const { return m_networks.size(); }
    std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
    InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
@ -67,7 +56,7 @@ protected:
    std::string m_networkName;
    Config m_config;

-    gpu::ClContext::Ptr m_context;
+    InferenceEngine::gpu::ClContext::Ptr m_context;
    std::vector<std::shared_ptr<cldnn::network>> m_networks;
    std::map<std::string, cldnn::primitive_id> primitiveIDs;
    std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
@ -273,7 +273,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
    size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
    size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;

-    cldnn::primitive_id internalName = "input:" + inputName;
+    cldnn::primitive_id internalName = "parameter:" + inputName;
    auto locked = inputBlob.cbuffer();
    switch (inputBlob.getTensorDesc().getPrecision()) {
    case Precision::FP32: {
@ -562,6 +562,7 @@ void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
 }

 void CLDNNInferRequest::AllocateInputs() {
+    // CLDNNGraph::GetInputLayouts() returns a const reference; plain `auto` would deep-copy the whole
+    // name -> layout map on every call. The lookups below only read it, so bind by reference instead.
+    const auto& inputLayouts = m_graph->GetInputLayouts();
    // allocate inputs
    for (auto& ni : _networkInputs) {
        std::string name = ni.first;
@ -572,8 +573,14 @@ void CLDNNInferRequest::AllocateInputs() {
            cldnn::primitive_id YName(name + "_Y");
            cldnn::primitive_id UVName(name + "_UV");

-            input_alloc(YName, m_graph->GetInputLayouts().at(YName));
-            input_alloc(UVName, m_graph->GetInputLayouts().at(UVName));
+            if (inputLayouts.find(YName) == inputLayouts.end()) {
+                THROW_IE_EXCEPTION << "Input layout for " << YName << " is not found";
+            }
+            if (inputLayouts.find(UVName) == inputLayouts.end()) {
+                THROW_IE_EXCEPTION << "Input layout for " << UVName << " is not found";
+            }
+            input_alloc(YName, inputLayouts.at(YName));
+            input_alloc(UVName, inputLayouts.at(UVName));

            size_t height = desc.getDims()[2], width = desc.getDims()[3];
            cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
@ -586,7 +593,10 @@ void CLDNNInferRequest::AllocateInputs() {

            _inputs[name] = make_shared_blob<NV12Blob>(blobY, blobUV);
        } else {
-            cldnn::layout layout = m_graph->GetInputLayouts().at(name);
+            if (inputLayouts.find(name) == inputLayouts.end()) {
+                THROW_IE_EXCEPTION << "Input layout for " << name << " is not found";
+            }
+            cldnn::layout layout = inputLayouts.at(name);
            input_alloc(name, layout);
            cldnn::pointer<uint8_t> mem_ptr = inputsMemory.at(name).pointer<uint8_t>();
            _inputs[name] = createInputBlob(desc, mem_ptr.data());
@ -907,7 +917,7 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
        return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
    };

-    cldnn::primitive_id internalName = "input:" + inputName;
+    cldnn::primitive_id internalName = "parameter:" + inputName;
    const cldnn::memory& memory = inputsMemory.at(inputName);
    auto _nw_ptr = m_graph->GetNetwork();
    auto prec = inputBlob.getTensorDesc().getPrecision();
--- a/inference-engine/src/cldnn_engine/cldnn_lstm.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_lstm.cpp
@ -1,585 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <string>
-#include <vector>
-#include <sstream>
-#include <utility>
-#include <api/cldnn.hpp>
-#include <api/data.hpp>
-#include <api/mutable_data.hpp>
-#include <api/reorder.hpp>
-#include <api/fully_connected.hpp>
-#include <api/concatenation.hpp>
-#include <api/reshape.hpp>
-#include <api/permute.hpp>
-#include <api/split.hpp>
-#include <api/crop.hpp>
-#include <api/reverse_sequence.hpp>
-#include <api/lstm.hpp>
-#include <api/lstm_dynamic.hpp>
-#include "cldnn_common_utils.h"
-#include "cldnn_program.h"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-
-namespace CLDNNPlugin {
-
-std::string get_string_id(size_t i) {  // Formats i in decimal, left-padded with '0' to a minimum width of 5 (e.g. 7 -> "00007").
-    std::stringstream ss;
-    ss << std::setw(5) << std::setfill('0') << i;  // NOTE(review): setw/setfill are declared in <iomanip>, which is not in this file's visible include list - presumably pulled in transitively; confirm.
-    return ss.str();
-}
-
-void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {  // Adds the primitives for one LSTMCell layer to `topology`: input reshapes/reorders, concat, fully_connected, lstm_elt and two output crops.
-    int lstm_batch_size, lstm_input_size, lstm_hidden_size;
-    bool hasBias = false;
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-
-    std::string layerName = layer_type_name_ID(layer);
-    cldnn::primitive_id weightID = layerName + m_weightsTag;
-    cldnn::primitive_id biasID = layerName + m_biasesTag;
-
-    /* check incoming CNN layer and setup required variables */
-    {
-        auto in_data0 = layer->insData[0].lock();
-        if (!in_data0)
-            THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name;
-
-        const auto in_dims0 = in_data0->getTensorDesc().getDims();
-        const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();
-
-        lstm_input_size = in_dims0.back();  // last dimension of the first input
-        lstm_batch_size = in_dims0.at(in_dims0.size()-2);  // second-to-last dimension of the first input
-        lstm_hidden_size = out_dims0.back();  // last dimension of the first output
-
-        auto in_data1 = layer->insData[1].lock();
-        if (!in_data1)
-            THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name;
-
-        auto in_data2 = layer->insData[2].lock();
-        if (!in_data2)
-            THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name;
-
-        if (in_dims0.size() != 2 ||  // all three inputs must be 2D
-            in_data1->getTensorDesc().getDims().size() != 2 ||
-            in_data2->getTensorDesc().getDims().size() != 2)
-            THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name;
-    }
-
-    /* Prepare weight/bias memory primitives */
-    {
-        auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);
-        auto pWeightsBlob = wLayer->_weights;
-        cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1));
-        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()), m_defaultFormat, wTensor);
-        weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout);  // weightID is replaced by whatever ID the helper returns
-
-        /* create bias memory primitive */
-        auto pBiasBlob = wLayer->_biases;
-        if (pBiasBlob != nullptr) {  // bias is optional; hasBias records whether one was registered
-            cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
-            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);
-
-            biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout);
-            hasBias = true;
-        }
-    }
-
-    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
-    cldnn::primitive_id permuteID = layerName + "_inputReorder";
-    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
-    cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder";
-    cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape";
-    cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder";
-    cldnn::primitive_id concatID = layerName + "_inputConcat";
-
-    //  LSTM primitive works with single precision for all in/out/weights tensors
-    auto lstmPrecision = layer->outData[0]->getPrecision();  // precision of the first output is used for every layout built below
-
-    cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
-    cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
-    cldnn::layout hiddenLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, hiddenStateShape);
-    topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
-    topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
-
-    AddInnerPrimitiveToProfiler(inReshapeID, layer->name, layer);
-    AddInnerPrimitiveToProfiler(permuteID, layer->name, layer);
-
-    std::string hiddenInResh = inHiddenReshapeID + "_1";  // "_1" suffix: second layer input (fed to the concat below)
-    std::string hiddenInStr = inHiddenReorderID + "_1";
-    std::string cellInResh = inHiddenReshapeID + "_2";  // "_2" suffix: third layer input (fed to lstm_elt below)
-    std::string cellInStr = inHiddenReorderID + "_2";
-    topology.add(cldnn::reshape(hiddenInResh, inputPrimitives[1], hiddenStateShape));
-    topology.add(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
-    topology.add(cldnn::reshape(cellInResh, inputPrimitives[2], hiddenStateShape));
-    topology.add(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
-    topology.add(cldnn::concatenation(concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));  // joins the reordered input with the reordered second input along x
-
-    AddInnerPrimitiveToProfiler(hiddenInResh, layer->name, layer);
-    AddInnerPrimitiveToProfiler(hiddenInStr, layer->name, layer);
-    AddInnerPrimitiveToProfiler(cellInResh, layer->name, layer);
-    AddInnerPrimitiveToProfiler(cellInStr, layer->name, layer);
-    AddInnerPrimitiveToProfiler(concatID, layer->name, layer);
-
-    cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
-    cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, gemmSz);
-    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};  // last crop argument for the cell output; the hidden output passes {0, 0, 0, 0} there (presumably an offset - confirm against cldnn::crop)
-
-    std::string lstm_fc_id = layerName + "_fully_connected";
-    std::string lstm_elt_id = layerName + "_lstm_elt";
-    std::string crop_id = layerName + "_crop";  // NOTE(review): not referenced again in this function
-
-    topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));  // empty bias ID when the layer has no bias blob
-    topology.add(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
-    topology.add(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
-    topology.add(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
-                                    0, 0, {}, {}, cldnn::lstm_weights_order::fizo));
-
-    AddInnerPrimitiveToProfiler(lstm_fc_id, layer->name, layer);
-    AddInnerPrimitiveToProfiler(gemmReshapeID, layer->name, layer);
-    AddInnerPrimitiveToProfiler(gemmReorderID, layer->name, layer);
-    AddInnerPrimitiveToProfiler(lstm_elt_id, layer->name, layer);
-
-    cldnn::primitive_id outputHiddenID = layerName;  // the hidden-state crop takes the layer's own primitive ID
-    topology.add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
-    AddInnerPrimitiveToProfiler(outputHiddenID, layer->name, layer);
-    cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();  // the cell-state crop is named after the layer's second output
-    topology.add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
-    AddInnerPrimitiveToProfiler(outputCellID, layer->name, layer);
-
-    // output primitive IDs
-    primitiveIDs[outputHiddenID] = outputHiddenID;                                // LSTMCell:LSTMCell - "concat hidden"
-    primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = outputHiddenID;   // LSTMCell:LSTMCell:0 - hidden state
-    primitiveIDs[outputCellID] = outputCellID;                                    // LSTMCell:LSTMCell:1 - cell state
-
-    AddPrimitiveToProfiler(layerName, layer, outputHiddenID);
-}
-
-void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {  // Unrolls an LSTM sequence layer into one fully_connected + lstm_elt chain per time step and concatenates the per-step hidden outputs.
-    int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
-    bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true;
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-
-    std::string layerName = layer_type_name_ID(layer);
-    cldnn::primitive_id weightID = layerName + m_weightsTag;
-    cldnn::primitive_id biasID = layerName + m_biasesTag;
-    auto rnnLayer = as<RNNSequenceLayer*> (layer);
-    bool permute_input = (1 != rnnLayer->axis);  // true when the layer's axis is not 1; selects which leading dim is batch vs. sequence below
-
-    /* check incoming CNN layer and setup required variables */
-    {
-        if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
-         THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
-
-        auto in_data0 = layer->insData[0].lock();
-        if (!in_data0)
-            THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;
-
-        const auto in_dims0 = in_data0->getTensorDesc().getDims();
-        const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();
-
-        /* do we have initial hidden and cell?
-        if blobs are not null, direct the data from them
-        into corresponding LSTM inputs */
-        auto in_data1 = layer->insData[1].lock();
-        if (in_data1) {
-            hasInitialHidden = true;
-        }
-
-        auto in_data2 = layer->insData[2].lock();
-        if (in_data2) {
-            hasInitialCell = true;
-        }
-
-        if (in_dims0.size() != 3 ||
-            in_data1->getTensorDesc().getDims().size() != 2 ||  // NOTE(review): in_data1 / in_data2 are dereferenced here although the checks above treat them as possibly null
-            in_data2->getTensorDesc().getDims().size() != 2)
-            THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name;
-
-        if (!permute_input) {
-            lstm_batch_size = in_dims0.front();
-            lstm_sequence_len = in_dims0[1];
-        } else {
-            lstm_batch_size = in_dims0[1];
-            lstm_sequence_len = in_dims0.front();
-        }
-
-        lstm_input_size = in_dims0.back();
-        lstm_hidden_size = out_dims0.back();
-
-        if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD)
-            THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name;
-        isForward = rnnLayer->direction == RNNSequenceLayer::FWD;
-    }
-
-    /* Prepare weight/bias memory primitives */
-    {
-        auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);
-        auto pWeightsBlob = wLayer->_weights;
-        cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1));
-        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()), m_defaultFormat, wTensor);
-        weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout);
-
-        /* create bias memory primitive */
-        auto pBiasBlob = wLayer->_biases;
-        if (pBiasBlob != nullptr) {  // bias is optional; hasBias records whether one was registered
-            cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
-            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);
-
-            biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout);
-            hasBias = true;
-        }
-    }
-
-    std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> input_ids_offsets;  // one (suffix, offset) pair per time step, consumed by cldnn::split
-    std::vector<cldnn::primitive_id> output_ids_offsets;  // IDs of the per-step hidden crops, concatenated at the end
-
-    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
-    cldnn::primitive_id permuteID = layerName + "_inputReorder";
-    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
-
-    //  LSTM primitive works with single precision for all in/out/weights tensors
-    auto lstmPrecision = layer->outData[0]->getPrecision();  // precision of the first output is used for every layout built below
-
-    cldnn::tensor inputShape;
-
-    if (permute_input) {
-        inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 };
-    } else {
-        inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
-    }
-    cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
-    topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
-    topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
-
-    topology.add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));  // NOTE(review): both state reshapes are added regardless of hasInitialHidden / hasInitialCell
-    topology.add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
-
-    AddInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
-    AddInnerPrimitiveToProfiler(permuteID, layerName, layer);
-    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
-    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);
-
-    for (int i = 0; i < lstm_sequence_len; ++i)
-        input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });  // step i: zero-padded suffix and an offset of i in the second tensor component
-
-    cldnn::primitive_id inputSplitID = layerName + "_inputSplit";
-
-    if (permute_input) {
-        topology.add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));  // swap the first two dimensions before splitting
-        AddInnerPrimitiveToProfiler(layerName + "_inputSwap", layerName, layer);
-        topology.add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
-    } else {
-        topology.add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
-    }
-    AddInnerPrimitiveToProfiler(inputSplitID, layerName, layer);
-
-    cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
-    cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, gemmSz);
-    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};  // last crop argument for cell crops; hidden crops pass {0, 0, 0, 0} there (presumably an offset - confirm against cldnn::crop)
-    std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : "";  // state fed into the first step; empty string when no initial state was supplied
-    std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : "";
-
-    for (int i = 0; i < lstm_sequence_len; ++i) {
-        std::string concatID = layerName + "_inputConcat" + get_string_id(i);
-        std::string lstm_fc_id = layerName + "_fully_connected" + get_string_id(i);
-        std::string lstm_fc_resh_id = layerName + "_gemmReshape" + get_string_id(i);
-        std::string lstm_fc_reor_id = layerName + "_gemmReorder" + get_string_id(i);
-        std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i);
-        std::string crop_id = layerName + "_crop" + get_string_id(i);
-
-        int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;  // backward direction walks the split outputs from last to first
-        if (hiddenStr != "") {
-            topology.add(cldnn::concatenation(concatID, { inputSplitID + ":" + get_string_id(seqIdx), hiddenStr },
-                            cldnn::concatenation::concatenation_axis::along_x));
-            AddInnerPrimitiveToProfiler(concatID, layerName, layer);
-            topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));
-            AddInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
-            AddInnerPrimitiveToProfiler(inputSplitID + ":" + get_string_id(seqIdx), layerName, layer);  // the split output is registered with the profiler only on this branch
-        } else {
-            topology.add(cldnn::fully_connected(lstm_fc_id, inputSplitID + ":" + get_string_id(seqIdx), weightID, hasBias ? biasID : ""));  // no hidden state yet: the split output feeds fully_connected directly
-            AddInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
-        }
-
-        topology.add(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
-        topology.add(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
-        topology.add(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id,
-                                            cellStr, 0, 0, {}, {},
-                                            cldnn::lstm_weights_order::fizo));
-        AddInnerPrimitiveToProfiler(lstm_fc_resh_id, layerName, layer);
-        AddInnerPrimitiveToProfiler(lstm_fc_reor_id, layerName, layer);
-        AddInnerPrimitiveToProfiler(lstm_elt_id, layerName, layer);
-
-        hiddenStr = crop_id + ":hidden";  // from here on hiddenStr / cellStr name this step's crops, which the next iteration consumes
-        cellStr = crop_id + ":cell";
-        topology.add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
-        AddInnerPrimitiveToProfiler(hiddenStr, layerName, layer);
-        output_ids_offsets.push_back(hiddenStr);
-
-        if (i < lstm_sequence_len - 1) {  // intermediate steps always need the cell crop for the next iteration
-            topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
-            AddInnerPrimitiveToProfiler(cellStr, layerName, layer);
-        } else {
-            // last hidden state crop (output 2)
-            if (layer->outData.size() > 1) {
-                cldnn::primitive_id outputHiddenID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
-                primitiveIDs[hiddenStr] = hiddenStr;
-                primitiveIDs[outputHiddenID] = hiddenStr;
-            }
-
-            // last cell state crop (output 3)
-            if (layer->outData.size() > 2) {  // on the final step the cell crop is created only when the layer exposes a third output
-                topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
-                cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
-                AddInnerPrimitiveToProfiler(cellStr, layerName, layer);
-                primitiveIDs[outputCellID] = cellStr;
-            }
-        }
-    }
-
-    if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());  // backward steps were pushed last-to-first; reversing orders them by ascending seqIdx
-
-    if (permute_input) {
-        topology.add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
-        AddInnerPrimitiveToProfiler(layerName + "_outputConcat", layerName, layer);
-        topology.add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));  // apply the same {1, 0, 2, 3} swap that was applied to the input
-    } else {
-        topology.add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
-    }
-    primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = layerName;
-    AddPrimitiveToProfiler(layerName, layer);
-}
-
-void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {  // Maps an RNN sequence layer with four inputs onto a single cldnn::lstm_dynamic primitive plus surrounding reshape/permute/reverse primitives.
-    int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
-    bool hasBias = false, reverseSeq = false;  // NOTE(review): hasBias is assigned below but never read in this function
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-
-    auto lstmPrecision = layer->outData[0]->getPrecision();
-    auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(lstmPrecision));  // bytes per element, derived from the first output's precision
-    std::string layerName = layer_type_name_ID(layer);
-    cldnn::primitive_id weightID = layerName + m_weightsTag;
-    cldnn::primitive_id recurrentID = weightID + "_recurrent";
-    cldnn::primitive_id biasID = layerName + m_biasesTag;
-    auto rnnLayer = as<RNNSequenceLayer*>(layer);
-    bool permute_input = (1 != rnnLayer->axis);  // true when the layer's axis is not 1; selects which leading dim is batch vs. sequence below
-    int32_t directions = 1;
-
-    /* check incoming CNN layer and setup required variables */
-    {
-        if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
-            THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
-
-        auto in_data0 = layer->insData[0].lock();
-        if (!in_data0)
-            THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;
-
-        const auto in_dims0 = in_data0->getTensorDesc().getDims();
-        const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();
-
-        auto in_data1 = layer->insData[1].lock();  // NOTE(review): in_data1..in_data3 are dereferenced below without a null check, unlike in_data0
-        auto in_data2 = layer->insData[2].lock();
-        auto in_data3 = layer->insData[3].lock();
-
-        if (in_dims0.size() != 3 ||
-            in_data1->getTensorDesc().getDims().size() != 2 ||
-            in_data2->getTensorDesc().getDims().size() != 2 ||
-            in_data3->getTensorDesc().getDims().size() != 1)
-            THROW_IE_EXCEPTION << "Wrong input shapes for dynamic RNN Layer " << layer->name;
-
-        if (!permute_input) {
-            lstm_batch_size = in_dims0.front();
-            lstm_sequence_len = in_dims0[1];
-        } else {
-            lstm_batch_size = in_dims0[1];
-            lstm_sequence_len = in_dims0.front();
-        }
-
-        lstm_input_size = in_dims0.back();
-        lstm_hidden_size = out_dims0.back();
-
-        if (rnnLayer->direction == RNNSequenceLayer::BDR) {  // BDR selects two directions; otherwise the sequence is reversed only for BWD
-            directions = 2;
-        } else {
-            reverseSeq = rnnLayer->direction == RNNSequenceLayer::BWD;
-        }
-    }
-
-    /* Prepare weight/bias memory primitives - split weight blob into W and R */
-    {
-        const size_t WchunkSz = lstm_input_size * elementSize;  // bytes copied into W per row
-        const size_t RchunkSz = lstm_hidden_size * elementSize;  // bytes copied into R per row
-
-        cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
-        cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
-        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), m_defaultFormat, wTensor);
-        cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), m_defaultFormat, rTensor);
-
-        auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);
-
-        {
-            auto pWeightsBlob = wLayer->_weights;
-            auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
-
-            auto wmem = cldnn::memory::allocate(*m_engine, WLayout);
-            auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor
-
-            auto rmem = cldnn::memory::allocate(*m_engine, RLayout);
-            auto rtmpPointer = rmem.pointer<char>();
-
-            auto wBytes = wtmpPointer.data();
-            auto rBytes = rtmpPointer.data();
-
-            for (int h = 0; h < 4 * lstm_hidden_size; h++) {  // NOTE(review): iterates 4*hidden rows regardless of `directions`, while wTensor/rTensor carry `directions` features - confirm for BDR
-                // copy "input size" elements to W
-                for (size_t b = 0; b < WchunkSz; b++)
-                    *wBytes++ = *blobBytes++;
-
-                // copy "lstm_hidden_size" elements to R
-                for (size_t b = 0; b < RchunkSz; b++)
-                    *rBytes++ = *blobBytes++;
-            }
-
-            topology.add(cldnn::data(weightID, wmem));
-            topology.add(cldnn::data(recurrentID, rmem));
-        }
-
-        /* create bias memory primitive */
-        auto pBiasBlob = wLayer->_biases;
-        if (pBiasBlob != nullptr) {
-            cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(4 * lstm_hidden_size, 1));
-            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);
-
-            auto bmem = cldnn::memory::allocate(*m_engine, BLayout);
-            auto btmpPointer = bmem.pointer<char>();
-
-            auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
-            const size_t BchunkSz = lstm_hidden_size * elementSize;  // NOTE(review): elementSize comes from lstmPrecision, whereas BLayout uses the bias blob's own precision - confirm they always match
-            auto bBytes = btmpPointer.data();
-
-            for (size_t b = 0; b < 4 * BchunkSz; b++)  // NOTE(review): copies 4*hidden elements irrespective of `directions` - confirm for BDR
-                *bBytes++ = *blobBytes++;
-
-            topology.add(cldnn::data(biasID, bmem));
-            hasBias = true;
-        }
-    }
-
-    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
-    cldnn::primitive_id permuteID = layerName + "_inputReorder";
-    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
-
-    cldnn::tensor inputShape;
-
-    if (permute_input) {
-        inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, directions };
-    } else {
-        inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, directions };
-    }
-    cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, directions };
-    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
-    topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
-    topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
-
-    AddInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
-    AddInnerPrimitiveToProfiler(permuteID, layerName, layer);
-
-    topology.add(cldnn::reshape(inHiddenReshapeID + "_1", inputPrimitives[1], hiddenStateShape));  // "_1" / "_2": reshaped second and third layer inputs, passed to lstm_dynamic below
-    topology.add(cldnn::reshape(inHiddenReshapeID + "_2", inputPrimitives[2], hiddenStateShape));
-
-    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
-    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);
-
-    cldnn::primitive_id dynID = layerName + "_dynLength";  // fourth layer input after reshape + reorder; also used as the length input of reverse_sequence
-    cldnn::primitive_id dynReshapeID = layerName + "_dynReshape";
-    cldnn::tensor dynShape = { 1, 1, lstm_batch_size, 1 };
-    cldnn::layout dynLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, dynShape);
-    topology.add(cldnn::reshape(dynReshapeID, inputPrimitives[3], dynShape));
-    topology.add(cldnn::reorder(dynID, dynReshapeID, dynLayout));
-
-    AddInnerPrimitiveToProfiler(dynReshapeID, layerName, layer);
-    AddInnerPrimitiveToProfiler(dynID, layerName, layer);
-
-    cldnn::primitive_id inputID = permuteID;  // inputID / prevInputID track the most recently added primitive in the chain
-    cldnn::primitive_id prevInputID = permuteID;
-
-    if (permute_input) {
-        inputID = layerName + "_inputSwap";
-        topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
-        prevInputID = inputID;
-        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
-    }
-
-    cldnn::primitive_id seq_len_id = layer->name + "seq_lengths";  // NOTE(review): not referenced again in this function
-    if (reverseSeq) {
-        inputID = layerName + "_inputReverse";
-        topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
-        primitivesToIRLayersMap[inputID] = { layer->name };
-        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
-        prevInputID = inputID;
-    }
-
-    // last hidden state crop (output 2)
-    cldnn::primitive_id outputHiddenID = "", outputCellID = "";  // stay empty when the layer has no second / third output
-     if (layer->outData.size() > 1) {
-        outputHiddenID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
-        auto last_hidden_mem = cldnn::memory::allocate(*m_engine,
-        { DataTypeFromPrecision(lstmPrecision),
-            cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } });
-        topology.add(cldnn::mutable_data(outputHiddenID, last_hidden_mem));  // buffer registered as mutable_data and handed to lstm_dynamic by ID
-        primitiveIDs[outputHiddenID] = outputHiddenID;
-    }
-
-    // last cell state crop (output 3)
-    if (layer->outData.size() > 2) {
-        outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
-        auto last_cell_mem = cldnn::memory::allocate(*m_engine,
-        { DataTypeFromPrecision(lstmPrecision),
-            cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } });
-        topology.add(cldnn::mutable_data(outputCellID, last_cell_mem));
-        primitiveIDs[outputCellID] = outputCellID;
-    }
-
-    // main part - dLSTM primitive itself
-    cldnn::primitive_id dlstmID = layerName + "_dlstm";
-    topology.add(cldnn::lstm_dynamic(dlstmID, inputID, dynID,
-        weightID, recurrentID, outputHiddenID, outputCellID, biasID,  // NOTE(review): biasID is passed even when no bias blob was registered above (hasBias is not consulted)
-        inHiddenReshapeID + "_1", inHiddenReshapeID + "_2"));
-    prevInputID = inputID = dlstmID;
-    AddInnerPrimitiveToProfiler(dlstmID, layerName, layer);
-
-    if (reverseSeq) {  // mirror of the input-side reverse_sequence
-        inputID = layerName + "_outputReverse";
-        topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
-        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
-        prevInputID = inputID;
-    }
-
-    if (permute_input) {  // mirror of the input-side permute
-        inputID = layerName + "_outputSwap";
-        topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
-        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
-        prevInputID = inputID;
-    }
-
-    primitiveIDs[inputID] = inputID;  // inputID now names the last primitive in the chain, i.e. the layer's first output
-    primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = inputID;
-    AddPrimitiveToProfiler(layerName, layer, inputID);
-}
-
-void Program::CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {  // Dispatches an RNN layer to the dynamic or the regular (unrolled) LSTM builder based on its input count.
-    if (layer->insData.size() > 3) {  // more than three inputs selects the lstm_dynamic path, which reads insData[3]
-        CreateDynamicLSTM(topology, layer);
-    } else {
-        CreateRegularLSTM(topology, layer);
-    }
-}
-
-};  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
+++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
@ -0,0 +1,206 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// X-macro style op list: this file consists only of REGISTER_FACTORY(op_version, op_name)
+// entries. The including file must define REGISTER_FACTORY before the #include;
+// the guard below stops the build with an explicit error otherwise.
+#ifndef REGISTER_FACTORY
+#error "REGISTER_FACTORY is not defined"
+#endif
+
+// ------------------------------ Supported v0 ops ------------------------------ //
+REGISTER_FACTORY(v0, Abs);
+REGISTER_FACTORY(v0, Acos);
+REGISTER_FACTORY(v0, Asin);
+REGISTER_FACTORY(v0, Atan);
+REGISTER_FACTORY(v0, Ceiling);
+REGISTER_FACTORY(v0, Clamp);
+REGISTER_FACTORY(v0, Concat);
+REGISTER_FACTORY(v0, Constant);
+REGISTER_FACTORY(v0, Convert);
+REGISTER_FACTORY(v0, Cos);
+REGISTER_FACTORY(v0, Cosh);
+REGISTER_FACTORY(v0, CumSum);
+REGISTER_FACTORY(v0, CTCGreedyDecoder);
+REGISTER_FACTORY(v0, DepthToSpace);
+REGISTER_FACTORY(v0, DetectionOutput);
+REGISTER_FACTORY(v0, Elu);
+REGISTER_FACTORY(v0, Erf);
+REGISTER_FACTORY(v0, Exp);
+REGISTER_FACTORY(v0, FakeQuantize);
+REGISTER_FACTORY(v0, Floor);
+REGISTER_FACTORY(v0, Gelu);
+REGISTER_FACTORY(v0, GRN);
+REGISTER_FACTORY(v0, HardSigmoid);
+// REGISTER_FACTORY(v0, Interpolate); Supported via v0 -> v4 conversion
+REGISTER_FACTORY(v0, Log);
+REGISTER_FACTORY(v0, LRN);
+REGISTER_FACTORY(v0, MatMul);
+REGISTER_FACTORY(v0, MVN);
+REGISTER_FACTORY(v0, Negative);
+REGISTER_FACTORY(v0, NormalizeL2);
+REGISTER_FACTORY(v0, Parameter);
+REGISTER_FACTORY(v0, PRelu);
+REGISTER_FACTORY(v0, PriorBox);
+REGISTER_FACTORY(v0, PriorBoxClustered);
+REGISTER_FACTORY(v0, Proposal);
+REGISTER_FACTORY(v0, PSROIPooling);
+REGISTER_FACTORY(v0, Relu);
+REGISTER_FACTORY(v0, Result);
+REGISTER_FACTORY(v0, RegionYolo);
+REGISTER_FACTORY(v0, ReorgYolo);
+REGISTER_FACTORY(v0, ReverseSequence);
+REGISTER_FACTORY(v0, ROIPooling);
+REGISTER_FACTORY(v0, Sigmoid);
+REGISTER_FACTORY(v0, Sqrt);
+REGISTER_FACTORY(v0, Selu);
+REGISTER_FACTORY(v0, Sin);
+REGISTER_FACTORY(v0, Sinh);
+REGISTER_FACTORY(v0, Sign);
+REGISTER_FACTORY(v0, SquaredDifference);
+REGISTER_FACTORY(v0, SpaceToDepth);
+REGISTER_FACTORY(v0, Squeeze);
+REGISTER_FACTORY(v0, ShuffleChannels);
+REGISTER_FACTORY(v0, Tan);
+REGISTER_FACTORY(v0, Tanh);
+REGISTER_FACTORY(v0, Tile);
+REGISTER_FACTORY(v0, Unsqueeze);
+
+// ----------------------------- Unsupported v0 ops ----------------------------- //
+// Deprecated ops
+// REGISTER_FACTORY(v0, Add);
+// REGISTER_FACTORY(v0, Divide);
+// REGISTER_FACTORY(v0, Greater);
+// REGISTER_FACTORY(v0, GreaterEq);
+// REGISTER_FACTORY(v0, Less);
+// REGISTER_FACTORY(v0, LessEq);
+// REGISTER_FACTORY(v0, LSTMSequence);
+// REGISTER_FACTORY(v0, LSTMCell);
+// REGISTER_FACTORY(v0, Maximum);
+// REGISTER_FACTORY(v0, Minimum);
+// REGISTER_FACTORY(v0, Multiply);
+// REGISTER_FACTORY(v0, NotEqual);
+// REGISTER_FACTORY(v0, Power);
+// REGISTER_FACTORY(v0, Quantize);
+// REGISTER_FACTORY(v0, Select);
+// REGISTER_FACTORY(v0, Subtract);
+// REGISTER_FACTORY(v0, Xor); // Not marked as deprecated yet, but removed from new opsets
+
+// REGISTER_FACTORY(v0, BatchNormInference);
+// REGISTER_FACTORY(v0, Range);
+// REGISTER_FACTORY(v0, RNNCell);
+// REGISTER_FACTORY(v0, ShapeOf);
+// REGISTER_FACTORY(v0, TensorIterator);
+
+// ------------------------------ Supported v1 ops ------------------------------ //
+REGISTER_FACTORY(v1, Add);
+REGISTER_FACTORY(v1, AvgPool);
+REGISTER_FACTORY(v1, BatchToSpace);
+REGISTER_FACTORY(v1, BinaryConvolution);
+REGISTER_FACTORY(v1, Broadcast);
+REGISTER_FACTORY(v1, ConvertLike);
+REGISTER_FACTORY(v1, Convolution);
+REGISTER_FACTORY(v1, ConvolutionBackpropData);
+REGISTER_FACTORY(v1, DeformableConvolution);
+REGISTER_FACTORY(v1, DeformablePSROIPooling);
+REGISTER_FACTORY(v1, Divide);
+REGISTER_FACTORY(v1, Equal);
+REGISTER_FACTORY(v1, FloorMod);
+REGISTER_FACTORY(v1, Gather);
+REGISTER_FACTORY(v1, GatherTree);
+REGISTER_FACTORY(v1, Greater);
+REGISTER_FACTORY(v1, GreaterEqual);
+REGISTER_FACTORY(v1, GroupConvolution);
+REGISTER_FACTORY(v1, GroupConvolutionBackpropData);
+REGISTER_FACTORY(v1, Less);
+REGISTER_FACTORY(v1, LessEqual);
+REGISTER_FACTORY(v1, LogicalAnd);
+REGISTER_FACTORY(v1, LogicalNot);
+REGISTER_FACTORY(v1, LogicalOr);
+REGISTER_FACTORY(v1, LogicalXor);
+REGISTER_FACTORY(v1, MaxPool);
+REGISTER_FACTORY(v1, Maximum);
+REGISTER_FACTORY(v1, Minimum);
+REGISTER_FACTORY(v1, Multiply);
+REGISTER_FACTORY(v1, NotEqual);
+// REGISTER_FACTORY(v1, NonMaxSuppression); Supported via v1 -> v5 internal conversion
+REGISTER_FACTORY(v1, OneHot);
+REGISTER_FACTORY(v1, Pad);
+REGISTER_FACTORY(v1, Power);
+REGISTER_FACTORY(v1, ReduceMax);
+REGISTER_FACTORY(v1, ReduceLogicalAnd);
+REGISTER_FACTORY(v1, ReduceLogicalOr);
+REGISTER_FACTORY(v1, ReduceMean);
+REGISTER_FACTORY(v1, ReduceMin);
+REGISTER_FACTORY(v1, ReduceProd);
+REGISTER_FACTORY(v1, ReduceSum);
+REGISTER_FACTORY(v1, Reshape);
+REGISTER_FACTORY(v1, Subtract);
+REGISTER_FACTORY(v1, SpaceToBatch);
+REGISTER_FACTORY(v1, Softmax);
+REGISTER_FACTORY(v1, StridedSlice);
+REGISTER_FACTORY(v1, Select);
+REGISTER_FACTORY(v1, Split);
+REGISTER_FACTORY(v1, Transpose);
+REGISTER_FACTORY(v1, TopK);
+REGISTER_FACTORY(v1, VariadicSplit);
+REGISTER_FACTORY(v1, Mod);
+
+// ----------------------------- Unsupported v1 ops ----------------------------- //
+// REGISTER_FACTORY(v1, Reverse);
+
+// ------------------------------ Supported v3 ops ------------------------------ //
+REGISTER_FACTORY(v3, Asinh);
+REGISTER_FACTORY(v3, Acosh);
+REGISTER_FACTORY(v3, Atanh);
+REGISTER_FACTORY(v3, Broadcast);
+REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum);
+REGISTER_FACTORY(v3, EmbeddingBagPackedSum);
+REGISTER_FACTORY(v3, EmbeddingSegmentsSum);
+REGISTER_FACTORY(v3, ExtractImagePatches);
+// REGISTER_FACTORY(v3, NonMaxSuppression); Supported via v3 -> v5 internal conversion
+
+// ----------------------------- Unsupported v3 ops ----------------------------- //
+// REGISTER_FACTORY(v3, ScatterUpdate); // There is the scatter_update primitive, but seems like it produces wrong results
+// REGISTER_FACTORY(v3, Assign);
+// REGISTER_FACTORY(v3, Bucketize);
+// REGISTER_FACTORY(v3, GRUCell);
+// REGISTER_FACTORY(v3, NonZero);
+// REGISTER_FACTORY(v3, ROIAlign);
+// REGISTER_FACTORY(v3, ReadValue);
+// REGISTER_FACTORY(v3, ScatterElementsUpdate);
+// REGISTER_FACTORY(v3, ScatterUpdate);
+// REGISTER_FACTORY(v3, ScatterNDUpdate);
+// REGISTER_FACTORY(v3, ShapeOf);
+// REGISTER_FACTORY(v3, TopK);
+
+// ------------------------------ Supported v4 ops ------------------------------ //
+REGISTER_FACTORY(v4, HSwish);
+REGISTER_FACTORY(v4, Interpolate);
+REGISTER_FACTORY(v4, LSTMCell);
+REGISTER_FACTORY(v4, Mish);
+// REGISTER_FACTORY(v4, NonMaxSuppression); Supported via v4 -> v5 internal conversion
+REGISTER_FACTORY(v4, Proposal);
+REGISTER_FACTORY(v4, ReduceL1);
+REGISTER_FACTORY(v4, ReduceL2);
+REGISTER_FACTORY(v4, SoftPlus);
+REGISTER_FACTORY(v4, Swish);
+
+// ----------------------------- Unsupported v4 ops ----------------------------- //
+// REGISTER_FACTORY(v4, CTCLoss);
+// REGISTER_FACTORY(v4, Range);
+
+// ------------------------------ Supported v5 ops ------------------------------ //
+REGISTER_FACTORY(v5, HSigmoid);
+REGISTER_FACTORY(v5, LogSoftmax);
+REGISTER_FACTORY(v5, LSTMSequence);
+// REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> internal op conversion (see NonMaxSuppressionIEInternal below)
+REGISTER_FACTORY(v5, Round);
+
+// ----------------------------- Unsupported v5 ops ----------------------------- //
+// REGISTER_FACTORY(v5, BatchNormInference);
+// REGISTER_FACTORY(v5, GatherND);
+// REGISTER_FACTORY(v5, GRUSequence);
+// REGISTER_FACTORY(v5, Loop);
+// REGISTER_FACTORY(v5, RNNSequence);
+
+// --------------------------- Supported internal ops --------------------------- //
+REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);
--- a/inference-engine/src/cldnn_engine/cldnn_program.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp
--- a/inference-engine/src/cldnn_engine/cldnn_program.h
+++ b/inference-engine/src/cldnn_engine/cldnn_program.h
@ -6,65 +6,49 @@

 #include <vector>
 #include <map>
-#include <set>
 #include <memory>
 #include <string>
-#include <utility>
-#include <algorithm>
+#include <cstdint>
+#include <functional>
+#include <type_traits>

 #include <cpp/ie_cnn_network.h>
-#include <legacy/ie_layers.h>
-#include <cpp_interfaces/exception2status.hpp>
-#include <ie_blob.h>
+#include "details/ie_exception.hpp"

-#include "debug_options.h"
-#include "cldnn_custom_layer.h"
 #include "cldnn_config.h"

 #include <api/engine.hpp>
-#include <api/memory.hpp>
 #include <api/topology.hpp>
-#include <api/primitive.hpp>
-#include <api/softmax.hpp>
-#include <api/resample.hpp>
-#include <api/pooling.hpp>
-#include <api/eltwise.hpp>
-#include <api/concatenation.hpp>
-#include <api/detection_output.hpp>
+
+// Forward declarations for cldnn part
+// These names appear in helper signatures of this header (e.g. CreateUnaryEltwiseOp,
+// CreateElementwiseOp), so declaring them avoids pulling the clDNN primitive headers in here.
+// NOTE(review): an opaque enum declaration has to repeat the underlying type of the real
+// definition (implicit int for activation_func, uint16_t / int32_t for the other two) —
+// keep these in sync with the clDNN headers.
+namespace cldnn {
+enum class activation_func;
+struct activation_additional_params;
+enum class reduce_mode : uint16_t;
+enum class eltwise_mode : int32_t;
+}  // namespace cldnn
+
+// Forward declarations for ngraph part
+// NOTE(review): Program::RegisterFactory<OpType>() reads OpType::type_info and evaluates
+// std::is_base_of<ngraph::Node, OpType>, both of which need complete types, so translation
+// units that register factories presumably include the real ngraph headers — confirm.
+namespace ngraph {
+class Node;
+class DiscreteTypeInfo;
+}  // namespace ngraph
+
+// Defines `void __register_<op_name>_<op_version>()`, which registers a factory for
+// ngraph::op::<op_version>::<op_name> through Program::RegisterFactory.
+// The registered lambda down-casts the generic node to the concrete op type, raises
+// THROW_IE_EXCEPTION if the cast fails, and otherwise forwards to Create<op_name>Op(p, op_casted).
+// `Program` and `Create<op_name>Op` are used unqualified, so the macro has to be expanded
+// where both names resolve (presumably inside namespace CLDNNPlugin, after the Create*Op
+// function is declared).
+#define REGISTER_FACTORY_IMPL(op_version, op_name)                                                \
+void __register ## _ ## op_name ## _ ## op_version() {                                            \
+    Program::RegisterFactory<ngraph::op::op_version::op_name>(                                    \
+    [](Program& p, const std::shared_ptr<ngraph::Node>& op) {                                     \
+        auto op_casted = std::dynamic_pointer_cast<ngraph::op::op_version::op_name>(op);          \
+        if (!op_casted)                                                                           \
+            THROW_IE_EXCEPTION << "Invalid ngraph Node type passed into " << __PRETTY_FUNCTION__; \
+        Create##op_name##Op(p, op_casted);                                                        \
+       });                                                                                        \
+}

 namespace CLDNNPlugin {
-template<typename LayerTypePtr>
-LayerTypePtr tryAs(const InferenceEngine::CNNLayerPtr& in_ptr) {
-    return dynamic_cast<LayerTypePtr>(in_ptr.get());
-}

-template<typename LayerTypePtr>
-LayerTypePtr as(const InferenceEngine::CNNLayerPtr& in_ptr) {
-    auto result_ptr = dynamic_cast<LayerTypePtr> (in_ptr.get());
-    if (nullptr == result_ptr) {
-        THROW_IE_EXCEPTION << "CNNLayerPtr is not suitable for casting to requested layer type";
-    }
-    return result_ptr;
-}
-
-inline std::string layer_type_lower(const InferenceEngine::CNNLayer* layer) {
-    std::string layerType = layer->type;
-    std::transform(layerType.begin(), layerType.end(), layerType.begin(),
-        [](unsigned char c) -> unsigned char { return std::tolower(c); });
-    return layerType;
-}
-
-inline std::string layer_type_name_ID(const InferenceEngine::CNNLayer* layer) {
-    return layer_type_lower(layer) + ":" + layer->name;
-}
-
-inline std::string layer_type_lower(InferenceEngine::CNNLayerPtr layer) {
-    return layer_type_lower(layer.get());
-}
-
-inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
-    return layer_type_name_ID(layer.get());
-}
+// Naming helpers for ngraph nodes (declared here, defined out of line).
+// NOTE(review): the CNNLayer-based versions these replace returned the lower-cased layer
+// type and "<lower-cased type>:<layer name>" respectively; presumably the same format is
+// kept for ngraph nodes — confirm in the implementation file.
+std::string layer_type_lower(const ngraph::Node* op);
+std::string layer_type_name_ID(const ngraph::Node* op);
+std::string layer_type_lower(const std::shared_ptr<ngraph::Node>& op);
+std::string layer_type_name_ID(const std::shared_ptr<ngraph::Node>& op);

 struct PerfCounter {
    InferenceEngine::InferenceEngineProfileInfo::LayerStatus status;
@ -85,8 +69,14 @@ public:

 class Program {
 public:
-    Program(InferenceEngine::CNNNetwork &network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
-    std::shared_ptr<cldnn::program> getCompiledProgram(int program_id = 0);
+    Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
+    // Creates an empty Program with no engine attached (m_engine == nullptr) and query mode off.
+    // m_max_batch is initialised too, so that no scalar member is left indeterminate, and the
+    // initialisers are listed in member declaration order (the order in which they actually run).
+    // NOTE(review): 1 is assumed to be the neutral "no dynamic batching" value — confirm
+    // against the main constructor.
+    Program() : m_max_batch(1), m_curBatch(-1), m_engine(nullptr), m_config({}), queryMode(false) {}
+
+    static const cldnn::primitive_id m_preProcessTag;
+    static const cldnn::primitive_id m_meanValuesTag;
+    static const cldnn::primitive_id m_workaroundTag;
+    static const cldnn::primitive_id m_preCustomLayerTag;
+    static const cldnn::primitive_id m_postCustomLayerTag;

    std::map<std::string, cldnn::primitive_id> primitiveIDs;
    std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
@ -103,298 +93,82 @@ public:
    int m_max_batch;
    int m_curBatch;

-    InferenceEngine::OutputsDataMap p_currentOutputs;
-
-    std::vector<cldnn::primitive_id> GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const;
-    const std::map<std::string, cldnn::layout>& getInputLayouts() const { return inputLayouts; }
+    std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
+    const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
+    // The two network-info accessors return by value, i.e. every call copies the map.
+    InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
+    InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
+    // Dereferences m_engine without a null check; the default constructor leaves m_engine
+    // as nullptr, so this must not be called while no engine is attached.
+    const cldnn::engine& GetEngine() const { return *m_engine; }
+    const Config& GetConfig() const { return m_config; }
    int GetMaxBatchSizeForSingleProgram();

-    void AddPrimitiveToProfiler(cldnn::primitive_id id, const InferenceEngine::CNNLayerPtr &layer,
-                                cldnn::primitive_id customOutputId = "");
-
-    void AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
-                                     const InferenceEngine::CNNLayerPtr &layer);
-
-    // internal types
-    enum LayerType {
-        Convolution,
-        DeformableConvolution,
-        ReLU,
-        ReLU6,
-        Sigmoid,
-        TanH,
-        ELU,
-        Activation,
-        Exp,
-        Asin,
-        Atan,
-        Acos,
-        Abs,
-        Asinh,
-        Acosh,
-        Atanh,
-        Not,
-        LRN,
-        Pooling,
-        FullyConnected,
-        SoftMax,
-        LogSoftmax,
-        Power,
-        Split,
-        VariadicSplit,
-        Concatenate,
-        Eltwise,
-        SimplerNMS,
-        ROIPooling,
-        Crop,
-        Deconvolution,
-        PriorBox,
-        DetectionOutput,
-        Normalize,
-        Reshape,
-        Transpose,
-        Permute,
-        Flatten,
-        BatchNormalization,
-        PReLU,
-        ScaleShift,
-        Proposal,
-        PSROIPooling,
-        Clamp,
-        Copy,
-        Resample,
-        Interp,
-        Interpolate,
-        RegionYolo,
-        ReorgYolo,
-        ConstantBlob,
-        ArgMax,
-        ArgMin,
-        MVN,
-        Unpooling,
-        Tile,
-        Pad,
-        LSTMCell,
-        RNN,
-        Gather,
-        DepthToSpace,
-        SpaceToDepth,
-        BatchToSpace,
-        SpaceToBatch,
-        ShuffleChannels,
-        StridedSlice,
-        Broadcast,
-        ReverseSequence,
-        BinaryConvolution,
-        Quantize,
-        Squeeze,
-        Unsqueeze,
-        Reduce,
-        TopK,
-        Floor,
-        Ceil,
-        Ceiling,
-        Erf,
-        HardSigmoid,
-        HSigmoid,
-        Log,
-        Neg,
-        Reciprocal,
-        Selu,
-        Sign,
-        SoftPlus,
-        SoftSign,
-        Swish,
-        HSwish,
-        Mish,
-        Gelu,
-        Sin,
-        Sinh,
-        Cos,
-        Cosh,
-        Tan,
-        Gemm,
-        OneHot,
-        Convert,
-        ConvertLike,
-        GatherTree,
-        ExperimentalDetectronROIFeatureExtractor,
-        NonMaxSuppression,
-        Select,
-        GRN,
-        CTCGreedyDecoder,
-        PriorBoxClustered,
-        CumSum,
-        Round,
-        EmbeddingBagPackedSum,
-        EmbeddingBagOffsetsSum,
-        EmbeddingSegmentsSum,
-        ExtractImagePatches,
-        NO_TYPE
-    };
-    using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
-
-    static LayerType LayerTypeFromStr(const std::string& str);
-
-private:
-    std::vector<std::shared_ptr<cldnn::program>> m_programs;
-    std::shared_ptr<const cldnn::engine> m_engine;
-    Config m_config;
-
-    std::shared_ptr<cldnn::program> BuildProgram(InferenceEngine::CNNNetwork &network);
+    bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);

+    // Profiling utils
    void InitProfileInfo(const std::string& layerName,
                         const std::string& layerType,
                         bool isCPU = false,
                         InferenceEngine::InferenceEngineProfileInfo::LayerStatus status
                         = InferenceEngine::InferenceEngineProfileInfo::EXECUTED,
                         std::string parentId = "");
+    void AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_ptr<ngraph::Node>& op,
+                                cldnn::primitive_id customOutputId = "");
+    void AddPrimitiveToProfiler(const std::shared_ptr<ngraph::Node>& op,
+                                cldnn::primitive_id customOutputId = "");
+    void AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
+                                     const std::shared_ptr<ngraph::Node>& op);

-    static const cldnn::primitive_id m_preProcessTag;
-    static const cldnn::primitive_id m_weightsTag;
-    static const cldnn::primitive_id m_biasesTag;
-    static const cldnn::primitive_id m_meanValuesTag;
-    static const cldnn::primitive_id m_postProcessTag;
-    static const cldnn::primitive_id m_scalesTag;
-    static const cldnn::primitive_id m_workaroundTag;
-    static const cldnn::primitive_id m_preCustomLayerTag;
-    static const cldnn::primitive_id m_postCustomLayerTag;
+    // Graph construction helpers
+    void ValidateInputs(const std::shared_ptr<ngraph::Node>& op, std::vector<size_t> validInputsCount);
+    std::vector<cldnn::primitive_id> GetInputPrimitiveIDs(const std::shared_ptr<ngraph::Node>& op) const;

+    using factory_t = std::function<void(Program&, const std::shared_ptr<ngraph::Node>&)>;
+    using factories_map_t = std::map<ngraph::DiscreteTypeInfo, factory_t>;

-    enum WeightRearrangeType {
-        BroadcastFeatures,
-        FlipDeconvDims,
-        NO_REARRANGE
-    };
-
-    cldnn::format m_defaultFormat;
-    void InitFormat(InferenceEngine::ICNNNetwork &network);
-
-    static cldnn::resample_type ResampleTypeFromString(const std::string &str);
-
-    void Load(InferenceEngine::ICNNNetwork &network);
-    static cldnn::pooling_mode PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding = false);
-    static cldnn::eltwise_mode EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op);
-    static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str);
-    static cldnn::softmax::dimension_t SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer);
-    cldnn::primitive_id CreatePrimitiveFromBlob(cldnn::topology& topology,
-                                                cldnn::primitive_id primID,
-                                                const InferenceEngine::Blob::Ptr pBlob,
-                                                const cldnn::layout& blobLayout,
-                                                size_t blobByteOffset = 0,
-                                                WeightRearrangeType rearrange = NO_REARRANGE);
-    void CreateWeightAndBiasPrimitives(cldnn::topology& topology,
-                                       const InferenceEngine::CNNLayerPtr& layer,
-                                       std::vector<cldnn::primitive_id>& weightsPrimID,
-                                       std::vector<cldnn::primitive_id>& biasesPrimID);
-    void CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
-                                             const InferenceEngine::CNNLayerPtr& layer,
-                                             std::vector<cldnn::primitive_id>& weightsPrimID,
-                                             std::vector<cldnn::primitive_id>& biasesPrimID);
-    void CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology,
-                                           const InferenceEngine::BatchNormalizationLayer* bnLayer,
-                                           cldnn::primitive_id& weightsPrimID,
-                                           cldnn::primitive_id& biasesPrimID);
-    void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
-    void AddInputPrimitive(cldnn::topology& topology,
-                           InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision, const std::string inputName);
-    void AddOutputPrimitive(cldnn::topology& topology,
-                            std::string outputName, const InferenceEngine::DataPtr outputData,
-                            InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED);
-    void CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    bool IsValidSplitConvMerge(const InferenceEngine::SplitLayer* splitLayer) const;
-    bool CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const;
-    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::DataPtr data);
-    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::CNNLayerPtr layer);
-    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::DataPtr data);
-    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer);
-    void AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value);
-
-    GenericBlobMap CreateGenericLayerBlobPrimitives(cldnn::topology& topology, const InferenceEngine::GenericLayer* layer);
-    static void ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames);
-    static bool HasParam(const std::map<std::string, std::string>& layerParams, std::string paramName) {
-        auto p = layerParams.find(paramName);
-        return p != layerParams.end();
+    // Registers `func` as the factory for the ngraph op type OpType, keyed by OpType::type_info.
+    // The overload only exists for types derived from ngraph::Node.
+    // A key that is already present is left untouched (map insertion never overwrites).
+    template<typename OpType, typename std::enable_if<std::is_base_of<ngraph::Node, OpType>::value, int>::type = 0>
+    static void RegisterFactory(factory_t func) {
+        factories_map.emplace(OpType::type_info, func);
     }

-    void changeInputBatch(int batch);
+    template<typename PType>
+    void AddPrimitive(PType prim) {
+        if (m_topology == nullptr) {
+            THROW_IE_EXCEPTION << "m_topology object was not created in clDNNPlugin::Program";
+        }

-    // Layer Primitive Creators
-    void CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer);
-    void CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, bool useGroups = true);
-    void CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateLogSoftmaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
-    void CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
-    void CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer);
-    void CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSpaceToDepthPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateBatchToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateBinaryConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateGatherTreePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateConvertPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateConvertLikePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreatePyramidRoIAlignPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateNonMaxSuppressionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
-    void CreateSelectPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateGRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateRoundPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
-    void CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+        m_topology->add(prim);
+    }
+
+private:
+    static factories_map_t factories_map;
+    std::vector<std::shared_ptr<cldnn::program>> m_programs;
+    std::shared_ptr<const cldnn::engine> m_engine;
+    Config m_config;
+
+    std::shared_ptr<cldnn::topology> m_topology;
+    InferenceEngine::InputsDataMap m_networkInputs;
+    InferenceEngine::OutputsDataMap m_networkOutputs;
+
+    bool queryMode;
+
+    void EnableQueryMode() { queryMode = true; }
+    void DisableQueryMode() { queryMode = false; }
+
+    void PrepareBuild(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs);
+    void CleanupBuild();
+    std::shared_ptr<cldnn::program> BuildProgram(std::vector<std::shared_ptr<ngraph::Node>> ops,
+                                                 InferenceEngine::InputsDataMap networkInputs,
+                                                 InferenceEngine::OutputsDataMap networkOutputs);
+
+    void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
+    bool CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const;
+    void ChangeInputBatch(int batch);
 };

+void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CLDNNCustomLayerPtr customLayer);
+void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node,
+                          cldnn::activation_func func, cldnn::activation_additional_params params);
+void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node, cldnn::eltwise_mode mode);
+
+bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node);
+
 }  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/cldnn_remote_context.h
+++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.h
@ -28,7 +28,7 @@
 namespace CLDNNPlugin {
 class CLDNNRemoteAllocator;

-class CLDNNRemoteBlobImpl : public gpu::details::param_map_obj_getter {
+class CLDNNRemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
    friend class CLDNNRemoteAllocator;
 public:
    enum BlobType {
@ -40,24 +40,24 @@ public:
        BT_DX_BUF_SHARED,
    };

-    explicit CLDNNRemoteBlobImpl(gpu::ClContext::Ptr context,
-        const cldnn::layout& layout,
-        cldnn::shared_handle mem,
-        cldnn::shared_surface surf,
-        uint32_t plane = 0,
-        BlobType mem_type = BT_BUF_INTERNAL);
+    explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
+                                 const cldnn::layout& layout,
+                                 cldnn::shared_handle mem,
+                                 cldnn::shared_surface surf,
+                                 uint32_t plane = 0,
+                                 BlobType mem_type = BT_BUF_INTERNAL);

    void allocate() noexcept;
    bool deallocate() noexcept;
-    ParamMap getParams() const;
+    InferenceEngine::ParamMap getParams() const;
    std::string getDeviceName() const noexcept;
-    std::shared_ptr<RemoteContext> getContext() const noexcept;
-    LockedMemory<void> buffer() noexcept;
-    LockedMemory<const void> cbuffer() const noexcept;
-    LockedMemory<void> rwmap()noexcept;
-    LockedMemory<const void> rmap() const noexcept;
-    LockedMemory<void> wmap()noexcept;
-    const std::shared_ptr<IAllocator> &getAllocator() const noexcept;
+    std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept;
+    InferenceEngine::LockedMemory<void> buffer() noexcept;
+    InferenceEngine::LockedMemory<const void> cbuffer() const noexcept;
+    InferenceEngine::LockedMemory<void> rwmap()noexcept;
+    InferenceEngine::LockedMemory<const void> rmap() const noexcept;
+    InferenceEngine::LockedMemory<void> wmap()noexcept;
+    const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept;
    void *getHandle() const noexcept { return _handle; }

    bool is_allocated() const noexcept;
@ -67,7 +67,7 @@ public:

 protected:
    static CLDNNRemoteAllocator m_allocator;
-    std::weak_ptr<gpu::ClContext> m_context;
+    std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;

    // constructor stuff
    cldnn::shared_handle m_mem;
@ -81,10 +81,10 @@ protected:

    mutable std::unique_ptr<cldnn::pointer<uint8_t>> lockedHolder;
    mutable void* _handle;
-    mutable std::shared_ptr<IAllocator> _allocator;
+    mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;

-    void  lock() const;
-    void  unlock() const;
+    void lock() const;
+    void unlock() const;
 };

 template<typename TpublicAPI>
@ -92,45 +92,44 @@ class typedCLDNNRemoteBlob : public TpublicAPI {
 public:
    using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;

-    explicit typedCLDNNRemoteBlob(gpu::ClContext::Ptr context,
-        const TensorDesc& desc,
-        const cldnn::layout& layout,
-        cldnn::shared_handle mem,
-        cldnn::shared_surface surf,
-        uint32_t plane,
-        CLDNNRemoteBlobImpl::BlobType mem_type)
-        : _impl(context, layout, mem,
-            surf,
-            plane, mem_type), TpublicAPI(desc) {}
+    /**
+     * @brief Constructs the public blob wrapper together with the implementation object it forwards to
+     * @param context Context pointer, forwarded to CLDNNRemoteBlobImpl
+     * @param desc Tensor descriptor, passed to the TpublicAPI base class
+     * @param layout clDNN layout, forwarded to CLDNNRemoteBlobImpl
+     * @param mem Shared memory handle, forwarded to CLDNNRemoteBlobImpl
+     * @param surf Shared surface, forwarded to CLDNNRemoteBlobImpl
+     * @param plane Plane index, forwarded to CLDNNRemoteBlobImpl
+     * @param mem_type Blob type, forwarded to CLDNNRemoteBlobImpl
+     */
+    explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
+                                  const InferenceEngine::TensorDesc& desc,
+                                  const cldnn::layout& layout,
+                                  cldnn::shared_handle mem,
+                                  cldnn::shared_surface surf,
+                                  uint32_t plane,
+                                  CLDNNRemoteBlobImpl::BlobType mem_type)
+        // Base class is listed first because C++ initializes bases before members regardless of the
+        // order written here; keeping the list in that order avoids -Wreorder and misleading reads.
+        : TpublicAPI(desc)
+        , _impl(context, layout, mem, surf, plane, mem_type) {}

    void allocate() noexcept override { _impl.allocate(); }
    bool deallocate() noexcept override { return _impl.deallocate(); }
-    ParamMap getParams() const override { return _impl.getParams(); }
+    InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
    std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
-    std::shared_ptr<RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
-    LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
-    LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
-    LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
-    LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
-    LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
+    std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
+    InferenceEngine::LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
+    InferenceEngine::LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
+    InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
+    InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
+    InferenceEngine::LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
    CLDNNRemoteBlobImpl* getImpl() { return &_impl; }

 protected:
-    const std::shared_ptr<IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
+    const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
    void *getHandle() const noexcept override { return _impl.getHandle(); }
    CLDNNRemoteBlobImpl _impl;
 };

-using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<gpu::ClBufferBlob>;
-using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<gpu::ClImage2DBlob>;
+using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
+using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
 #ifdef WIN32
-using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<gpu::D3DBufferBlob>;
-using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<gpu::D3DSurface2DBlob>;
+using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
+using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
 #else
-using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<gpu::VASurfaceBlob>;
+using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
 #endif

-inline CLDNNRemoteBlobImpl* getBlobImpl(gpu::ClBlob* blobPtr) {
+inline CLDNNRemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
 #ifdef WIN32
    {
        auto ptr = blobPtr->as<CLDNNRemoteD3DSurface>();
@ -157,7 +156,7 @@ inline CLDNNRemoteBlobImpl* getBlobImpl(gpu::ClBlob* blobPtr) {
    return nullptr;
 }

-class CLDNNRemoteAllocator : public IAllocator {
+class CLDNNRemoteAllocator : public InferenceEngine::IAllocator {
 protected:
    friend class CLDNNRemoteBlobImpl;
    std::atomic_flag _lock;
@ -181,13 +180,13 @@ public:
    * @brief Maps handle to heap memory accessible by any memory manipulation routines.
    * @return Generic pointer to memory
    */
-    void* lock(void* handle, LockOp = LOCK_FOR_WRITE)  noexcept override { return nullptr; };
+    // This override performs no mapping: it ignores both arguments and always returns nullptr.
+    void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return nullptr; }
    /**
    * @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
    * The multiple sequential mappings of the same handle are supposed to get the same
    * result while there isn't a ref counter supported.
    */
-    void  unlock(void* handle) noexcept override;
+    void unlock(void* handle) noexcept override;
    /**
    * @brief Allocates memory
    * @param size The size in bytes to allocate
@ -198,12 +197,12 @@ public:
    * @brief Releases handle and all associated memory resources which invalidates the handle.
    * @return false if handle cannot be released, otherwise - true.
    */
-    bool   free(void* handle) noexcept override { return true; }
+    bool free(void* handle) noexcept override { return true; }

    void Release() noexcept override {}
 };

-class CLDNNExecutionContextImpl : public gpu::details::param_map_obj_getter {
+class CLDNNExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
 public:
    enum ContextType {
        OCL,
@ -213,17 +212,17 @@ public:
    using Ptr = std::shared_ptr<CLDNNExecutionContextImpl>;
    using CPtr = std::shared_ptr<const CLDNNExecutionContextImpl>;

-    explicit CLDNNExecutionContextImpl(std::shared_ptr<IInferencePlugin> plugin,
-        const ParamMap& params,
-        const Config& config = {});
+    explicit CLDNNExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
+                                       const InferenceEngine::ParamMap& params,
+                                       const Config& config = {});

-    ParamMap getParams() const;
+    InferenceEngine::ParamMap getParams() const;
    std::string getDeviceName() const noexcept;

    std::shared_ptr<cldnn::engine> GetEngine() const { return m_engine; }
    Config& GetConfig() { return m_config; }
    ContextType GetType() const { return m_type; }
-    const std::weak_ptr<IInferencePlugin> GetPlugin() const { return m_plugin; }
+    const std::weak_ptr<InferenceEngine::IInferencePlugin> GetPlugin() const { return m_plugin; }

    void acquire_lock() {
        while (lock.test_and_set(std::memory_order_acquire)) {}
@ -235,11 +234,11 @@ public:

 protected:
    std::shared_ptr<cldnn::engine> m_engine;
-    gpu_handle_param m_va_display;
+    InferenceEngine::gpu_handle_param m_va_display;
    Config m_config;

    ContextType m_type;
-    std::weak_ptr<IInferencePlugin> m_plugin;
+    std::weak_ptr<InferenceEngine::IInferencePlugin> m_plugin;
    std::atomic_flag lock;
 };

@ -263,18 +262,19 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
 #else
    using surf_key = _Key<cldnn::shared_surface, uint32_t>;
 #endif
-    std::map<surf_key, RemoteBlob::Ptr> shared_surf_reg;
-    std::map<cldnn::shared_handle, RemoteBlob::Ptr> shared_obj_reg;
+    std::map<surf_key, InferenceEngine::RemoteBlob::Ptr> shared_surf_reg;
+    std::map<cldnn::shared_handle, InferenceEngine::RemoteBlob::Ptr> shared_obj_reg;

-    RemoteBlob::Ptr reuse_surf(const TensorDesc& tensorDesc,
-        const ParamMap& params) {
-        RemoteBlob::Ptr ret = nullptr;
-        uint32_t plane = gpu::details::param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
+    InferenceEngine::RemoteBlob::Ptr reuse_surf(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params) {
+        using namespace InferenceEngine;
+        using InferenceEngine::gpu::details::param_map_obj_getter;
+        InferenceEngine::RemoteBlob::Ptr ret = nullptr;
+        uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
 #ifdef WIN32
-        cldnn::shared_handle mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
+        cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
        surf_key skey(mem, plane);
 #else
-        cldnn::shared_surface surf = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
+        cldnn::shared_surface surf = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
        surf_key skey(surf, plane);
 #endif
        _impl.acquire_lock();
@ -289,7 +289,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
                ImageFormatFromLayout(tensorDesc.getLayout()),
                CldnnTensorFromIEDims(tensorDesc.getDims()));
            auto smart_this =
-                std::dynamic_pointer_cast<gpu::ClContext>
+                std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
                (std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
 #ifdef WIN32
            ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this,
@ -307,10 +307,10 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
        return ret;
    }

-    RemoteBlob::Ptr reuse_obj(const TensorDesc& tensorDesc,
-        cldnn::shared_handle mem,
-        CLDNNRemoteBlobImpl::BlobType blob_type) {
-        RemoteBlob::Ptr ret = nullptr;
+    InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
+                                               cldnn::shared_handle mem,
+                                               CLDNNRemoteBlobImpl::BlobType blob_type) {
+        InferenceEngine::RemoteBlob::Ptr ret = nullptr;

        _impl.acquire_lock();

@ -321,26 +321,23 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
        } else {
            // unluckily, not found - create new and insert into registry
            cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
-                FormatFromLayout(tensorDesc.getLayout()),
-                CldnnTensorFromIEDims(tensorDesc.getDims()));
+                                 FormatFromLayout(tensorDesc.getLayout()),
+                                 CldnnTensorFromIEDims(tensorDesc.getDims()));
            auto smart_this =
-                std::dynamic_pointer_cast<gpu::ClContext>
+                std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
                (std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());

            switch (blob_type) {
            case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
-                ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
-                    tensorDesc, layout, mem, 0, 0, blob_type);
+                ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
                break;
            case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
                layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
-                ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this,
-                    tensorDesc, layout, mem, 0, 0, blob_type);
+                ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
                break;
 #ifdef WIN32
            case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
-                ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this,
-                    tensorDesc, layout, mem, 0, 0, blob_type);
+                ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
                break;
 #endif
            default:
@ -353,17 +350,17 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
        return ret;
    }

-    RemoteBlob::Ptr create_buffer(const TensorDesc& tensorDesc) {
+    InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
        cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
-            FormatFromLayout(tensorDesc.getLayout()),
-            CldnnTensorFromIEDims(tensorDesc.getDims()));
-        auto smart_this = std::dynamic_pointer_cast<gpu::ClContext>
+                             FormatFromLayout(tensorDesc.getLayout()),
+                             CldnnTensorFromIEDims(tensorDesc.getDims()));
+        auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
            (std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
        return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
-            tensorDesc,
-            layout,
-            nullptr, 0, 0,
-            CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
+                                                     tensorDesc,
+                                                     layout,
+                                                     nullptr, 0, 0,
+                                                     CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
    }

    void check_if_shared() {
@ -374,21 +371,23 @@ public:
    using Ptr = std::shared_ptr<typedCLDNNExecutionContext>;
    using CPtr = std::shared_ptr<const typedCLDNNExecutionContext>;

-    explicit typedCLDNNExecutionContext(std::shared_ptr<IInferencePlugin> plugin,
-        const ParamMap& params,
-        const Config& config = {})
+    explicit typedCLDNNExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
+                                        const InferenceEngine::ParamMap& params,
+                                        const Config& config = {})
        : _impl(plugin, params, config) {}

-    ParamMap getParams() const noexcept override { return _impl.getParams(); }
+    InferenceEngine::ParamMap getParams() const noexcept override { return _impl.getParams(); }
    std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }

-    RemoteBlob::Ptr CreateBlob(const TensorDesc& tensorDesc, const ParamMap& params = {}) override {
+    InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params = {}) override {
+        using namespace InferenceEngine;
+        using InferenceEngine::gpu::details::param_map_obj_getter;
        if (params.empty()) {
            // user wants clDNN to allocate blob by itself and return handle
            return create_buffer(tensorDesc);
        } else {
            // user will supply shared object handle
-            std::string memTypeStr = gpu::details::param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
+            std::string memTypeStr = param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));

            if (GPU_PARAM_VALUE(VA_SURFACE) == memTypeStr) {
                check_if_shared();
@ -399,14 +398,14 @@ public:

                if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED;
-                    mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
+                    mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
                } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED;
-                    mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
+                    mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
 #ifdef WIN32
                } else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
-                    mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
+                    mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
                    check_if_shared();
 #endif
                } else {
@ -426,14 +425,14 @@ protected:
    CLDNNExecutionContextImpl _impl;
 };

-using CLDNNRemoteCLContext = typedCLDNNExecutionContext<gpu::ClContext>;
+using CLDNNRemoteCLContext = typedCLDNNExecutionContext<InferenceEngine::gpu::ClContext>;
 #ifdef WIN32
-using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<gpu::D3DContext>;
+using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<InferenceEngine::gpu::D3DContext>;
 #else
-using CLDNNRemoteVAContext = typedCLDNNExecutionContext<gpu::VAContext>;
+using CLDNNRemoteVAContext = typedCLDNNExecutionContext<InferenceEngine::gpu::VAContext>;
 #endif

-inline CLDNNExecutionContextImpl* getContextImpl(gpu::ClContext::Ptr ctxPtr) {
+inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
 #ifdef WIN32
    {
        auto ptr = ctxPtr->as<CLDNNRemoteD3DContext>();
--- a/inference-engine/src/cldnn_engine/debug_options.cpp
+++ b/inference-engine/src/cldnn_engine/debug_options.cpp
@ -1,326 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <iostream>
-#include <iomanip>
-#ifndef NDEBUG
-    #include <algorithm>
-    #include <cmath>
-#endif
-
-#include "debug_options.h"
-
-namespace CLDNNPlugin {
-
-DebugOptions::DebugOptions() {
-    m_bDebugLayerContent =
-#ifdef _DEBUG_LAYER_CONTENT
-        true;
-#else
-        false;
-#endif
-
-    m_bDebugLayerContentIndexed =
-#ifdef _DEBUG_LAYER_CONTENT_INDEXED
-        true;
-#else
-        false;
-#endif
-
-    m_bDebugLayerFormat =
-#ifdef _DEBUG_LAYER_FORMAT
-        true;
-#else
-        false;
-#endif
-
-    m_bPluginPerfPrints =
-#ifdef _PLUGIN_PERF_PRINTS
-        true;
-#else
-        false;
-#endif
-
-    m_maxPrintSize =
-#ifdef _DEBUG_LAYER_CONTENT_FULL
-        1000000000;
-#else
-        3;
-#endif
-}
-
-void DebugOptions::PrintOptions() const {
-#ifndef NDEBUG
-    std::cout << "Debug Options:" << std::endl;
-    std::cout << "\tDebug Layer Content: " << m_bDebugLayerContent << std::endl;
-    std::cout << "\tDebug Layer Content Indexed: " << m_bDebugLayerContentIndexed << std::endl;
-    std::cout << "\tDebug Layers Format: " << m_bDebugLayerFormat << std::endl;
-    std::cout << "\tPlugin Performance Prints: " << m_bPluginPerfPrints << std::endl;
-    std::cout << "\tPrint Size: " << m_maxPrintSize << std::endl;
-#endif  // NDEBUG
-}
-
-std::string DebugOptions::GetFormatName(cldnn::format::type format) {
-    switch (format) {
-    case cldnn::format::yxfb:
-        return "yxfb";
-    case cldnn::format::byxf:
-        return "byxf";
-    case cldnn::format::bfyx:
-        return "bfyx";
-    case cldnn::format::fyxb:
-        return "fyxb";
-    default:
-        return "Unknown Format";
-    }
-}
-
-std::string DebugOptions::GetDataTypeName(cldnn::data_types dataType) {
-    switch (dataType) {
-    case cldnn::data_types::f16:
-        return "f16";
-    case cldnn::data_types::f32:
-        return "f32";
-    default:
-        return "Unknown Data Type";
-    }
-}
-
-void DebugOptions::PrintInput(const InferenceEngine::TBlob<float>& input) const {
-#ifndef NDEBUG
-    const float* inputBlobPtr = input.readOnly();
-
-    if (m_bDebugLayerContent) {
-        std::cout << "Input (" << input.size() << ") = ";
-        for (size_t i = 0; i < std::min<size_t>(m_maxPrintSize, input.size()); i++) {
-            std::cout << inputBlobPtr[i] << ", ";
-        }
-        std::cout << std::endl;
-    }
-#endif  // NDEBUG
-}
-
-float DebugOptions::SimpleConvertFP16toFP32(uint16_t u16val) {
-#ifndef NDEBUG
-    // convert to fp32 (1,5,10)->(1,8,23)
-    // trivial conversion not handling inf/denorm
-    uint32_t sign = (u16val & 0x8000U) << 16;
-    uint32_t mantissa = (u16val & 0x3FFU) << 13;
-    uint32_t exp_val_f16 = (u16val & 0x7C00U) >> 10;
-    uint32_t exp = (exp_val_f16 == 0x1FU ? 0xFFU : exp_val_f16 + 127 - 15) << 23;;
-    uint32_t val = sign | exp | mantissa;
-    float fval = *(reinterpret_cast<float*>(&val));
-    return (fabs(fval) < 1e-4f) ? 0.0f : fval;  // clamp epsilon fp16 to 0
-#endif  // NDEBUG
-    return 0;
-}
-void DebugOptions::PrintIndexedValue(const cldnn::memory& mem, const cldnn::tensor index) const {
-#ifndef NDEBUG
-    auto layout = mem.get_layout();
-    float fval;
-    switch (layout.data_type) {
-    case cldnn::data_types::f32: {
-        auto p32 = mem.pointer<float>();
-        auto resPtrF32 = p32.data();
-        fval = resPtrF32[CalcLinearIndex(layout, index)];
-    }
-    break;
-    case cldnn::data_types::f16:
-    {
-        auto p16 = mem.pointer<uint16_t>();
-        auto resPtrU16 = p16.data();
-        fval = SimpleConvertFP16toFP32(resPtrU16[CalcLinearIndex(layout, index)]);
-    }
-    break;
-    default:
-        assert(0);  // unhandled data type
-        fval = 0.0f;
-    }
-
-    if (m_bDebugLayerContentIndexed) {
-        std::cout << "\t[";
-        for (size_t i = 0; i < index.raw.size(); i++) {
-            std::cout << index.raw[i] << ",";
-        }
-        std::cout << "] = " << fval << "\n";
-    } else {
-        std::cout << fval << ", ";
-    }
-#endif  // NDEBUG
-}
-
-uint32_t DebugOptions::CalcLinearIndex(const cldnn::layout& memLayout, const cldnn::tensor index) {
-#ifndef NDEBUG
-    uint32_t bPitch, fPitch, xPitch, yPitch;
-    switch (memLayout.format) {
-    case cldnn::format::yxfb:
-        bPitch = 1;
-        fPitch = memLayout.size.batch[0] * bPitch;
-        xPitch = memLayout.size.feature[0] * fPitch;
-        yPitch = memLayout.size.spatial[1] * xPitch;
-        return (index.batch[0] * bPitch)
-            + (index.feature[0] * fPitch)
-            + (index.spatial[1] * xPitch)
-            + (index.spatial[0] * yPitch);
-        break;
-    case cldnn::format::bfyx:
-        xPitch = 1;
-        yPitch = memLayout.size.spatial[1] * xPitch;
-        fPitch = memLayout.size.spatial[0] * yPitch;
-        bPitch = memLayout.size.feature[0] * fPitch;
-        return (index.batch[0] * bPitch)
-            + (index.feature[0] * fPitch)
-            + (index.spatial[1] * xPitch)
-            + (index.spatial[0] * yPitch);
-        break;
-    default:
-        assert(0);
-        return 0;
-    }
-#endif  // NDEBUG
-    return 0;
-}
-
-void DebugOptions::PrintNetworkOutputs(std::map<cldnn::primitive_id, cldnn::network_output>& outputsMap) const {
-#ifndef NDEBUG
-    if (!m_bDebugLayerContent && !m_bDebugLayerFormat) {
-        return;
-    }
-
-    for (auto& layer : outputsMap) {
-        std::cout << layer.first << ":\n";
-        auto mem = layer.second.get_memory();
-        auto layout = mem.get_layout();
-        if (m_bDebugLayerFormat) {
-            std::string formatName = GetFormatName(layout.format);
-            std::string datatypeName = GetDataTypeName(layout.data_type);
-            std::cout << "  Layout: ( " <<
-                GetDataTypeName(layout.data_type) << ", " <<
-                GetFormatName(layout.format) << ", [";
-            for (auto s : layout.size.sizes()) {
-                std::cout << s << ",";
-            }
-            std::cout << "] )\n";
-        }
-        if (m_bDebugLayerContent) {
-            DumpSingleOutput(layer.first, outputsMap);
-            std::cout << "\n";
-        }
-    }
-#endif  // NDEBUG
-}
-
-void DebugOptions::DumpSingleOutput(cldnn::primitive_id name, std::map<cldnn::primitive_id, cldnn::network_output>& outputs, bool bSingleFeatureMap) const {
-#ifndef NDEBUG
-    if (outputs.find(name) == outputs.end()) {
-        std::cout << "Couldn't find output: " << name << std::endl;
-        return;
-    }
-
-    auto output = outputs.at(name);
-    std::cout << name << ":\n";
-    auto mem = output.get_memory();
-    auto layout = mem.get_layout();
-    cldnn::tensor lowerPad = layout.data_padding.lower_size();
-    cldnn::tensor upperPad = layout.data_padding.upper_size();
-    {   // format
-        std::string formatName = GetFormatName(layout.format);
-        std::string datatypeName = GetDataTypeName(layout.data_type);
-        std::cout << "  Layout: ( " <<
-            GetDataTypeName(layout.data_type) << ", " <<
-            GetFormatName(layout.format) << ", [";
-        for (auto s : layout.size.sizes()) {
-            std::cout << s << ",";
-        }
-        std::cout << "] [";
-        for (auto p : layout.data_padding.lower_size().sizes()) {
-            std::cout << p << ",";
-        }
-        std::cout << "] [";
-        for (auto p : layout.data_padding.upper_size().sizes()) {
-            std::cout << p << ",";
-        }
-        std::cout << "] )\n";
-    }
-    {   // content
-        switch (layout.format) {
-        case cldnn::format::bfyx:
-        {
-            std::vector<size_t> pitches;
-            size_t elements = 1;
-            if (bSingleFeatureMap) {
-                elements = layout.size.spatial[1] * layout.size.spatial[0];
-            } else {
-                for (int i = 0; i < 4; i++) {
-                    elements *= layout.size.sizes()[i] + lowerPad.sizes()[i] + upperPad.sizes()[i];
-                }
-            }
-            pitches.push_back(layout.size.spatial[0] + lowerPad.spatial[0] + upperPad.spatial[0]);  // x or width - rowpitch
-            pitches.push_back(pitches[0] * (layout.size.spatial[1] + lowerPad.spatial[1] + upperPad.spatial[1]));  // slice pitch
-            pitches.push_back(pitches[0] * pitches[1] * layout.size.feature[0]);  // depth/feature pitch
-            if (layout.data_type == cldnn::data_types::f32)
-                DumpElementsRaw<float>(mem, pitches, elements);
-            else
-                DumpElementsRaw<uint16_t>(mem, pitches, elements);
-            break;
-        }
-        default:
-            assert(0);  // unhandled format
-            return;
-        }
-        std::cout << "\n";
-    }
-#endif  // NDEBUG
-}
-
-void DebugOptions::AddTimedEvent(std::string eventName, std::string startingAt) {
-#ifdef _PLUGIN_PERF_PRINTS
-    m_TimedEventTimestamp[eventName] = std::chrono::steady_clock::now();
-    if (startingAt.compare(std::string()) == 0) {
-        startingAt = eventName;
-    }
-    m_TimedEventStart[eventName] = startingAt;
-#endif  // _PLUGIN_PERF_PRINTS
-}
-
-void DebugOptions::PrintTimedEvents() {
-#ifdef _PLUGIN_PERF_PRINTS
-    for (auto& e : m_TimedEventStart) {
-        if (e.first.compare(e.second)) {
-            std::cout << "[Plugin Internal Metric]: \t" << e.first << " took: " <<
-                std::chrono::duration_cast<std::chrono::duration<double, std::chrono::milliseconds::period>>
-                (m_TimedEventTimestamp[e.first] - m_TimedEventTimestamp[e.second]).count() << " ms\n";
-        }
-    }
-#endif  // _PLUGIN_PERF_PRINTS
-}
-
-void DebugOptions::ClearTimedEvents() {
-#ifdef _PLUGIN_PERF_PRINTS
-    m_TimedEventStart.clear();
-    m_TimedEventTimestamp.clear();
-#endif  // _PLUGIN_PERF_PRINTS
-}
-
-void DebugOptions::EnableWA(std::string name) {
-#ifndef NDEBUG
-    m_workaroundNames.insert(name);
-#endif  // NDEBUG
-}
-
-void DebugOptions::DisableWA(std::string name) {
-#ifndef NDEBUG
-    m_workaroundNames.erase(name);
-#endif  // NDEBUG
-}
-
-bool DebugOptions::IsWAActive(std::string name) {
-#ifndef NDEBUG
-    return (m_workaroundNames.find(name) != m_workaroundNames.end());
-#else
-    return false;
-#endif  // NDEBUG
-}
-
-};  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/debug_options.h
+++ b/inference-engine/src/cldnn_engine/debug_options.h
@ -1,85 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include <string>
-#include <set>
-#include <map>
-#include <algorithm>
-#include "cpp/ie_cnn_network.h"
-#include <api/memory.hpp>
-#include <api/primitive.hpp>
-#include <api/network.hpp>
-
-// Debugging options flags
-// #define _DEBUG_LAYER_CONTENT
-// #define _DEBUG_LAYER_CONTENT_FULL
-// #define _DEBUG_LAYER_FORMAT
-// #define _PLUGIN_PERF_PRINTS
-
-namespace CLDNNPlugin {
-
-class DebugOptions {
-public:
-    bool m_bDebugLayerContent;
-    bool m_bDebugLayerContentIndexed;
-    bool m_bDebugLayerFormat;
-    bool m_bPluginPerfPrints;
-    cldnn::tensor::value_type m_maxPrintSize;
-
-    DebugOptions();
-    void PrintOptions() const;
-    static std::string GetFormatName(cldnn::format::type format);
-    static std::string GetDataTypeName(cldnn::data_types dataType);
-    void PrintInput(const InferenceEngine::TBlob<float>& input) const;
-    void PrintIndexedValue(const cldnn::memory& mem, const cldnn::tensor index) const;
-    static uint32_t CalcLinearIndex(const cldnn::layout& memLayout, const cldnn::tensor index);
-
-    void PrintNetworkOutputs(std::map<cldnn::primitive_id, cldnn::network_output>& outputsMap) const;
-    void DumpSingleOutput(cldnn::primitive_id name, std::map<cldnn::primitive_id, cldnn::network_output>& outputs, bool bSingleFeatureMap = false)const;
-
-    // the functions below will work in release unlike the rest
-    void AddTimedEvent(std::string eventName, std::string startingAt = std::string());
-    void PrintTimedEvents();
-    void ClearTimedEvents();
-
-    void EnableWA(std::string name);
-    void DisableWA(std::string name);
-    bool IsWAActive(std::string name);
-
-protected:
-    std::map<std::string, std::chrono::steady_clock::time_point> m_TimedEventTimestamp;
-    std::map<std::string, std::string> m_TimedEventStart;
-    std::set<std::string> m_workaroundNames;
-
-    static float SimpleConvertFP16toFP32(uint16_t u16val);
-
-    template <typename T>
-    static void DumpElementsRaw(cldnn::memory& mem, const std::vector<size_t>& pitches, size_t numElements) {
-#ifndef NDEBUG
-        auto layout = mem.get_layout();
-        auto ptr = mem.pointer<T>();
-        auto data = ptr.data();  // +offset;
-        auto elements = std::min(layout.count(), numElements);
-        for (size_t i = 0; i < elements;) {
-            // size_t linearAddress = ... // todo calc linear with pitches
-            std::cout << std::setprecision(10)
-                      << ((layout.data_type == cldnn::data_types::f32) ? data[i] : cldnn::half_to_float(uint16_t(data[i])))
-                      << ", ";
-            i++;
-            for (auto& pitch : pitches) {
-                if ((i % pitch) == 0) {
-                    std::cout << std::endl;
-                }
-            }
-        }
-#endif  // NDEBUG
-    }
-};
-
-};  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp
+++ b/inference-engine/src/cldnn_engine/ops/batch_to_space.cpp
@ -0,0 +1,53 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/batch_to_space.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/batch_to_space.hpp"
+
+namespace CLDNNPlugin {
+
+// Lowers ngraph v1::BatchToSpace to a cldnn::batch_to_space primitive.
+// Inputs 1..3 (block_shape, crops_begin, crops_end) must be Constant nodes: their values are
+// read at graph-build time and passed to clDNN as tensors. Throws if any of them is not a Constant.
+void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::BatchToSpace>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto rank = op->get_input_shape(0).size();
+    const auto format = DefaultFormatForDims(rank);
+
+    // Index 0: block_shape, 1: crops_begin, 2: crops_end
+    std::vector<cldnn::tensor> constArgs;
+    constArgs.reserve(3);
+
+    for (size_t port = 1; port < 4; ++port) {
+        auto constNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(port));
+        if (!constNode)
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+
+        // block_shape (port 1) is padded with ones, the crops (ports 2 and 3) with zeros
+        const int32_t fillValue = (port == 1) ? 1 : 0;
+        std::vector<int32_t> values = constNode->cast_vector<int32_t>();
+        if (values.size() < rank)
+            values.resize(rank, fillValue);
+
+        constArgs.emplace_back(format, values, fillValue);
+    }
+
+    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    auto batchToSpacePrim = cldnn::batch_to_space(layerName,
+                                                  inputPrimitives[0],  // input
+                                                  constArgs[0],        // block_shape
+                                                  constArgs[1],        // crops_begin
+                                                  constArgs[2],        // crops_end
+                                                  outTensor);
+
+    p.AddPrimitive(batchToSpacePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, BatchToSpace);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/broadcast.cpp
+++ b/inference-engine/src/cldnn_engine/ops/broadcast.cpp
@ -0,0 +1,107 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/broadcast.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/broadcast.hpp"
+#include "api/reorder.hpp"
+#include "api/reshape.hpp"
+
+namespace CLDNNPlugin {
+
+// Common lowering used by the v1 and v3 Broadcast handlers.
+// When input and output ranks differ, the input is first reordered (only if the default clDNN
+// format for the two ranks differs) and reshaped so that it is extended with ones; afterwards a
+// cldnn::broadcast to the op's output shape is added.
+// axis_mapping: if empty, the input shape is left-padded with ones; otherwise ones are inserted
+//               between the listed axes.
+static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::Node>& op, const ngraph::AxisSet axis_mapping) {
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto inputShape = op->get_input_shape(0);
+    auto outputShape = op->get_output_shape(0);
+    const auto inputRank = inputShape.size();
+    const auto outputRank = outputShape.size();
+
+    // Id of the primitive that finally feeds the broadcast
+    auto broadcastInput = inputPrimitives[0];
+
+    if (inputRank != outputRank) {
+        // A different number of dimensions may imply a different default format: reorder first
+        auto outFormat = DefaultFormatForDims(outputRank);
+        if (outFormat.value != DefaultFormatForDims(inputRank).value) {
+            auto reorderName = layerName + "_cldnn_in_reorder";
+            auto inDataType = DataTypeFromPrecision(op->get_input_element_type(0));
+
+            p.AddPrimitive(cldnn::reorder(reorderName, broadcastInput, outFormat, inDataType));
+            p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
+
+            broadcastInput = reorderName;
+        }
+
+        auto reshapeName = layerName + "_cldnn_in_reshape";
+
+        if (!axis_mapping.empty()) {
+            // Explicit mapping: put ones into the gaps between consecutive mapped axes
+            ngraph::Shape extendedShape;
+            int lastAxis = -1;
+            size_t filledRank = 0;
+            for (auto& axis : axis_mapping) {
+                const int curAxis = static_cast<int>(axis);
+                int gap = std::max(curAxis - lastAxis - 1, 0);
+                lastAxis = curAxis;
+
+                extendedShape.insert(extendedShape.begin() + filledRank, gap, 1ul);
+                extendedShape.push_back(outputShape[axis]);
+
+                filledRank += gap + 1;
+            }
+            inputShape = extendedShape;
+        } else {
+            // No mapping given: prepend the necessary count of ones
+            inputShape.insert(inputShape.begin(), outputRank - inputRank, 1ul);
+        }
+
+        p.AddPrimitive(cldnn::reshape(reshapeName, broadcastInput, CldnnTensorFromIEDims(inputShape)));
+        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+        broadcastInput = reshapeName;
+    }
+
+    auto broadcastPrim = cldnn::broadcast(layerName,
+                                          broadcastInput,
+                                          CldnnTensorFromIEDims(op->get_output_shape(0)));
+
+    p.AddPrimitive(broadcastPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v1::Broadcast. An explicit axes mapping (3rd input, must be a Constant) is
+// honoured only for AutoBroadcastType::NONE; every other case uses the "prepend ones" path.
+void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v1::Broadcast>& op) {
+    p.ValidateInputs(op, {2, 3});
+
+    const bool explicitMapping = op->get_broadcast_spec().m_type == ngraph::op::AutoBroadcastType::NONE &&
+                                 op->get_input_size() == 3;
+    if (!explicitMapping) {
+        // TODO: check if axis_mapping is not needed in these cases and prepending input shape with ones works fine in all cases
+        CreateCommonBroadcastOp(p, op, {});
+        return;
+    }
+
+    auto mappingConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
+    if (!mappingConst)
+        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+
+    CreateCommonBroadcastOp(p, op, mappingConst->get_axis_set_val());
+}
+
+// Lowers ngraph v3::Broadcast; the axis set reported by get_broadcast_axes() is forwarded
+// to the common implementation as the axis mapping.
+void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v3::Broadcast>& op) {
+    p.ValidateInputs(op, {2, 3});
+
+    const auto broadcastAxes = op->get_broadcast_axes();
+    CreateCommonBroadcastOp(p, op, broadcastAxes.second);
+}
+
+REGISTER_FACTORY_IMPL(v1, Broadcast);
+REGISTER_FACTORY_IMPL(v3, Broadcast);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/concat.cpp
+++ b/inference-engine/src/cldnn_engine/ops/concat.cpp
@ -0,0 +1,56 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/concat.hpp"
+
+#include "api/concatenation.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an IE/ngraph concatenation axis into the matching clDNN axis enum.
+// Batch (0) and feature (1) keep their position; spatial axes are mirrored because clDNN
+// orders them in reverse. Ranks below 4 are treated as 4D.
+// Throws when the axis is not smaller than the rank or cannot be expressed in clDNN.
+static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) {
+    // The comparison is done in unsigned arithmetic, so a negative axis is rejected here as well
+    if (static_cast<size_t>(axis) >= rank)
+        THROW_IE_EXCEPTION << "Concatenation axis exceeds number of dimensions";
+
+    unsigned cldnnAxis;
+    if (axis < 2) {
+        cldnnAxis = axis;
+    } else {
+        // Default and minimum number of dimensions is 4
+        const auto spatialRank = std::max<size_t>(rank, 4) - 2;
+        const auto spatialIdx = axis - 2;
+        cldnnAxis = spatialRank - spatialIdx - 1 + 2;
+    }
+
+    switch (cldnnAxis) {
+        case 0: return cldnn::concatenation::concatenation_axis::along_b;
+        case 1: return cldnn::concatenation::concatenation_axis::along_f;
+        case 2: return cldnn::concatenation::concatenation_axis::along_x;
+        case 3: return cldnn::concatenation::concatenation_axis::along_y;
+        case 4: return cldnn::concatenation::concatenation_axis::along_z;
+        case 5: return cldnn::concatenation::concatenation_axis::along_w;
+        default: THROW_IE_EXCEPTION << "Unsupported concatenation axis: " << axis;
+    }
+
+    // Not reachable: every switch branch either returns or throws
+    return cldnn::concatenation::concatenation_axis::along_f;
+}
+
+// Lowers ngraph v0::Concat to cldnn::concatenation over all input primitives.
+// The rank of the first input is used to translate the axis.
+void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Concat>& op) {
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto concatAxis = GetConcatAxis(op->get_axis(), op->get_input_shape(0).size());
+    const auto outDataType = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    p.AddPrimitive(cldnn::concatenation(layerName, inputPrimitives, concatAxis, outDataType));
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, Concat);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/constant.cpp
+++ b/inference-engine/src/cldnn_engine/ops/constant.cpp
@ -0,0 +1,190 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/convolution.hpp"
+#include "ngraph/op/binary_convolution.hpp"
+#include "ngraph/op/deformable_convolution.hpp"
+#include "ngraph/op/group_conv.hpp"
+#include "ngraph/op/concat.hpp"
+#include "ngraph/op/squared_difference.hpp"
+#include "ngraph/op/gather.hpp"
+#include "ngraph/op/split.hpp"
+#include "ngraph/op/variadic_split.hpp"
+#include "ngraph/op/util/op_types.hpp"
+
+#include "api/data.hpp"
+
+namespace CLDNNPlugin {
+
+// Describes how a Constant is consumed, as detected by getConstProperties() below.
+struct ConstProperties {
+    // true when a consumer is one of the convolution-type ops checked in getConstProperties()
+    bool isWeights;
+    // true for Group* convolutions and for DeformableConvolution with group > 1
+    bool hasGroupDimension;
+    // true for the *BackpropData (deconvolution) ops, whose weights are stored as IO instead of OI
+    bool reversedChannelsOrder;
+};
+
+// Scans the consumers of the constant and returns the properties of the first convolution-type
+// consumer found. Returns {false, false, false} when there is no such consumer.
+static ConstProperties getConstProperties(const std::shared_ptr<ngraph::op::Constant>& op) {
+    for (size_t outIdx = 0; outIdx < op->get_output_size(); outIdx++) {
+        auto consumers = op->get_output_target_inputs(outIdx);
+        for (auto& consumer : consumers) {
+            auto node = consumer.get_node();
+
+            // Forward convolutions: regular OI channel order
+            if (dynamic_cast<ngraph::op::v1::Convolution*>(node))
+                return {true, false, false};
+            if (dynamic_cast<ngraph::op::v1::BinaryConvolution*>(node))
+                return {true, false, false};
+            if (auto deformable = dynamic_cast<ngraph::op::v1::DeformableConvolution*>(node))
+                return {true, deformable->get_group() > 1, false};
+            if (dynamic_cast<ngraph::op::v1::GroupConvolution*>(node))
+                return {true, true, false};
+
+            // Deconvolutions: reversed (IO) channel order
+            if (dynamic_cast<ngraph::op::v1::ConvolutionBackpropData*>(node))
+                return {true, false, true};
+            if (dynamic_cast<ngraph::op::v1::GroupConvolutionBackpropData*>(node))
+                return {true, true, true};
+        }
+    }
+    return {false, false, false};
+}
+
+// Creates a cldnn::data primitive from an ngraph v0::Constant.
+//
+// Steps:
+//  1. map the constant's shape to a cldnn tensor (spatial dimensions are passed in reversed order);
+//  2. for 1D constants, switch to the "batch" interpretation when a consumer requires it;
+//  3. for constants consumed by convolution-type ops, select a weights format (oiyx, goiyx, ioyx, ...);
+//  4. allocate a cldnn memory object and copy the data into it, or reuse the primitive that was
+//     already created for the same host data pointer (p.blobMemCache).
+//
+// Throws when the constant has more than 6 dimensions.
+void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
+    auto constDims = op->get_shape();
+    cldnn::tensor constTensor;
+    switch (constDims.size()) {
+    case 6: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
+                                        TensorValue(constDims[5]), TensorValue(constDims[4]),
+                                        TensorValue(constDims[3]), TensorValue(constDims[2]));
+        break;
+    case 5: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
+                                        TensorValue(constDims[4]), TensorValue(constDims[3]), TensorValue(constDims[2]));
+        break;
+    case 4: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
+                                        TensorValue(constDims[3]), TensorValue(constDims[2]));
+        break;
+    case 3: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
+                                        1, TensorValue(constDims[2]));
+        break;
+    case 2: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), 1, 1);
+        break;
+    case 1: constTensor = cldnn::tensor(1, TensorValue(constDims[0]), 1, 1);
+        break;
+    case 0: constTensor = cldnn::tensor(1, 1, 1, 1);
+        break;
+    default: THROW_IE_EXCEPTION << "Invalid constant blob dimensions";
+    }
+
+    // WA to inconsistency between input and const 1d tensors
+    // For Concat along batch we go with batch interpretation
+    // For Gather input we go with batch interpretation
+    bool needsBatchInterpretation = false;
+    if (constDims.size() == 1) {
+        for (size_t i = 0; i < op->get_output_size(); i++) {
+            auto outTensors = op->get_output_target_inputs(i);
+
+            // Note: each `break` below leaves only this inner loop over the consumers of output i
+            for (auto& t : outTensors) {
+                auto outOp = t.get_node();
+                if (auto castedOp = dynamic_cast<ngraph::op::v0::Concat*>(outOp)) {
+                    if (castedOp->get_axis() == 0) {
+                        needsBatchInterpretation = true;
+                        break;
+                    }
+                } else if (ngraph::op::is_binary_elementwise_arithmetic(outOp) ||
+                           ngraph::op::is_binary_elementwise_logical(outOp) ||
+                           ngraph::is_type<ngraph::op::v0::SquaredDifference>(outOp)) {
+                    // Eltwise consumers need the batch interpretation only if all their inputs are 1D
+                    bool all_inputs_1d = true;
+                    for (size_t j = 0; j < outOp->get_input_size(); j++) {
+                        auto& in_shape = outOp->get_input_shape(j);
+                        if (in_shape.size() != 1)
+                            all_inputs_1d = false;
+                    }
+                    needsBatchInterpretation = all_inputs_1d;
+                    break;
+                } else if (ngraph::is_type<ngraph::op::v1::Gather>(outOp) ||
+                           ngraph::is_type<ngraph::op::v1::Split>(outOp) ||
+                           ngraph::is_type<ngraph::op::v1::VariadicSplit>(outOp)) {
+                    needsBatchInterpretation = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    if (needsBatchInterpretation) {
+        // Move all elements from the feature dimension to the batch dimension
+        constTensor.batch[0] = constTensor.count();
+        constTensor.feature[0] = 1;
+    }
+
+    auto constFormat = DefaultFormatForDims(op->get_output_shape(0).size());
+    auto prop = getConstProperties(op);
+    if (prop.isWeights) {
+        // Deconvolution has reversed channels order (io instead of oi)
+        if (prop.reversedChannelsOrder) {
+            if (prop.hasGroupDimension) {
+                switch (op->get_output_shape(0).size()) {
+                    case 5: constFormat = cldnn::format::gioyx; break;
+                    case 6: constFormat = cldnn::format::giozyx; break;
+                }
+            } else {
+                switch (op->get_output_shape(0).size()) {
+                    case 4: constFormat = cldnn::format::ioyx; break;
+                    case 5: constFormat = cldnn::format::iozyx; break;
+                }
+            }
+        } else {
+            if (prop.hasGroupDimension) {
+                switch (op->get_output_shape(0).size()) {
+                    case 5: constFormat = cldnn::format::goiyx; break;
+                    case 6: constFormat = cldnn::format::goizyx; break;
+                }
+            } else {
+                switch (op->get_output_shape(0).size()) {
+                    case 4: constFormat = cldnn::format::oiyx; break;
+                    case 5: constFormat = cldnn::format::oizyx; break;
+                }
+            }
+        }
+        // For weights the tensor is rebuilt from the dimensions in their original order
+        // (padded with ones up to 4 entries) and interpreted according to the selected format
+        std::vector<cldnn::tensor::value_type> dims(constDims.begin(), constDims.end());
+        for (size_t i = dims.size(); i < 4; i++) {
+            dims.push_back(1);
+        }
+        constTensor = cldnn::tensor(constFormat, dims);
+    }
+
+    // If constDims has a dimension = 0, then create tensor with single value
+    // TODO: check if dim=0 is a valid case
+    // The dimensions are scanned directly instead of multiplying them: a product accumulated in a
+    // narrow integer type may wrap around to 0 for large shapes and produce a false positive.
+    bool hasZeroDim = false;
+    for (const auto& dim : constDims) {
+        if (dim == 0) {
+            hasZeroDim = true;
+            break;
+        }
+    }
+    if (hasZeroDim)
+        constTensor = cldnn::tensor{1};
+
+    cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)),
+                                              constFormat,
+                                              constTensor);
+
+    cldnn::primitive_id initialconstPrimID = layer_type_name_ID(op);
+    cldnn::primitive_id constPrimID;
+    auto data = op->get_data_ptr<char>();
+
+    // Constants that share the same host buffer are uploaded only once
+    auto bufIter = p.blobMemCache.find(data);
+
+    if (bufIter != p.blobMemCache.end()) {
+        constPrimID = bufIter->second;
+    } else {
+        auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
+        auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
+        auto buf = tmpPointer.data();
+        auto bufSize = constLayout.bytes_count();
+
+        // NOTE(review): for a constant with a zero dimension the layout above holds a single element,
+        // so bufSize is non-zero here - confirm that the source buffer is at least that large.
+        std::memcpy(&buf[0], &data[0], bufSize);
+        p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
+        p.blobMemCache[data] = initialconstPrimID;
+        constPrimID = initialconstPrimID;
+    }
+
+    p.AddPrimitiveToProfiler(op, constPrimID);
+}
+
+REGISTER_FACTORY_IMPL(v0, Constant);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/convert.cpp
+++ b/inference-engine/src/cldnn_engine/ops/convert.cpp
@ -0,0 +1,44 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/convert.hpp"
+#include "ngraph/op/convert_like.hpp"
+
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Lowers ngraph v1::ConvertLike to a cldnn::reorder that keeps the format (format::any)
+// and converts the data to the element type of the second input.
+void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvertLike>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    // The target precision is taken from the "like" input (input 1)
+    const auto targetType = DataTypeFromPrecision(op->get_input_element_type(1));
+
+    p.AddPrimitive(cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, targetType));
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v0::Convert to a cldnn::reorder that keeps the format (format::any)
+// and converts the data to the op's destination type.
+void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Convert>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto targetType = DataTypeFromPrecision(op->get_destination_type());
+
+    p.AddPrimitive(cldnn::reorder(layerName, inputPrimitives[0], cldnn::format::any, targetType));
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, Convert);
+REGISTER_FACTORY_IMPL(v1, ConvertLike);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/convolution.cpp
+++ b/inference-engine/src/cldnn_engine/ops/convolution.cpp
@ -0,0 +1,326 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/convolution.hpp"
+#include "ngraph/op/binary_convolution.hpp"
+#include "ngraph/op/deformable_convolution.hpp"
+#include "ngraph/op/group_conv.hpp"
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/fake_quantize.hpp"
+#include "ngraph/op/util/op_types.hpp"
+
+#include "api/convolution.hpp"
+#include "api/deconvolution.hpp"
+#include "api/binary_convolution.hpp"
+#include "api/reshape.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Convolution attributes converted to clDNN representation (filled by GetConvolutionParameters()).
+// NOTE(review): the struct name contains a typo ("Convoltuion"); it is kept because other code refers to it.
+struct ConvoltuionParameters {
+    cldnn::tensor stride;    // strides, spatial dimensions in reversed (clDNN) order
+    cldnn::tensor padding;   // negated pads_begin values, spatial dimensions in reversed order
+    cldnn::tensor dilation;  // dilations, spatial dimensions in reversed order
+    uint32_t groups;         // number of groups, passed through unchanged
+};
+
+// Converts ngraph convolution attributes into clDNN tensors.
+// Spatial values are written in reversed order and pads_begin is negated.
+// Throws when the attribute sizes differ or when the number of spatial dimensions is not 1, 2 or 3.
+static ConvoltuionParameters GetConvolutionParameters(const ngraph::CoordinateDiff& pads_begin,
+                                                      const ngraph::Strides& dilations,
+                                                      const ngraph::Strides& strides,
+                                                      uint32_t groups) {
+    const size_t spatialRank = strides.size();
+    if (pads_begin.size() != spatialRank || dilations.size() != spatialRank)
+        THROW_IE_EXCEPTION << "Strides, Dilations and Pads are supposed to have the same elements count";
+
+    cldnn::tensor stride, padding, dilation;
+    switch (spatialRank) {
+        case 1:
+            stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
+            padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[0], 0, 0));
+            dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[0], 1, 1));
+            break;
+        case 2:
+            stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
+            padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[1], -pads_begin[0], 0));
+            dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[1], dilations[0], 1));
+            break;
+        case 3:
+            stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
+            padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[2], -pads_begin[1], -pads_begin[0]));
+            dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[2], dilations[1], dilations[0]));
+            break;
+        default: THROW_IE_EXCEPTION << "Unsupported convolve parameters size. Only 1d, 2d, and 3d cases are supported";
+    }
+
+    return {stride, padding, dilation, groups};
+}
+
+// Lowers ngraph v1::GroupConvolution to cldnn::convolution.
+// The number of groups is the first dimension of the weights shape.
+void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolution>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    uint32_t groups = op->get_input_shape(1)[0];
+    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), groups);
+    auto outDims = op->get_output_shape(0);
+    auto outPrecision = op->get_output_element_type(0);
+
+    auto weightsName = inputs[1];
+
+    // WA: weights that are not produced directly by a Constant (e.g. a FakeQuantize on weights
+    // that was not folded by constant propagation). Their dimensions come with a separate group
+    // dimension, while the default format has none, so G and O are merged by a reshape and the
+    // result is reordered to the default format of the new rank.
+    const bool weightsAreConstant =
+        std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) != nullptr;
+    if (!weightsAreConstant) {
+        std::string reshapeName = layerName + "_cldnn_weights_reshape";
+        std::string reorderName = layerName + "_cldnn_weights_reorder";
+
+        auto weights_shape = op->get_input_shape(1);
+        // First entry: merged G and O dims; the remaining dims are copied as is
+        std::vector<size_t> mergedShape{weights_shape[0] * weights_shape[1]};
+        for (size_t d = 2; d < weights_shape.size(); d++) {
+            mergedShape.push_back(weights_shape[d]);
+        }
+
+        p.AddPrimitive(cldnn::reshape(reshapeName, weightsName, CldnnTensorFromIEDims(mergedShape)));
+        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+        p.AddPrimitive(cldnn::reorder(reorderName,
+                                      reshapeName,
+                                      DefaultFormatForDims(mergedShape.size()),
+                                      DataTypeFromPrecision(op->get_input_element_type(1))));
+        p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
+
+        weightsName = reorderName;
+    }
+
+    std::vector<cldnn::primitive_id> weights = {weightsName};
+    auto convPrim = cldnn::convolution(layerName,
+                                       inputs[0],
+                                       weights,
+                                       {},
+                                       params.groups,
+                                       params.stride,
+                                       params.padding,
+                                       params.dilation,
+                                       CldnnTensorFromIEDims(outDims),
+                                       DataTypeFromPrecision(outPrecision));
+
+    p.AddPrimitive(convPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v1::Convolution to a cldnn::convolution with a single group and no bias.
+void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convolution>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
+    const auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
+    const auto outDataType = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    // inputs[0] - data, inputs[1] - weights
+    std::vector<cldnn::primitive_id> weights = {inputs[1]};
+    auto convPrim = cldnn::convolution(layerName,
+                                       inputs[0],
+                                       weights,
+                                       {},
+                                       params.groups,
+                                       params.stride,
+                                       params.padding,
+                                       params.dilation,
+                                       outTensor,
+                                       outDataType);
+
+    p.AddPrimitive(convPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v1::ConvolutionBackpropData to a cldnn::deconvolution with a single group and no bias.
+// Only unit dilations are supported: any other value raises an exception.
+void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvolutionBackpropData>& op) {
+    // 3rd input is an optional output shape
+    p.ValidateInputs(op, {2, 3});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto dilations = op->get_dilations();
+    for (auto d : dilations) {
+        if (d != 1) {
+            THROW_IE_EXCEPTION << "Unsupported dilation in ConvolutionBackpropData " << op->get_friendly_name();
+        }
+    }
+
+    auto weightsName = inputs[1];
+    // WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
+    // Dimensions order of weights blob is IOYX, but
+    // the selected format is OIYX by default. So we need to swap I and O dimensions to match the format.
+    // The workaround must be applied only when the weights producer is NOT a Constant: Constant weights of
+    // a deconvolution already get an io* format when the constant primitive is created (see CreateConstantOp),
+    // so swapping their dimensions again would make the shape inconsistent with the data.
+    if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) == nullptr) {
+        std::string reshapeName = layerName + "_cldnn_weights_reshape";
+
+        auto weights_shape = op->get_input_shape(1);
+        std::swap(weights_shape[0], weights_shape[1]);
+        auto reshapePrim = cldnn::reshape(reshapeName,
+                                          weightsName,
+                                          CldnnTensorFromIEDims(weights_shape));
+
+        p.AddPrimitive(reshapePrim);
+        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+        weightsName = reshapeName;
+    }
+
+    std::vector<cldnn::primitive_id> weights = {weightsName};
+
+    // The dilation tensor of params is not passed to the deconvolution primitive (dilations were verified above)
+    auto params = GetConvolutionParameters(op->get_pads_begin(), dilations, op->get_strides(), 1);
+    auto deconvPrim = cldnn::deconvolution(layerName,
+        inputs[0],
+        weights,
+        {},
+        params.groups,
+        params.stride,
+        params.padding,
+        CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
+    p.AddPrimitive(deconvPrim);
+
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v1::GroupConvolutionBackpropData to a cldnn::deconvolution without bias.
+// The number of groups is the first dimension of the weights shape.
+// Only unit dilations are supported: any other value raises an exception.
+void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolutionBackpropData>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto dilations = op->get_dilations();
+    for (auto d : dilations) {
+        if (d != 1) {
+            // Report the actual op type so the message is not confused with the non-grouped variant
+            THROW_IE_EXCEPTION << "Unsupported dilation in GroupConvolutionBackpropData " << op->get_friendly_name();
+        }
+    }
+
+    uint32_t groups = op->get_input_shape(1)[0];
+    // The dilation tensor of params is not passed to the deconvolution primitive (dilations were verified above)
+    auto params = GetConvolutionParameters(op->get_pads_begin(), dilations, op->get_strides(), groups);
+    std::vector<cldnn::primitive_id> weights = {inputs[1]};
+
+    auto deconvPrim = cldnn::deconvolution(layerName,
+        inputs[0],
+        weights,
+        {},
+        params.groups,
+        params.stride,
+        params.padding,
+        CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
+    p.AddPrimitive(deconvPrim);
+
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates ngraph v1::DeformableConvolution into clDNN primitives.
+//
+// Exactly three inputs are accepted. Input 0 and input 1 are forwarded to the clDNN
+// primitive in that order (input 1 is presumably the offsets tensor - confirm against
+// the op specification) and input 2 is used as the weights.
+//
+// - groups > 1: a single cldnn::convolution in its deformable form is created.
+// - otherwise:  the op is split into cldnn::deformable_interp ("<layer>_interp")
+//               followed by cldnn::deformable_conv, which keeps the layer name.
+//
+// Throws if the weights rank is too small to hold two spatial dimensions.
+void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::DeformableConvolution>& op) {
+    p.ValidateInputs(op, {3});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), op->get_group());
+    auto outDims = op->get_output_shape(0);
+
+    std::vector<cldnn::primitive_id> weights = {inputs[2]};
+    if (params.groups > 1) {
+        auto convPrim = cldnn::convolution(layerName,
+                                           inputs[0],
+                                           inputs[1],
+                                           weights,
+                                           {},
+                                           params.groups,
+                                           op->get_deformable_group(),
+                                           params.stride,
+                                           params.padding,
+                                           params.dilation,
+                                           CldnnTensorFromIEDims(outDims));
+
+        p.AddPrimitive(convPrim);
+        p.AddPrimitiveToProfiler(op);
+    } else {
+        std::string defConvLayerNameInterp = layerName + "_interp";
+        std::string defConvLayerNameConv = layerName;
+        cldnn::tensor kernel;
+        auto weights_shape = op->get_input_shape(2);
+
+        // This branch is only reached when groups <= 1, so the weights carry no extra
+        // leading group dimension and the spatial dimensions start at index 2.
+        const size_t sidx = 2;
+        if (weights_shape.size() < sidx + 2) {
+            THROW_IE_EXCEPTION << "Unsupported weights rank " << weights_shape.size() << " in "
+                               << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+
+        // Pick the kernel layout by the number of *spatial* dimensions. Comparing the
+        // full rank against 3 would index past the end of the shape.
+        if (weights_shape.size() == sidx + 3) {
+            kernel = cldnn::tensor(cldnn::batch(1),
+                                   cldnn::feature(1),
+                                   cldnn::spatial(weights_shape[sidx + 2],
+                                                  weights_shape[sidx + 1],
+                                                  weights_shape[sidx + 0]));
+        } else {
+            kernel = cldnn::tensor(cldnn::batch(1),
+                                   cldnn::feature(1),
+                                   cldnn::spatial(weights_shape[sidx + 1],
+                                                  weights_shape[sidx + 0],
+                                                  1));
+        }
+
+        auto defConvPrimInterp = cldnn::deformable_interp(defConvLayerNameInterp,
+                                                          inputs[0],
+                                                          inputs[1],
+                                                          params.groups,
+                                                          op->get_deformable_group(),
+                                                          params.stride,
+                                                          params.padding,
+                                                          params.dilation,
+                                                          CldnnTensorFromIEDims(outDims),
+                                                          kernel);
+        p.AddPrimitive(defConvPrimInterp);
+        // The interpolation stage is profiled as an inner primitive of the convolution layer.
+        p.AddInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, op);
+        auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv,
+                                                  defConvLayerNameInterp,
+                                                  weights,
+                                                  {},
+                                                  params.groups,
+                                                  CldnnTensorFromIEDims(outDims));
+        p.AddPrimitive(defConvPrim);
+        p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
+    }
+}
+
+// Translates ngraph v1::BinaryConvolution into a clDNN binary_convolution primitive.
+// Exactly two inputs are accepted (data at port 0, weights at port 1); the convolution
+// parameters are built with a group count of 1 and the calculation precision follows
+// the op's output element type.
+void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::BinaryConvolution>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
+
+    auto outPrecision = op->get_output_element_type(0);
+    cldnn::data_types calc_precision = DataTypeFromPrecision(outPrecision);
+    auto outputSize = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    std::vector<cldnn::primitive_id> weights = {inputs[1]};
+    auto convPrim = cldnn::binary_convolution(layerName,
+                                              inputs[0],
+                                              weights,
+                                              params.stride,
+                                              params.padding,
+                                              params.dilation,
+                                              outputSize,
+                                              params.groups,
+                                              op->get_pad_value(),
+                                              calc_precision);
+
+    p.AddPrimitive(convPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registrations for the convolution-family ops, one per (opset version, op name).
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds
+// each Create<Name>Op function to the matching ngraph op type - confirm at the macro.
+REGISTER_FACTORY_IMPL(v1, GroupConvolution);
+REGISTER_FACTORY_IMPL(v1, Convolution);
+REGISTER_FACTORY_IMPL(v1, ConvolutionBackpropData);
+REGISTER_FACTORY_IMPL(v1, GroupConvolutionBackpropData);
+REGISTER_FACTORY_IMPL(v1, DeformableConvolution);
+REGISTER_FACTORY_IMPL(v1, BinaryConvolution);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp
+++ b/inference-engine/src/cldnn_engine/ops/ctc_greedy_decoder.cpp
@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/ctc_greedy_decoder.hpp"
+
+#include "api/ctc_greedy_decoder.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates ngraph v0::CTCGreedyDecoder into a clDNN ctc_greedy_decoder primitive.
+// Exactly two inputs are accepted and forwarded in port order; output data type and
+// shape are taken from output 0 of the op.
+void CreateCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::op::v0::CTCGreedyDecoder>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto mergeRepeated = op->get_ctc_merge_repeated();
+    auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
+    auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    auto primitive = cldnn::ctc_greedy_decoder(layerName,
+                                               inputPrimitives[0],
+                                               inputPrimitives[1],
+                                               mergeRepeated,
+                                               outputType,
+                                               outputShape);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// NOTE(review): macro defined elsewhere; presumably registers CreateCTCGreedyDecoderOp
+// as the factory for ngraph v0::CTCGreedyDecoder - confirm at the macro definition.
+REGISTER_FACTORY_IMPL(v0, CTCGreedyDecoder);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/cum_sum.cpp
+++ b/inference-engine/src/cldnn_engine/ops/cum_sum.cpp
@ -0,0 +1,74 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/cum_sum.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/cum_sum.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an IE axis index into the matching clDNN cum_sum axis for a tensor of the
+// given rank. Negative axes count from the end. Throws when the axis is outside
+// [0, rank) or when the resulting clDNN axis has no enumerator (index above 5).
+static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t rank) {
+    using cum_axis = cldnn::cum_sum::cum_sum_axis;
+
+    if (axis < 0)
+        axis += rank;
+    if (axis < 0 || axis >= rank)
+        THROW_IE_EXCEPTION << "CumSum axis is not correspond to number of dimensions";
+
+    // IE and clDNN order the spatial dimensions in opposite directions, so every axis
+    // after batch and feature is mirrored inside the spatial range.
+    uint32_t cldnn_axis = axis;
+    if (axis >= 2) {
+        // At least 4 dimensions are assumed, matching the default minimum rank.
+        auto spatial_size = std::max(rank, 4u) - 2;
+        auto spatial_axis = axis - 2;
+        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
+    }
+
+    // Lookup table indexed by cldnn_axis: b, f, x, y, z, w.
+    static const cum_axis axis_by_index[] = {
+        cum_axis::along_b,
+        cum_axis::along_f,
+        cum_axis::along_x,
+        cum_axis::along_y,
+        cum_axis::along_z,
+        cum_axis::along_w,
+    };
+    if (cldnn_axis > 5)
+        THROW_IE_EXCEPTION << "Unsupported CumSum axis: " << axis;
+
+    return axis_by_index[cldnn_axis];
+}
+
+// Translates ngraph v0::CumSum into a clDNN cum_sum primitive.
+// One or two inputs are accepted. When the second (axis) input is present it must be a
+// Constant, and its first element is used as the axis; otherwise the axis defaults to 0.
+void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::CumSum>& op) {
+    p.ValidateInputs(op, {1, 2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto exclusive = op->is_exclusive();
+    auto reverse = op->is_reverse();
+
+    int32_t axis = 0;
+    const bool hasAxisInput = (op->get_input_size() == 2);
+    if (hasAxisInput) {
+        auto axisNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
+        if (axisNode == nullptr) {
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+        axis = axisNode->cast_vector<int32_t>()[0];
+    }
+
+    // Resolve the clDNN axis from the rank of the data input.
+    size_t rank = op->get_input_shape(0).size();
+    auto cumSumAxis = GetCumSumAxis(axis, rank);
+
+    auto primitive = cldnn::cum_sum(layerName, inputPrimitives[0], cumSumAxis, exclusive, reverse);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// NOTE(review): macro defined elsewhere; presumably registers CreateCumSumOp as the
+// factory for ngraph v0::CumSum - confirm at the macro definition.
+REGISTER_FACTORY_IMPL(v0, CumSum);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/custom.cpp
+++ b/inference-engine/src/cldnn_engine/ops/custom.cpp
@ -0,0 +1,251 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+#include "simple_math.h"
+
+#include "ngraph/attribute_visitor.hpp"
+#include "ngraph/node.hpp"
+
+#include "api/custom_gpu_primitive.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Joins the elements of `vec` into a single comma-separated string (no spaces).
+// Each element is formatted with std::to_string; an empty vector yields "".
+// The vector is taken by const reference so callers' data is not copied.
+template<typename T>
+static inline std::string vecToString(const std::vector<T>& vec) {
+    std::string res;
+    for (size_t i = 0; i < vec.size(); i++) {
+        if (i != 0)
+            res += ",";
+        res += std::to_string(vec[i]);
+    }
+    return res;
+}
+
+// Specialization for strings: elements are concatenated verbatim (std::to_string has
+// no overload for std::string), separated by commas. An empty vector yields "".
+template<>
+inline std::string vecToString<std::string>(const std::vector<std::string>& vec) {
+    std::string res;
+    for (size_t i = 0; i < vec.size(); i++) {
+        if (i != 0)
+            res += ",";
+        res += vec[i];
+    }
+    return res;
+}
+
+// Attribute visitor that flattens the attributes of an ngraph node into a
+// name -> string map. Scalars are formatted with std::to_string, strings are stored
+// verbatim and vectors are serialized as comma-separated lists via vecToString.
+class CustomLayerAttributeVisitor : public ngraph::AttributeVisitor {
+public:
+    CustomLayerAttributeVisitor() = default;
+
+    // Generic adapter: reached for attribute types without a dedicated overload
+    // below; such attributes are rejected with an exception.
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override {
+        THROW_IE_EXCEPTION << "Attribute " << name << " can't be processed\n";
+    }
+
+    // --- scalar attributes ---
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& adapter) override {
+        m_values[name] = adapter.get();
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override {
+        put_scalar(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override {
+        put_scalar(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override {
+        put_scalar(name, adapter.get());
+    }
+
+    // --- vector attributes ---
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<std::string>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<float>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<double>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override {
+        put_list(name, adapter.get());
+    }
+
+    // Returns a copy of everything collected so far.
+    std::map<std::string, std::string> get_parameters() const {
+        return m_values;
+    }
+
+protected:
+    // Collected attributes: attribute name -> stringified value.
+    std::map<std::string, std::string> m_values;
+
+private:
+    // Stores a scalar attribute formatted with std::to_string.
+    template<typename V>
+    void put_scalar(const std::string& key, const V& value) {
+        m_values[key] = std::to_string(value);
+    }
+
+    // Stores a vector attribute as a comma-separated list.
+    template<typename V>
+    void put_list(const std::string& key, const std::vector<V>& values) {
+        m_values[key] = vecToString(values);
+    }
+};
+
+// Builds a clDNN custom_gpu_primitive for `op` from a user-supplied custom layer description.
+//
+// Steps:
+//  1. Collect the op attributes as strings and turn the layer's define list into
+//     "#define ..." lines (attribute value if present, otherwise the default value).
+//  2. Build the kernel argument table; inputs that request a specific format get a
+//     reorder primitive inserted in front of the custom primitive.
+//  3. Evaluate the global/local work-size rules using b/f/y/x variables taken either
+//     from the output tensor or from the input selected by InputDimSourceIndex().
+//  4. Add the custom primitive and, when an explicit output format was requested,
+//     a trailing reorder back to the default format for the output rank.
+//
+// Throws on an unknown kernel parameter type or an out-of-range dimension-source index.
+void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCustomLayerPtr customLayer) {
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    CustomLayerAttributeVisitor visitor;
+    op->visit_attributes(visitor);
+    auto params = visitor.get_parameters();
+
+    // Handle defines
+    std::string layerDefines;
+    for (const auto& def : customLayer->Defines()) {
+        std::string singleDefine("#define " + def.name + " " + def.prefix);
+        if (params.find(def.param) != params.end()) {
+            singleDefine += params.at(def.param);
+        } else {
+            singleDefine += def.default_value;
+        }
+        singleDefine += def.postfix + "\n";
+        layerDefines.append(singleDefine);
+    }
+
+    // One slot per op input; a slot stays empty until a kernel parameter refers to that port.
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.resize(inputPrimitives.size());
+
+    // Handle kernel parameters
+    std::vector<cldnn::custom_gpu_primitive::arg_desc> kernelParameters;
+    cldnn::format outputFormat(cldnn::format::any);
+    for (const auto& param : customLayer->KernelParams()) {
+        switch (param.type) {
+        case CLDNNCustomLayer::ParamType::Input: {
+            // Grow the argument table so that paramIndex is addressable.
+            kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
+            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
+            // Ports beyond the op's inputs are marked with index -1.
+            kernelParameters[param.paramIndex].index =
+                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
+
+            // Handle input reorder
+            if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
+                // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
+                if (param.format != cldnn::format::any) {
+                    auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + op->get_friendly_name() + Program::m_preCustomLayerTag;
+                    auto preprocessPrim = cldnn::reorder(
+                        reorderPrimName,
+                        inputPrimitives[param.portIndex],
+                        param.format,
+                        DataTypeFromPrecision(op->get_input_element_type(param.portIndex)));
+
+                    p.AddPrimitive(preprocessPrim);
+                    p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
+                    reorderedInputs[param.portIndex] = (reorderPrimName);
+                } else {
+                    reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
+                }
+            }
+            break;
+        }
+        case CLDNNCustomLayer::ParamType::Output: {
+            kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
+            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output;
+            // NOTE(review): the output port index is bounded by the number of *inputs*;
+            // confirm whether the number of outputs was intended here.
+            kernelParameters[param.paramIndex].index =
+                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
+            outputFormat = param.format;
+            break;
+        }
+        default:
+            THROW_IE_EXCEPTION << "Invalid custom layer param type: " << param.type << " in operation: " << op->get_friendly_name();
+        }
+    }
+    const std::string layerTitle("\n// Layer " + op->get_friendly_name() + " using Custom Layer " + customLayer->Name() + "\n");
+    const std::string defineTitle("// Custom Layer User Defines\n");
+
+    // Output shape is read as up to four leading dimensions (N, C, H, W); missing ones default to 1.
+    auto dims = op->get_output_shape(0);
+    size_t N = (dims.size() > 0) ? dims[0] : 1;
+    size_t C = (dims.size() > 1) ? dims[1] : 1;
+    size_t H = (dims.size() > 2) ? dims[2] : 1;
+    size_t W = (dims.size() > 3) ? dims[3] : 1;
+    cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));
+
+    cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), outputFormat, outputTensor);
+
+    // evaluate work sizes rules
+    std::vector<size_t> gws, lws;
+
+    // assume output tensor is dimension source by default
+    int batchDim = outputTensor.batch[0];
+    int featureDim = outputTensor.feature[0];
+    int yDim = outputTensor.spatial[1];
+    int xDim = outputTensor.spatial[0];
+    int iidx = customLayer->InputDimSourceIndex();
+
+    std::string genericLayerName = layer_type_name_ID(op);
+    // if input index is greater than -1, take dimension from input
+    if (iidx >= 0) {
+        if (iidx >= op->get_input_size())
+            THROW_IE_EXCEPTION << "Invalid input tensor for index: " << iidx;
+        auto inputDims = op->get_input_shape(iidx);
+
+        // Dimensions are read from the back of the input shape (x, y, feature, batch).
+        // Each lookup is guarded by the rank of the input itself: guarding by the output
+        // rank would index out of bounds whenever the input has fewer dimensions.
+        const size_t inRank = inputDims.size();
+        xDim = inRank > 0 ? inputDims[inRank - 1] : 0;
+        yDim = inRank > 1 ? inputDims[inRank - 2] : 0;
+        featureDim = inRank > 2 ? inputDims[inRank - 3] : 0;
+        batchDim = inRank > 3 ? inputDims[inRank - 4] : 0;
+    }
+    // Variables available to the work-size expressions (both letter cases are accepted).
+    const std::map<char, int> vars = {
+        { 'b', batchDim }  , { 'B', batchDim },
+        { 'f', featureDim }, { 'F', featureDim },
+        { 'y', yDim },       { 'Y', yDim },
+        { 'x', xDim },       { 'X', xDim },
+    };
+    for (auto rule : customLayer->GlobalSizeRules()) {
+        SimpleMathExpression expr;
+        expr.SetVariables(vars);
+        expr.SetExpression(rule);
+        gws.push_back(expr.Evaluate());
+    }
+    for (auto rule : customLayer->LocalSizeRules()) {
+        SimpleMathExpression expr;
+        expr.SetVariables(vars);
+        expr.SetExpression(rule);
+        lws.push_back(expr.Evaluate());
+    }
+
+    // Kernel sources are passed as: title comment, defines header, generated defines, user kernel.
+    auto customPrim = cldnn::custom_gpu_primitive(genericLayerName,
+                                                  reorderedInputs,
+                                                  { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
+                                                  customLayer->KernelEntry(),
+                                                  kernelParameters,
+                                                  customLayer->CompilerOptions(),
+                                                  outputLayout,
+                                                  gws,
+                                                  lws);
+
+    auto prevLayerName = genericLayerName;
+    if (outputLayout.format != cldnn::format::any) {
+        // Handle output reorder
+        auto reorderPrimName = genericLayerName + Program::m_postCustomLayerTag;
+        p.AddPrimitive(
+            cldnn::reorder(reorderPrimName,
+                           genericLayerName,
+                           DefaultFormatForDims(op->get_output_shape(0).size()),
+                           customPrim.output_layout.data_type));
+        prevLayerName = reorderPrimName;
+        p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
+    }
+    p.AddPrimitive(customPrim);
+    p.AddPrimitiveToProfiler(genericLayerName, op);
+    // Consumers of this layer must read from the trailing reorder when one was added.
+    p.primitiveIDs[genericLayerName] = prevLayerName;
+}
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp
+++ b/inference-engine/src/cldnn_engine/ops/depth_to_space.cpp
@ -0,0 +1,44 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/depth_to_space.hpp"
+
+#include "api/depth_to_space.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps the ngraph DepthToSpace mode onto the clDNN depth_to_space_mode enumerator;
+// throws for any value other than BLOCKS_FIRST / DEPTH_FIRST.
+static cldnn::depth_to_space_mode GetDepthMode(ngraph::op::v0::DepthToSpace::DepthToSpaceMode mode) {
+    using NgraphMode = ngraph::op::v0::DepthToSpace::DepthToSpaceMode;
+
+    if (mode == NgraphMode::BLOCKS_FIRST)
+        return cldnn::depth_to_space_mode::blocks_first;
+    if (mode == NgraphMode::DEPTH_FIRST)
+        return cldnn::depth_to_space_mode::depth_first;
+
+    THROW_IE_EXCEPTION << "Unsupported DepthToSpaceMode value: " << static_cast<int>(mode);
+
+    // Not reached: the statement above throws.
+    return cldnn::depth_to_space_mode::blocks_first;
+}
+
+// Translates ngraph v0::DepthToSpace into a clDNN depth_to_space primitive.
+// Exactly one input is accepted; block size and mode come from the op attributes.
+void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v0::DepthToSpace>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    // Resolve the mode first, then read the block size.
+    cldnn::depth_to_space_mode mode = GetDepthMode(op->get_mode());
+    size_t blockSize = op->get_block_size();
+
+    auto depthToSpacePrim = cldnn::depth_to_space(layerName, inputPrimitives[0], blockSize, mode);
+
+    p.AddPrimitive(depthToSpacePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// NOTE(review): macro defined elsewhere; presumably registers CreateDepthToSpaceOp as
+// the factory for ngraph v0::DepthToSpace - confirm at the macro definition.
+REGISTER_FACTORY_IMPL(v0, DepthToSpace);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/detection_output.cpp
+++ b/inference-engine/src/cldnn_engine/ops/detection_output.cpp
@ -0,0 +1,86 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/detection_output.hpp"
+
+#include "api/detection_output.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts a prior-box code type name ("caffe.PriorBoxParameter.*") into the clDNN
+// prior_box_code_type enumerator. Throws when the name is not one of the three known values.
+static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str) {
+    static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
+        { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
+        { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
+        { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
+    };
+
+    const auto found = CodeNameToType.find(str);
+    if (found == CodeNameToType.end())
+        THROW_IE_EXCEPTION << "Unknown Prior-Box code type: " << str;
+
+    return found->second;
+}
+
+// Translates ngraph v0::DetectionOutput into a clDNN detection_output primitive.
+// Exactly three inputs are accepted and forwarded in port order. All settings are
+// copied from the op attributes, except eta, which is fixed to 1.0, and the prior-box
+// layout (info size / coordinate offset), which is derived from `normalized`.
+void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op::v0::DetectionOutput>& op) {
+    p.ValidateInputs(op, {3});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto& attrs = op->get_attrs();
+
+    // Classification / NMS settings.
+    const uint32_t num_classes       = attrs.num_classes;
+    const bool share_location        = attrs.share_location;
+    const int background_label_id    = attrs.background_label_id;
+    const float nms_threshold        = attrs.nms_threshold;
+    const int top_k                  = attrs.top_k;
+    const float confidence_threshold = attrs.confidence_threshold;
+    const float eta                  = 1.0f;
+    // Only the first keep_top_k entry is used.
+    const int keep_top_k             = attrs.keep_top_k[0];
+
+    // Box decoding settings.
+    const bool variance_encoded_in_target = attrs.variance_encoded_in_target;
+    const int input_width                 = attrs.input_width;
+    const int input_height                = attrs.input_height;
+    const bool normalized                 = attrs.normalized;
+    const bool clip_before_nms            = attrs.clip_before_nms;
+    const bool clip_after_nms             = attrs.clip_after_nms;
+    const bool decrease_label_id          = attrs.decrease_label_id;
+
+    const cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(attrs.code_type);
+
+    // Normalized priors use 4 values with no offset; otherwise 5 values with offset 1.
+    int32_t prior_info_size = 5;
+    int32_t prior_coordinates_offset = 1;
+    if (normalized) {
+        prior_info_size = 4;
+        prior_coordinates_offset = 0;
+    }
+
+    auto detectionPrim = cldnn::detection_output(layerName,
+                                                 inputPrimitives[0],
+                                                 inputPrimitives[1],
+                                                 inputPrimitives[2],
+                                                 num_classes,
+                                                 keep_top_k,
+                                                 share_location,
+                                                 background_label_id,
+                                                 nms_threshold,
+                                                 top_k,
+                                                 eta,
+                                                 cldnnCodeType,
+                                                 variance_encoded_in_target,
+                                                 confidence_threshold,
+                                                 prior_info_size,
+                                                 prior_coordinates_offset,
+                                                 normalized,
+                                                 input_width,
+                                                 input_height,
+                                                 decrease_label_id,
+                                                 clip_before_nms,
+                                                 clip_after_nms);
+
+    p.AddPrimitive(detectionPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// NOTE(review): macro defined elsewhere; presumably registers CreateDetectionOutputOp
+// as the factory for ngraph v0::DetectionOutput - confirm at the macro definition.
+REGISTER_FACTORY_IMPL(v0, DetectionOutput);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/eltwise.cpp
+++ b/inference-engine/src/cldnn_engine/ops/eltwise.cpp
@ -0,0 +1,190 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+#include "transformations/utils/utils.hpp"
+
+#include "ngraph/op/add.hpp"
+#include "ngraph/op/multiply.hpp"
+#include "ngraph/op/maximum.hpp"
+#include "ngraph/op/minimum.hpp"
+#include "ngraph/op/subtract.hpp"
+#include "ngraph/op/divide.hpp"
+#include "ngraph/op/squared_difference.hpp"
+#include "ngraph/op/equal.hpp"
+#include "ngraph/op/not_equal.hpp"
+#include "ngraph/op/less.hpp"
+#include "ngraph/op/less_eq.hpp"
+#include "ngraph/op/greater.hpp"
+#include "ngraph/op/greater_eq.hpp"
+#include "ngraph/op/and.hpp"
+#include "ngraph/op/or.hpp"
+#include "ngraph/op/xor.hpp"
+#include "ngraph/op/power.hpp"
+#include "ngraph/op/floor_mod.hpp"
+
+#include "api/activation.hpp"
+#include "api/eltwise.hpp"
+#include "api/reorder.hpp"
+#include "api/reshape.hpp"
+
+namespace CLDNNPlugin {
+
+// Shared builder for element-wise ops: creates a clDNN eltwise primitive in `mode`
+// over all inputs of `op`. Any input whose rank differs from the output rank is first
+// reshaped to the output rank by prepending 1s to its shape, preceded by a reorder
+// when the default clDNN format for the two ranks differs.
+void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::eltwise_mode mode) {
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto outRank = op->get_output_shape(0).size();
+    for (size_t i = 0; i < inputPrimitives.size(); ++i) {
+        auto inputShape = op->get_input_shape(i);
+        const auto inputRank = inputShape.size();
+        if (inputRank == outRank)
+            continue;  // rank already matches, nothing to insert
+
+        // Common prefix for the helper primitives inserted in front of input i.
+        const std::string inputTag = layerName + "_cldnn_in" + std::to_string(i);
+
+        // Add reorder if changing number of dimensions requires changing format
+        auto targetFormat = DefaultFormatForDims(outRank);
+        if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
+            auto reorderName = inputTag + "_reorder";
+            auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(i));
+            auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
+
+            p.AddPrimitive(reorderPrim);
+            p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
+
+            inputPrimitives[i] = reorderName;
+        }
+
+        // Extend input dimensions by prepending ones
+        inputShape.insert(inputShape.begin(), outRank - inputRank, 1ul);
+
+        auto reshapeName = inputTag + "_reshape";
+        auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], CldnnTensorFromIEDims(inputShape));
+        p.AddPrimitive(reshapePrim);
+        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+        inputPrimitives[i] = reshapeName;
+    }
+
+    // The eltwise output type follows the op's output element type.
+    auto out_dt = DataTypeFromPrecision(op->get_output_element_type(0));
+    auto eltwisePrim = cldnn::eltwise(layerName, inputPrimitives, mode, {}, out_dt);
+
+    p.AddPrimitive(eltwisePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// ngraph v1::Add -> clDNN eltwise in `sum` mode.
+void CreateAddOp(Program& p, const std::shared_ptr<ngraph::op::v1::Add>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::sum);
+}
+
+// ngraph v1::Multiply -> clDNN eltwise in `prod` mode.
+void CreateMultiplyOp(Program& p, const std::shared_ptr<ngraph::op::v1::Multiply>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::prod);
+}
+
+// ngraph v1::Maximum -> clDNN eltwise in `max` mode.
+void CreateMaximumOp(Program& p, const std::shared_ptr<ngraph::op::v1::Maximum>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::max);
+}
+
+// ngraph v1::Minimum -> clDNN eltwise in `min` mode.
+void CreateMinimumOp(Program& p, const std::shared_ptr<ngraph::op::v1::Minimum>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::min);
+}
+
+// ngraph v1::Subtract -> clDNN eltwise in `sub` mode.
+void CreateSubtractOp(Program& p, const std::shared_ptr<ngraph::op::v1::Subtract>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::sub);
+}
+
+// ngraph v1::Divide -> clDNN eltwise in `div` mode.
+void CreateDivideOp(Program& p, const std::shared_ptr<ngraph::op::v1::Divide>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::div);
+}
+
+// ngraph v0::SquaredDifference -> clDNN eltwise in `squared_diff` mode.
+void CreateSquaredDifferenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::SquaredDifference>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::squared_diff);
+}
+
+// ngraph v1::Equal -> clDNN eltwise in `eq` mode.
+void CreateEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::Equal>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::eq);
+}
+
+// ngraph v1::NotEqual -> clDNN eltwise in `ne` mode.
+void CreateNotEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::NotEqual>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::ne);
+}
+
+// ngraph v1::Less -> clDNN eltwise in `lt` mode.
+void CreateLessOp(Program& p, const std::shared_ptr<ngraph::op::v1::Less>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::lt);
+}
+
+// ngraph v1::LessEqual -> clDNN eltwise in `le` mode.
+void CreateLessEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::LessEqual>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::le);
+}
+
+// ngraph v1::Greater -> clDNN eltwise in `gt` mode.
+void CreateGreaterOp(Program& p, const std::shared_ptr<ngraph::op::v1::Greater>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::gt);
+}
+
+// ngraph v1::GreaterEqual -> clDNN eltwise in `ge` mode.
+void CreateGreaterEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::GreaterEqual>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::ge);
+}
+
+// ngraph v1::LogicalAnd -> clDNN eltwise in `logic_and` mode.
+void CreateLogicalAndOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalAnd>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::logic_and);
+}
+
+// ngraph v1::LogicalOr -> clDNN eltwise in `logic_or` mode.
+void CreateLogicalOrOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalOr>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::logic_or);
+}
+
+// ngraph v1::LogicalXor -> clDNN eltwise in `logic_xor` mode.
+void CreateLogicalXorOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalXor>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::logic_xor);
+}
+
+// Translates ngraph v1::Power. When the exponent (input 1) is a Constant holding exactly
+// one element, the op becomes a unary `pow` activation with that value as parameter;
+// in every other case it is lowered to a binary clDNN eltwise in `pow` mode.
+void CreatePowerOp(Program& p, const std::shared_ptr<ngraph::op::v1::Power>& op) {
+    p.ValidateInputs(op, {2});
+
+    auto exponentNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    const bool singleValueExponent =
+        exponentNode && ngraph::shape_size(exponentNode->get_output_shape(0)) == 1;
+
+    if (!singleValueExponent) {
+        CreateElementwiseOp(p, op, cldnn::eltwise_mode::pow);
+        return;
+    }
+
+    float exponent;
+    if (!ngraph::op::util::get_single_value(exponentNode, exponent))
+        THROW_IE_EXCEPTION << "Invalid parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::pow, {exponent});
+}
+
+// ngraph v1::FloorMod -> clDNN eltwise in `floor_mod` mode.
+void CreateFloorModOp(Program& p, const std::shared_ptr<ngraph::op::v1::FloorMod>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::floor_mod);
+}
+
+// ngraph v1::Mod -> clDNN eltwise in `mod` mode.
+void CreateModOp(Program& p, const std::shared_ptr<ngraph::op::v1::Mod>& op) {
+    CreateElementwiseOp(p, op, cldnn::eltwise_mode::mod);
+}
+
+// Factory registrations for the element-wise ops above, one per (opset version, op name).
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds
+// each Create<Name>Op function to the matching ngraph op type - confirm at the macro.
+REGISTER_FACTORY_IMPL(v1, Add);
+REGISTER_FACTORY_IMPL(v1, Multiply);
+REGISTER_FACTORY_IMPL(v1, Maximum);
+REGISTER_FACTORY_IMPL(v1, Minimum);
+REGISTER_FACTORY_IMPL(v1, Subtract);
+REGISTER_FACTORY_IMPL(v1, Divide);
+REGISTER_FACTORY_IMPL(v0, SquaredDifference);
+REGISTER_FACTORY_IMPL(v1, Equal);
+REGISTER_FACTORY_IMPL(v1, NotEqual);
+REGISTER_FACTORY_IMPL(v1, Less);
+REGISTER_FACTORY_IMPL(v1, LessEqual);
+REGISTER_FACTORY_IMPL(v1, Greater);
+REGISTER_FACTORY_IMPL(v1, GreaterEqual);
+REGISTER_FACTORY_IMPL(v1, LogicalAnd);
+REGISTER_FACTORY_IMPL(v1, LogicalOr);
+REGISTER_FACTORY_IMPL(v1, LogicalXor);
+REGISTER_FACTORY_IMPL(v1, Power);
+REGISTER_FACTORY_IMPL(v1, FloorMod);
+REGISTER_FACTORY_IMPL(v1, Mod);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp
+++ b/inference-engine/src/cldnn_engine/ops/embedding_bag.cpp
@ -0,0 +1,166 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/embedding_segments_sum.hpp"
+#include "ngraph/op/embeddingbag_offsets_sum.hpp"
+#include "ngraph/op/embeddingbag_packedsum.hpp"
+
+#include "api/embedding_bag.hpp"
+#include "api/reorder.hpp"
+
+#include "transformations/utils/utils.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates ngraph v3::EmbeddingBagOffsetsSum (3, 4 or 5 inputs) into a cldnn::embedding_bag
+// primitive in offsets_sum mode.
+//
+// When a 4th input (port 3, "default_index") exists it must be a Constant with exactly one
+// element; its value is handed to the primitive as an attribute and the input itself is removed
+// from the primitive's input list. Without it the default index stays -1.
+// Inputs 1 and 2 are converted to i32 through a cldnn::reorder when they are i64.
+// Throws when default_index is not a Constant or does not hold a single value.
+void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagOffsetsSum>& op) {
+    p.ValidateInputs(op, {3, 4, 5});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    int32_t defaultIndex = -1;
+    if (inputPrimitives.size() > 3) {
+        auto index_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
+        if (!index_node) {
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+
+        float val;
+        if (ngraph::shape_size(index_node->get_output_shape(0)) != 1 || !ngraph::op::util::get_single_value(index_node, val))
+             THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+
+        defaultIndex = static_cast<int32_t>(val);
+        inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "default_index"
+    }
+
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.reserve(inputPrimitives.size());
+
+    for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
+        // Only ports 1 and 2 are index inputs. The element type is queried for those ports only:
+        // once "default_index" has been erased above, positions >= 3 of inputPrimitives no longer
+        // correspond to the op's own input port numbers.
+        const bool isIndexPort = (portIndex == 1) || (portIndex == 2);
+        if (isIndexPort && DataTypeFromPrecision(op->get_input_element_type(portIndex)) == cldnn::data_types::i64) {
+            // clDNN primitive supports only i32 data type for indices inputs,
+            // so we need additional reorders if they are provided as i64
+            auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+            auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
+            auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                                 inputPrimitives[portIndex],
+                                                 targetFormat,
+                                                 cldnn::data_types::i32);
+            p.AddPrimitive(preprocessPrim);
+            p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
+            reorderedInputs.push_back(reorderPrimName);
+        } else {
+            reorderedInputs.push_back(inputPrimitives[portIndex]);
+        }
+    }
+
+    auto embeddingBagPrim = cldnn::embedding_bag(layerName,
+                                                 reorderedInputs,
+                                                 cldnn::embedding_bag::offsets_sum,
+                                                 CldnnTensorFromIEDims(op->get_output_shape(0)),
+                                                 defaultIndex);
+
+    p.AddPrimitive(embeddingBagPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates ngraph v3::EmbeddingBagPackedSum (2 or 3 inputs) into a cldnn::embedding_bag
+// primitive in packed_sum mode. Input 1 is converted to i32 through a cldnn::reorder when it
+// is i64; all other inputs are passed through unchanged.
+void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagPackedSum>& op) {
+    p.ValidateInputs(op, {2, 3});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.reserve(inputPrimitives.size());
+
+    for (size_t port = 0; port < inputPrimitives.size(); ++port) {
+        const auto portType = DataTypeFromPrecision(op->get_input_element_type(port));
+        const bool needsI32Reorder = (port == 1) && (portType == cldnn::data_types::i64);
+        if (!needsI32Reorder) {
+            reorderedInputs.push_back(inputPrimitives[port]);
+            continue;
+        }
+
+        // The clDNN primitive accepts only i32 indices, so an i64 indices input is
+        // routed through an extra reorder that converts it.
+        auto reorderPrimName = inputPrimitives[port] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+        auto targetFormat = DefaultFormatForDims(op->get_input_shape(port).size());
+        auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                             inputPrimitives[port],
+                                             targetFormat,
+                                             cldnn::data_types::i32);
+        p.AddPrimitive(preprocessPrim);
+        p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
+        reorderedInputs.push_back(reorderPrimName);
+    }
+
+    auto embeddingBagPrim = cldnn::embedding_bag(layerName,
+                                                 reorderedInputs,
+                                                 cldnn::embedding_bag::packed_sum,
+                                                 CldnnTensorFromIEDims(op->get_output_shape(0)));
+
+    p.AddPrimitive(embeddingBagPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates ngraph v3::EmbeddingSegmentsSum (4, 5 or 6 inputs) into a cldnn::embedding_bag
+// primitive in segments_sum mode.
+//
+// Input 3 ("num_segments") is always dropped from the primitive's input list. When a
+// "default_index" input (op port 4) exists it must be a Constant with exactly one element; its
+// value is handed to the primitive as an attribute and the input is dropped as well. Without it
+// the default index stays -1.
+// Inputs 1 and 2 are converted to i32 through a cldnn::reorder when they are i64.
+// Throws when default_index is not a Constant or does not hold a single value.
+void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingSegmentsSum>& op) {
+    p.ValidateInputs(op, {4, 5, 6});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "num_segments"
+
+    int32_t defaultIndex = -1;
+    // port of default_index is 4 by default, but we removed "num_segments" above, so now it's equal to 3
+    if (inputPrimitives.size() > 3) {
+        auto index_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(4));
+        if (!index_node) {
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+
+        float val;
+        if (ngraph::shape_size(index_node->get_output_shape(0)) != 1 || !ngraph::op::util::get_single_value(index_node, val))
+             THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+
+        defaultIndex = static_cast<int32_t>(val);
+        inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "default_index"
+    }
+
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.reserve(inputPrimitives.size());
+
+    for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
+        // Only ports 1 and 2 are index inputs. The element type is queried for those ports only:
+        // after the erase() calls above, positions >= 3 of inputPrimitives no longer correspond
+        // to the op's own input port numbers.
+        const bool isIndexPort = (portIndex == 1) || (portIndex == 2);
+        if (isIndexPort && DataTypeFromPrecision(op->get_input_element_type(portIndex)) == cldnn::data_types::i64) {
+            // clDNN primitive supports only i32 data type for indices inputs,
+            // so we need additional reorders if they are provided as i64
+            auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+            auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
+            auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                                 inputPrimitives[portIndex],
+                                                 targetFormat,
+                                                 cldnn::data_types::i32);
+            p.AddPrimitive(preprocessPrim);
+            p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
+            reorderedInputs.push_back(reorderPrimName);
+        } else {
+            reorderedInputs.push_back(inputPrimitives[portIndex]);
+        }
+    }
+
+    auto embeddingBagPrim = cldnn::embedding_bag(layerName,
+                                                 reorderedInputs,
+                                                 cldnn::embedding_bag::segments_sum,
+                                                 CldnnTensorFromIEDims(op->get_output_shape(0)),
+                                                 defaultIndex);
+
+    p.AddPrimitive(embeddingBagPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registrations for the three embedding ops translated in this file.
+// NOTE(review): REGISTER_FACTORY_IMPL is defined elsewhere; presumably it binds each v3 op type
+// to the matching Create<Op>Op function above - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v3, EmbeddingBagOffsetsSum);
+REGISTER_FACTORY_IMPL(v3, EmbeddingBagPackedSum);
+REGISTER_FACTORY_IMPL(v3, EmbeddingSegmentsSum);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp
+++ b/inference-engine/src/cldnn_engine/ops/extract_image_patches.cpp
@ -0,0 +1,49 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/extractimagepatches.hpp"
+
+#include "api/extract_image_patches.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph pad type into the string form passed to cldnn::extract_image_patches.
+// Only SAME_UPPER, SAME_LOWER and VALID are accepted; any other value throws.
+static inline std::string PadToString(ngraph::op::PadType pad) {
+    if (pad == ngraph::op::PadType::SAME_UPPER)
+        return "same_upper";
+    if (pad == ngraph::op::PadType::SAME_LOWER)
+        return "same_lower";
+    if (pad == ngraph::op::PadType::VALID)
+        return "valid";
+
+    THROW_IE_EXCEPTION << "Unsupported pad type in ExtractImagePatches primitive " << pad;
+}
+
+// Translates ngraph v3::ExtractImagePatches (single input) into cldnn::extract_image_patches.
+// The op's sizes / strides / rates are copied into uint32_t vectors and the auto-pad mode is
+// passed as a string (see PadToString).
+void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph::op::v3::ExtractImagePatches>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    // Bind each attribute once so that begin()/end() are taken from the same object.
+    const auto& opSizes = op->get_sizes();
+    const auto& opStrides = op->get_strides();
+    const auto& opRates = op->get_rates();
+
+    std::vector<uint32_t> sizes(opSizes.begin(), opSizes.end());
+    std::vector<uint32_t> strides(opStrides.begin(), opStrides.end());
+    std::vector<uint32_t> rates(opRates.begin(), opRates.end());
+    std::string auto_pad = PadToString(op->get_auto_pad());
+
+    auto extractImagePatchesPrim = cldnn::extract_image_patches(layerName,
+                                                                inputPrimitives[0],
+                                                                sizes,
+                                                                strides,
+                                                                rates,
+                                                                auto_pad,
+                                                                CldnnTensorFromIEDims(op->get_output_shape(0)));
+
+    p.AddPrimitive(extractImagePatchesPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v3::ExtractImagePatches.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateExtractImagePatchesOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v3, ExtractImagePatches);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp
+++ b/inference-engine/src/cldnn_engine/ops/fake_quantize.cpp
@ -0,0 +1,42 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/fake_quantize.hpp"
+
+#include "api/quantize.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates ngraph v0::FakeQuantize (exactly 5 inputs) into cldnn::quantize.
+// Inputs are forwarded in op order: data, input_low, input_high, output_low, output_high.
+// The primitive's output data type is taken from the op's output element type.
+void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::FakeQuantize>& op) {
+    p.ValidateInputs(op, {5});
+    std::string layerName = layer_type_name_ID(op);
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+
+    const int levels = static_cast<int>(op->get_levels());
+    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    auto quantizationPrim = cldnn::quantize(layerName,
+                                            inputPrimitives[0],   // data
+                                            inputPrimitives[1],   // input_low
+                                            inputPrimitives[2],   // input_high
+                                            inputPrimitives[3],   // output_low
+                                            inputPrimitives[4],   // output_high
+                                            levels,
+                                            outputType);
+
+    p.AddPrimitive(quantizationPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v0::FakeQuantize.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateFakeQuantizeOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v0, FakeQuantize);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/gather_tree.cpp
+++ b/inference-engine/src/cldnn_engine/ops/gather_tree.cpp
@ -0,0 +1,54 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/gather_tree.hpp"
+
+#include "api/gather_tree.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates ngraph v1::GatherTree (exactly 4 inputs) into cldnn::gather_tree.
+// Every input whose element type maps to i64 is first passed through a cldnn::reorder that
+// converts it to i32; the remaining inputs are forwarded unchanged.
+void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::GatherTree>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.reserve(inputPrimitives.size());
+
+    for (size_t port = 0; port < inputPrimitives.size(); ++port) {
+        if (DataTypeFromPrecision(op->get_input_element_type(port)) != cldnn::data_types::i64) {
+            reorderedInputs.push_back(inputPrimitives[port]);
+            continue;
+        }
+
+        // The clDNN primitive has no i64 support, hence the conversion to i32.
+        auto reorderPrimName = inputPrimitives[port] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+        auto targetFormat = DefaultFormatForDims(op->get_input_shape(port).size());
+        auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                             inputPrimitives[port],
+                                             targetFormat,
+                                             cldnn::data_types::i32);
+        p.AddPrimitive(preprocessPrim);
+        p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
+        reorderedInputs.push_back(reorderPrimName);
+    }
+
+    auto gatherTreePrim = cldnn::gather_tree(layerName,
+                                             reorderedInputs[0],
+                                             reorderedInputs[1],
+                                             reorderedInputs[2],
+                                             reorderedInputs[3]);
+
+    p.AddPrimitive(gatherTreePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v1::GatherTree.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateGatherTreeOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v1, GatherTree);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/gather.cpp
+++ b/inference-engine/src/cldnn_engine/ops/gather.cpp
@ -0,0 +1,103 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/gather.hpp"
+
+#include "api/gather.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps an IE gather axis onto the clDNN gather axis enum for the given input format.
+// Axes 0 and 1 map to batch / feature for every format; the remaining axes are resolved
+// per format (bfyx, bfzyx, bfwzyx). Any unsupported axis / format combination throws.
+// NOTE(review): the negative-axis rows are reproduced as-is; e.g. for bfyx, -1 maps to along_y,
+// which looks like it assumes a 3D input - confirm whether negative axes can reach this
+// function at all and whether the mapping is intended for full-rank inputs.
+static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inputFormat) {
+    using gather_axis = cldnn::gather::gather_axis;
+
+    // Batch and feature are format independent.
+    switch (axis) {
+        case 0: return gather_axis::along_b;
+        case 1: return gather_axis::along_f;
+        default: break;
+    }
+
+    if (inputFormat == cldnn::format::bfyx) {
+        switch (axis) {
+            case 2: return gather_axis::along_y;
+            case 3: return gather_axis::along_x;
+            case -1: return gather_axis::along_y;
+            case -2: return gather_axis::along_f;
+            case -3: return gather_axis::along_b;
+            default: break;
+        }
+    } else if (inputFormat == cldnn::format::bfzyx) {
+        switch (axis) {
+            case 2: return gather_axis::along_z;
+            case 3: return gather_axis::along_y;
+            case 4: return gather_axis::along_x;
+            case -1: return gather_axis::along_y;
+            case -2: return gather_axis::along_z;
+            case -3: return gather_axis::along_f;
+            case -4: return gather_axis::along_b;
+            default: break;
+        }
+    } else if (inputFormat == cldnn::format::bfwzyx) {
+        switch (axis) {
+            case 2: return gather_axis::along_w;
+            case 3: return gather_axis::along_z;
+            case 4: return gather_axis::along_y;
+            case 5: return gather_axis::along_x;
+            case -1: return gather_axis::along_y;
+            case -2: return gather_axis::along_z;
+            case -3: return gather_axis::along_w;
+            case -4: return gather_axis::along_f;
+            case -5: return gather_axis::along_b;
+            default: break;
+        }
+    }
+
+    // Reached for an unknown format or an axis that the format does not define.
+    THROW_IE_EXCEPTION << "Unsupported gather axis: " << axis;
+}
+
+// Translates ngraph v1::Gather (2 or 3 inputs) into cldnn::gather.
+// Every input whose element type maps to i64 is first passed through a cldnn::reorder that
+// converts it to i32. Only the first two (possibly reordered) inputs are handed to the
+// primitive; the axis comes from op->get_axis() and is mapped with GetGatherAxis using the
+// default format for the rank of input 0.
+void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
+    p.ValidateInputs(op, {2, 3});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    int32_t axis = static_cast<int32_t>(op->get_axis());
+
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.reserve(inputPrimitives.size());
+
+    for (size_t port = 0; port < inputPrimitives.size(); ++port) {
+        if (DataTypeFromPrecision(op->get_input_element_type(port)) != cldnn::data_types::i64) {
+            reorderedInputs.push_back(inputPrimitives[port]);
+            continue;
+        }
+
+        // The clDNN primitive has no i64 support, hence the conversion to i32.
+        auto reorderPrimName = inputPrimitives[port] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+        auto targetFormat = DefaultFormatForDims(op->get_input_shape(port).size());
+        auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                             inputPrimitives[port],
+                                             targetFormat,
+                                             cldnn::data_types::i32);
+        p.AddPrimitive(preprocessPrim);
+        p.AddInnerPrimitiveToProfiler(reorderPrimName, layerName, op);
+        reorderedInputs.push_back(reorderPrimName);
+    }
+
+    auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size());
+    auto gatherAxis = GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size()));
+    auto gatherPrim = cldnn::gather(layerName,
+                                    reorderedInputs[0],
+                                    reorderedInputs[1],
+                                    gatherAxis,
+                                    outLayout,
+                                    CldnnTensorFromIEDims(op->get_output_shape(0)));
+
+    p.AddPrimitive(gatherPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v1::Gather.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateGatherOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v1, Gather);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/grn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/grn.cpp
@ -0,0 +1,30 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/grn.hpp"
+
+#include "api/grn.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates ngraph v0::GRN (single input) into cldnn::grn, forwarding the op's bias and
+// using the op's output element type as the primitive's data type.
+void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
+    auto grnPrim = cldnn::grn(layerName, inputPrimitives[0], op->get_bias(), outputType);
+
+    p.AddPrimitive(grnPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v0::GRN.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateGRNOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v0, GRN);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/interpolate.cpp
+++ b/inference-engine/src/cldnn_engine/ops/interpolate.cpp
@ -0,0 +1,203 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+#include "caseless.hpp"
+
+#include "ngraph/op/interpolate.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/resample.hpp"
+
+namespace CLDNNPlugin {
+
+// One-to-one mapping from the ngraph v4::Interpolate coordinate transformation mode to the
+// clDNN enum of the same name. Throws for a value outside the listed enumerators.
+static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
+    using NgMode = ngraph::op::v4::Interpolate::CoordinateTransformMode;
+    using ClMode = cldnn::coordinate_transformation_mode;
+
+    switch (mode) {
+    case NgMode::half_pixel:           return ClMode::half_pixel;
+    case NgMode::pytorch_half_pixel:   return ClMode::pytorch_half_pixel;
+    case NgMode::asymmetric:           return ClMode::asymmetric;
+    case NgMode::tf_half_pixel_for_nn: return ClMode::tf_half_pixel_for_nn;
+    case NgMode::align_corners:        return ClMode::align_corners;
+    }
+
+    THROW_IE_EXCEPTION << "Unknown coordinate transformation mode: " << static_cast<int>(mode);
+}
+
+// One-to-one mapping from the ngraph v4::Interpolate nearest mode to the clDNN enum of the
+// same name. Throws for a value outside the listed enumerators.
+static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) {
+    using NgMode = ngraph::op::v4::Interpolate::NearestMode;
+    using ClMode = cldnn::nearest_mode;
+
+    switch (mode) {
+    case NgMode::round_prefer_floor: return ClMode::round_prefer_floor;
+    case NgMode::round_prefer_ceil:  return ClMode::round_prefer_ceil;
+    case NgMode::floor:              return ClMode::floor;
+    case NgMode::ceil:               return ClMode::ceil;
+    case NgMode::simple:             return ClMode::simple;
+    }
+
+    THROW_IE_EXCEPTION << "Unknown nearest mode: " << static_cast<int>(mode);
+}
+
+// Maps the ngraph v4::Interpolate shape calculation mode (sizes / scales) to the clDNN enum.
+// Throws for any other value.
+static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) {
+    using NgMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
+
+    if (mode == NgMode::sizes)
+        return cldnn::shape_calculation_mode::sizes;
+    if (mode == NgMode::scales)
+        return cldnn::shape_calculation_mode::scales;
+
+    THROW_IE_EXCEPTION << "Unknown shape calculation mode: " << static_cast<int>(mode);
+}
+
+// Maps the ngraph v4::Interpolate interpolation mode to the clDNN resample type.
+// Note that ngraph "linear" is mapped to clDNN "caffe_bilinear"; the other modes keep their
+// names. Throws for a value outside the listed enumerators.
+static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) {
+    using NgMode = ngraph::op::v4::Interpolate::InterpolateMode;
+
+    if (mode == NgMode::nearest)
+        return cldnn::resample_type::nearest;
+    if (mode == NgMode::linear)
+        return cldnn::resample_type::caffe_bilinear;
+    if (mode == NgMode::linear_onnx)
+        return cldnn::resample_type::linear_onnx;
+    if (mode == NgMode::cubic)
+        return cldnn::resample_type::cubic;
+
+    THROW_IE_EXCEPTION << "Unknown interpolation mode: " << static_cast<int>(mode);
+}
+
+// Converts an IE axis index of a tensor with `sz` dimensions into the clDNN resample axis.
+// A negative axis is counted from the end (axis + sz). An axis that is still out of
+// [0, sz) after that throws. Batch (0) and feature (1) keep their position; spatial axes
+// are reversed because clDNN orders them innermost-first (x, y, z, w), with the spatial
+// rank being at least 2 (tensors are treated as at least 4D).
+static cldnn::resample::resample_axis GetInterpolationAxis(int32_t axis, uint32_t sz) {
+    using resample_axis = cldnn::resample::resample_axis;
+
+    int32_t ieAxis = axis;
+    if (ieAxis < 0)
+        ieAxis += static_cast<int32_t>(sz);
+    if (ieAxis < 0 || static_cast<uint32_t>(ieAxis) >= sz)
+        THROW_IE_EXCEPTION << "Interpolate axis is not correspond to number of dimensions";
+
+    // Difference in dimension ordering between IE and clDNN,
+    // reverse spatial dimensions after batch and feature.
+    uint32_t cldnnAxis = static_cast<uint32_t>(ieAxis);
+    if (ieAxis >= 2) {
+        // Default and minimum number of dimensions is 4
+        const uint32_t spatialRank = std::max(sz, 4u) - 2;
+        const uint32_t spatialIdx = static_cast<uint32_t>(ieAxis) - 2;
+        cldnnAxis = spatialRank - spatialIdx - 1 + 2;
+    }
+
+    // Indexed by the clDNN axis number computed above.
+    static const resample_axis axisByIndex[] = {
+        resample_axis::along_b,
+        resample_axis::along_f,
+        resample_axis::along_x,
+        resample_axis::along_y,
+        resample_axis::along_z,
+        resample_axis::along_w,
+    };
+    if (cldnnAxis < sizeof(axisByIndex) / sizeof(axisByIndex[0]))
+        return axisByIndex[cldnnAxis];
+
+    THROW_IE_EXCEPTION << "Unsupported Interpolate axis: " << ieAxis;
+}
+
+// Translates ngraph v4::Interpolate (3 or 4 inputs) into a cldnn::resample primitive.
+//
+// Input 2 (scales) must be a Constant, and so must input 3 (axes) when it is present; otherwise
+// an exception is thrown. Without an axes input every input dimension is used as an axis.
+// The number of axes must equal the number of scales. pads_begin / pads_end are extended with
+// zeros up to max(output rank, 4) entries. For the linear_onnx mode additional restrictions on
+// the input rank and on the axes are enforced.
+void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4::Interpolate>& op) {
+    p.ValidateInputs(op, {3, 4});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    static const size_t SCALES_INDEX = 2;
+    static const size_t AXES_INDEX = 3;
+
+    auto attrs = op->get_attrs();
+    auto inputRank = op->get_input_shape(0).size();
+    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    // Zero-extend both padding vectors to at least max(output rank, 4) entries.
+    const size_t padRank = std::max<size_t>(op->get_output_shape(0).size(), 4);
+    std::vector<int> pad_begin(attrs.pads_begin.begin(), attrs.pads_begin.end());
+    std::vector<int> pad_end(attrs.pads_end.begin(), attrs.pads_end.end());
+    if (pad_begin.size() < padRank)
+        pad_begin.resize(padRank, 0);
+    if (pad_end.size() < padRank)
+        pad_end.resize(padRank, 0);
+
+    int antialias = attrs.antialias;
+    float cube_coeff = attrs.cube_coeff;
+
+    auto cldnnSampleType = GetResampleType(attrs.mode);
+    auto shapeCalcMode = GetShapeCalculationMode(attrs.shape_calculation_mode);
+    auto coordTransMode = GetCoordinateTransformationMode(attrs.coordinate_transformation_mode);
+    auto nearestMode = GetNearestMode(attrs.nearest_mode);
+
+    auto scales_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(SCALES_INDEX));
+    if (!scales_constant) {
+        THROW_IE_EXCEPTION << "Unsupported parameter node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+    std::vector<float> scales = scales_constant->cast_vector<float>();
+
+    // Collect the IE axes first (explicit input or 0..rank-1), then convert them in one pass.
+    std::vector<int32_t> ieAxes;
+    if (op->get_input_size() == 4) {
+        auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(AXES_INDEX));
+        if (!axes_constant) {
+            THROW_IE_EXCEPTION << "Unsupported parameter node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+        ieAxes = axes_constant->cast_vector<int32_t>();
+    } else {
+        for (size_t i = 0; i < inputRank; ++i) {
+            ieAxes.push_back(static_cast<int32_t>(i));
+        }
+    }
+
+    std::vector<cldnn::resample::resample_axis> axes;
+    axes.reserve(ieAxes.size());
+    for (auto ieAxis : ieAxes) {
+        axes.push_back(GetInterpolationAxis(ieAxis, inputRank));
+    }
+
+    if (axes.size() != scales.size())
+        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect axes and scales should be the same size";
+
+    cldnn::resample::AxesAndScales axesAndScales;
+    for (size_t i = 0; i < axes.size(); ++i) {
+        axesAndScales[axes[i]] = scales[i];
+    }
+
+    if (cldnnSampleType == cldnn::resample_type::linear_onnx) {
+        using resample_axis = cldnn::resample::resample_axis;
+
+        if (inputRank != 2 && inputRank != 4)
+            THROW_IE_EXCEPTION << "mode 'linear_onnx' supports only 2D or 4D tensors";
+        if (axes.size() != 2 && inputRank != axes.size())
+            THROW_IE_EXCEPTION << "mode 'linear_onnx' supports only axes with size 2 or equal to input rank";
+
+        // After the two checks above axes holds either 2 entries or exactly 4 (with a 4D input).
+        bool correctAxes;
+        if (axes.size() == 4 && inputRank == 4) {
+            correctAxes = axes[0] == resample_axis::along_b &&
+                          axes[1] == resample_axis::along_f &&
+                          axes[2] == resample_axis::along_y &&
+                          axes[3] == resample_axis::along_x;
+        } else {
+            const bool batchFeature = (axes[0] == resample_axis::along_b) &&
+                                      (axes[1] == resample_axis::along_f);
+            const bool spatialYX = (axes[0] == resample_axis::along_y) &&
+                                   (axes[1] == resample_axis::along_x);
+            correctAxes = batchFeature || spatialYX;
+        }
+        if (!correctAxes)
+            THROW_IE_EXCEPTION <<
+                "mode 'linear_onnx' supports only case when axes = {2, 3} or "
+                "axes = {0, 1} or axes = {0, 1, 2, 3}";
+    }
+
+    auto resamplePrim = cldnn::resample(layerName,
+                                        inputPrimitives[0],
+                                        outTensor,
+                                        axesAndScales,
+                                        pad_begin,
+                                        pad_end,
+                                        antialias,
+                                        cube_coeff,
+                                        cldnnSampleType,
+                                        shapeCalcMode,
+                                        coordTransMode,
+                                        nearestMode);
+
+    p.AddPrimitive(resamplePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v4::Interpolate.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateInterpolateOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v4, Interpolate);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/lrn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/lrn.cpp
@ -0,0 +1,49 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/lrn.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/lrn.hpp"
+
+namespace CLDNNPlugin {
+
+// Selects the clDNN LRN normalization region from the LRN axes values:
+// exactly one axis equal to 1 means "across channel", anything else means "within channel".
+// The vector is taken by const reference so that callers do not pay for a copy.
+static cldnn::lrn_norm_region GetNormRegion(const std::vector<int64_t>& axis_value) {
+    const bool acrossChannel = axis_value.size() == 1 && axis_value[0] == 1;
+    return acrossChannel ? cldnn::lrn_norm_region_across_channel
+                         : cldnn::lrn_norm_region_within_channel;
+}
+
+// Translates ngraph v0::LRN (exactly 2 inputs) into cldnn::lrn.
+// Input 1 (axes) must be a Constant, otherwise an exception is thrown; its values only select
+// the normalization region (see GetNormRegion). Size, bias, alpha and beta come from the op.
+void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto axesConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    if (!axesConst) {
+        THROW_IE_EXCEPTION << "Unsupported axes node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    auto axesValues = axesConst->cast_vector<int64_t>();
+    const auto normRegion = GetNormRegion(axesValues);
+
+    const float bias = static_cast<float>(op->get_bias());
+    const float alpha = static_cast<float>(op->get_alpha());
+    const float beta = static_cast<float>(op->get_beta());
+
+    auto lrnPrim = cldnn::lrn(layerName,
+                              inputPrimitives[0],
+                              op->get_nsize(),
+                              bias,
+                              alpha,
+                              beta,
+                              normRegion);
+
+    p.AddPrimitive(lrnPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registration for v0::LRN.
+// NOTE(review): the macro is defined elsewhere; presumably it binds the op type to
+// CreateLRNOp - confirm against the macro definition.
+REGISTER_FACTORY_IMPL(v0, LRN);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/matmul.cpp
+++ b/inference-engine/src/cldnn_engine/ops/matmul.cpp
@ -0,0 +1,248 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/matmul.hpp"
+#include "ngraph/op/constant.hpp"
+#include "ngraph/op/fake_quantize.hpp"
+
+#include "api/gemm.hpp"
+#include "api/fully_connected.hpp"
+#include "api/reshape.hpp"
+#include "api/reorder.hpp"
+#include "api/permute.hpp"
+
+namespace CLDNNPlugin {
+
+/*
+*  The get_aligned_shapes function aligns two input shapes so that they have the same rank
+*  and the same batch dimensions (the last two dimensions are not compared).
+*  It also checks that the batch dimensions are compatible, so for two shapes such as
+*  [2, 32, 64] and [3, 64, 64] it raises an exception.
+*/
+
+// Returns copies of shape_a and shape_b brought to a common rank and common batch dimensions:
+//  1. the shorter shape is left-padded with 1s up to the rank of the longer one;
+//  2. the last two dimensions are swapped for an input whose transpose flag is set on `matmul`;
+//  3. every leading (batch) dimension is broadcast to the larger of the two values.
+// Throws if both shapes have rank below 2 or if a pair of batch dimensions differs
+// while neither of them is 1.
+static std::pair<ngraph::Shape, ngraph::Shape> get_aligned_shapes(const ngraph::Shape& shape_a,
+                                                                  const ngraph::Shape& shape_b,
+                                                                  const std::shared_ptr<ngraph::op::v0::MatMul>& matmul) {
+    ngraph::Shape shape_a_aligned(shape_a), shape_b_aligned(shape_b);
+    size_t max_size = std::max(shape_a_aligned.size(), shape_b_aligned.size());
+
+    // Everything below addresses the last two dimensions and computes `max_size - 2`
+    // in unsigned arithmetic; with rank < 2 that would index before begin() and wrap
+    // the loop bound around, so such shapes are rejected up front.
+    if (max_size < 2) {
+        THROW_IE_EXCEPTION << "MatMul " << matmul->get_friendly_name() << " shapes are inconsistent.";
+    }
+
+    for (size_t i = 0, cnt = max_size - shape_a_aligned.size(); i < cnt; ++i)
+        shape_a_aligned.insert(shape_a_aligned.begin(), 1);
+    for (size_t i = 0, cnt = max_size - shape_b_aligned.size(); i < cnt; ++i)
+        shape_b_aligned.insert(shape_b_aligned.begin(), 1);
+
+    if (matmul->get_transpose_a()) {
+        std::swap(*(shape_a_aligned.end() - 1), *(shape_a_aligned.end() - 2));
+    }
+    if (matmul->get_transpose_b()) {
+        std::swap(*(shape_b_aligned.end() - 1), *(shape_b_aligned.end() - 2));
+    }
+
+    // Only the batch dimensions are compared and broadcast; the two matrix dimensions are left as is.
+    for (size_t i = 0; i < max_size - 2; ++i) {
+        if (shape_a_aligned[i] != shape_b_aligned[i] && shape_a_aligned[i] > 1 && shape_b_aligned[i] > 1) {
+            THROW_IE_EXCEPTION << "Shapes can't be aligned: " << shape_a_aligned << " " << shape_b_aligned;
+        }
+        size_t max_value = std::max(shape_a_aligned[i], shape_b_aligned[i]);
+        shape_a_aligned[i] = shape_b_aligned[i] = max_value;
+    }
+
+    return {shape_a_aligned, shape_b_aligned};
+}
+
+// Translates an ngraph v0 MatMul operation into clDNN primitives.
+// When the second input is a Constant or FakeQuantize node whose shape has at most two
+// non-unit dimensions, the op is lowered to cldnn::fully_connected (with helper permute /
+// reshape primitives around it). Otherwise it is lowered to cldnn::gemm, with reorder /
+// reshape primitives that bring both inputs to the output rank.
+void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto shape_a = op->get_input_shape(0);
+    auto shape_b = op->get_input_shape(1);
+
+    bool is_fc = ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) ||
+                 ngraph::is_type<ngraph::op::v0::FakeQuantize>(op->get_input_node_shared_ptr(1));
+    is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2;
+
+    if (is_fc) {
+        ngraph::Shape shape_a_aligned, shape_b_aligned;
+        std::tie(shape_a_aligned, shape_b_aligned) = get_aligned_shapes(shape_a, shape_b, op);
+        if (shape_a_aligned.size() < 2 || shape_b_aligned.size() < 2) {
+            THROW_IE_EXCEPTION << "MatMul " << op->get_friendly_name() << " shapes are inconsistent.";
+        }
+        // The aligned shapes already account for the transpose flags, so these are the
+        // last dimensions of the inputs after the permutes added below.
+        size_t K = *(shape_a_aligned.end() - 1);
+        size_t O = *(shape_b_aligned.end() - 1);
+
+        auto inputName = inputPrimitives[0];
+        auto weightsName = inputPrimitives[1];
+        // Weights normalization
+        if (!op->get_transpose_b()) {
+            ngraph::Shape output_shape = shape_b;
+            std::vector<uint16_t> transpose_order(output_shape.size());
+            std::iota(transpose_order.begin(), transpose_order.end(), 0);
+            std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2));
+
+            for (auto o = transpose_order.size(); o < 4; o++)
+                transpose_order.push_back((uint16_t)o);
+
+            auto permuteName = op->get_friendly_name() + "/transpose_b";
+            auto permutePrim = cldnn::permute(permuteName,
+                                              weightsName,
+                                              transpose_order);
+            p.AddPrimitive(permutePrim);
+            p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
+            weightsName = permuteName;
+        }
+
+        // Input normalization
+        if (op->get_transpose_a()) {
+            ngraph::Shape output_shape = shape_a;
+            std::vector<uint16_t> transpose_order(output_shape.size());
+            std::iota(transpose_order.begin(), transpose_order.end(), 0);
+            std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2));
+
+            for (auto o = transpose_order.size(); o < 4; o++)
+                transpose_order.push_back((uint16_t)o);
+
+            auto permuteName = op->get_friendly_name() + "/transpose_a";
+            auto permutePrim = cldnn::permute(permuteName,
+                                              inputName,
+                                              transpose_order);
+            p.AddPrimitive(permutePrim);
+            p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
+            inputName = permuteName;
+        }
+
+        bool reshape_fc = shape_a_aligned.size() > 3;
+
+        // Adds a reshape of `inputName` to the 2D shape { total / features, features } and
+        // returns the id of the new primitive. Throws if `features` does not divide the
+        // total element count of `shape`.
+        auto reshape_to_2d = [&](const ngraph::Shape& shape, std::string inputName, size_t features, std::string suffix) -> std::string {
+            // The accumulator type of std::accumulate is the type of the initial value, so it
+            // has to be size_t; an int literal would truncate the running product to int.
+            size_t total = std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+            std::vector<size_t> reshapeSize = { total / features, features };
+
+            if (total != reshapeSize[0] * reshapeSize[1])
+                THROW_IE_EXCEPTION << "Inconsistent reshape in Matmul op: " << op->get_friendly_name();
+
+            auto reshapeInName = op->get_friendly_name() + suffix;
+            auto reshapeInPrim = cldnn::reshape(reshapeInName, inputName, CldnnTensorFromIEDims(reshapeSize));
+            p.AddPrimitive(reshapeInPrim);
+            p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
+            return reshapeInName;
+        };
+
+        if (reshape_fc) {
+            // K is the innermost dimension of the (possibly transposed) input. Without
+            // transpose_a it equals shape_a.back(); with transpose_a the permute above has
+            // moved shape_a[rank - 2] to the innermost position, so shape_a.back() would
+            // produce a 2D shape that does not match the weights.
+            inputName = reshape_to_2d(shape_a, inputName, K, "_cldnn_reshape_in");
+            weightsName = reshape_to_2d(shape_b, weightsName, K, "_cldnn_reshape_weights");
+        }
+
+        auto fcPrim = cldnn::fully_connected(layerName,
+                                             inputName,
+                                             weightsName,
+                                             "",
+                                             DataTypeFromPrecision(op->get_output_element_type(0)),
+                                             cldnn::padding(),
+                                             op->get_output_shape(0).size());
+
+        p.AddPrimitive(fcPrim);
+
+        auto lastLayerName = layerName;
+        if (reshape_fc) {
+            // Restore the original output shape after the 2D fully connected primitive.
+            auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
+            auto outReshapeName = layerName + "_cldnn_out_reshape";
+            auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
+
+            p.AddPrimitive(outReshapePrim);
+            p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
+
+            lastLayerName = outReshapeName;
+        }
+
+        p.AddPrimitiveToProfiler(op, lastLayerName);
+    } else {
+        auto outDims = op->get_output_shape(0);
+        auto outDimsN = outDims.size();
+
+        // Builds the tensor used for gemm inputs: the last two dimensions always go to the
+        // spatial x/y positions, whatever the rank (2..6) is.
+        auto gemmSpecificTensor = [](const InferenceEngine::SizeVector& dims) {
+            switch (dims.size()) {
+            case 2: return cldnn::tensor(cldnn::spatial(dims[1], dims[0]));
+            case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::spatial(dims[2], dims[1]));
+            case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
+            case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
+            case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
+            default: THROW_IE_EXCEPTION << "Invalid dimensions size(" << dims.size() << ") for Gemm layer";
+            }
+        };
+
+        // Preprocess inputs
+        for (size_t i = 0; i < inputPrimitives.size(); ++i) {
+            auto inputDims = op->get_input_shape(i);
+            auto inputDimsN = inputDims.size();
+
+            // Add reorder if changing number of dimensions requires changing format
+            auto targetFormat = DefaultFormatForDims(outDimsN);
+
+            if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
+                auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
+                auto targetDatatype = DataTypeFromPrecision(op->get_output_element_type(0));
+                auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);
+
+                p.AddPrimitive(reorderPrim);
+                p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
+
+                inputPrimitives[i] = reorderName;
+            }
+
+            // Reshape input if they differ or gemm specific shape matches default one
+            if (inputDimsN != outDimsN || inputDimsN < 4) {
+                auto reshapeName = layerName + "_cldnn_in" + std::to_string(i) + "_reshape";
+
+                // Extend input dimensions by prepending ones
+                inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul);
+
+                auto targetShape = gemmSpecificTensor(inputDims);
+
+                auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);
+
+                p.AddPrimitive(reshapePrim);
+                p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
+
+                inputPrimitives[i] = reshapeName;
+            }
+        }
+
+        // Add actual gemm
+        auto alpha = 1.0f;
+        auto beta = 0.0f;
+        auto transA = op->get_transpose_a();
+        auto transB = op->get_transpose_b();
+
+        auto gemmPrim = cldnn::gemm(layerName,
+                                    inputPrimitives,
+                                    DataTypeFromPrecision(op->get_output_element_type(0)),
+                                    transA,
+                                    transB,
+                                    alpha,
+                                    beta);
+
+        p.AddPrimitive(gemmPrim);
+
+        auto lastLayerName = layerName;
+
+        // Reshape output if gemm specific shape does not match default one
+        if (outDimsN < 4) {
+            auto outputShape = CldnnTensorFromIEDims(outDims);
+            auto outReshapeName = layerName + "_cldnn_out_reshape";
+            auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);
+
+            p.AddPrimitive(outReshapePrim);
+            p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);
+
+            lastLayerName = outReshapeName;
+        }
+
+        p.AddPrimitiveToProfiler(op, lastLayerName);
+    }
+}
+
+REGISTER_FACTORY_IMPL(v0, MatMul);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/mvn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/mvn.cpp
@ -0,0 +1,38 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/mvn.hpp"
+
+#include "api/mvn.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0 MVN operation into a cldnn::mvn primitive.
+void CreateMVNOp(Program& p, const std::shared_ptr<ngraph::op::v0::MVN>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    // FIXME: op->get_across_channels(); doesn't work for some reason. Is it expected?
+    // The flag is derived from the reduction axes instead: it is set when axis 1 is reduced.
+    const size_t channelAxis = 1;
+    ngraph::AxisSet axes = op->get_reduction_axes();
+    const bool acrossChannels = axes.count(channelAxis) > 0;
+
+    const bool normalizeVariance = op->get_normalize_variance();
+    const float epsilon = op->get_eps();
+
+    auto primitive = cldnn::mvn(primitiveId, inputs[0], acrossChannels, normalizeVariance, epsilon);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, MVN);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp
+++ b/inference-engine/src/cldnn_engine/ops/non_max_suppression.cpp
@ -0,0 +1,163 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/non_max_suppression.hpp"
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph_ops/nms_ie_internal.hpp>
+
+#include "api/reorder.hpp"
+#include "api/mutable_data.hpp"
+#include "api/non_max_suppression.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts the NonMaxSuppression box encoding into a "center point box" flag:
+// true for CENTER, false for CORNER; any other value raises an exception.
+static bool GetCenterPointBox(ngraph::op::v5::NonMaxSuppression::BoxEncodingType encoding) {
+    using BoxEncoding = ::ngraph::op::v5::NonMaxSuppression::BoxEncodingType;
+
+    if (encoding == BoxEncoding::CENTER) {
+        return true;
+    }
+    if (encoding == BoxEncoding::CORNER) {
+        return false;
+    }
+
+    THROW_IE_EXCEPTION << "NonMaxSuppression layer has unsupported box encoding";
+    return false;
+}
+
+// Translates NonMaxSuppressionIEInternal into a cldnn::non_max_suppression primitive.
+// The op may have 2 to 6 inputs and 1 to 3 outputs. The primitive itself produces output 0;
+// for outputs 1 and 2 a cldnn::memory buffer is allocated and wrapped into a pair of
+// mutable_data primitives: a "_md_write_*" one that is handed to the primitive through
+// second_output / third_output, and a ".1" / ".2" one that depends on the primitive and
+// wraps the same buffer.
+void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngraph::op::internal::NonMaxSuppressionIEInternal>& op) {
+    p.ValidateInputs(op, {2, 3, 4, 5, 6});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+
+    // Same ids as inputPrimitives, except that input 2 may be replaced by an i32 reorder.
+    std::vector<cldnn::primitive_id> reorderedInputs;
+    reorderedInputs.resize(inputPrimitives.size());
+
+    for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
+        auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
+        if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) {
+            // clDNN primitive supports only i32 data type for 'max_output_boxes_per_class' input
+            // so we need additional reorder if it's provided as i64
+            auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
+            auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
+            auto preprocessPrim = cldnn::reorder(reorderPrimName,
+                                                 inputPrimitives[portIndex],
+                                                 targetFormat,
+                                                 cldnn::data_types::i32);
+            p.AddPrimitive(preprocessPrim);
+            p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
+            reorderedInputs[portIndex] = (reorderPrimName);
+        } else {
+            reorderedInputs[portIndex] = inputPrimitives[portIndex];
+        }
+    }
+
+    // clDNN primitive supports only i32 as output data type
+    auto out_type = op->get_output_element_type(0);
+    if (out_type == ngraph::element::i64) {
+        out_type = ngraph::element::i32;
+    }
+
+    // First dimension of output 0; passed to the primitive and used to size the buffer of output 1.
+    auto outputIndices = op->get_output_shape(0)[0];
+
+    // NOTE: num_batches, num_boxes and num_classes are not referenced again in this function;
+    // the .at() calls still bounds-check the rank of both input shapes.
+    auto boxesShape = op->get_input_shape(0);
+    int32_t num_batches = boxesShape.at(0);
+    int32_t num_boxes = boxesShape.at(1);
+
+    auto scoresShape = op->get_input_shape(1);
+    int32_t num_classes = scoresShape.at(1);
+
+    std::size_t num_output = op->get_output_size();
+
+    // Buffers for the additional outputs. With 3 outputs the buffer of output 2 is pushed
+    // first (front()) and the buffer of output 1 second (back()); with 2 outputs only the
+    // buffer of output 1 is present.
+    std::vector<cldnn::memory> shared_memory;
+    switch (num_output) {
+        case 3: {
+            auto mutable_precision_second = op->get_output_element_type(2);
+            if (mutable_precision_second == ngraph::element::i64) {
+                mutable_precision_second = ngraph::element::i32;
+            }
+            cldnn::layout mutableLayoutSecond = cldnn::layout(
+                DataTypeFromPrecision(mutable_precision_second),
+                DefaultFormatForDims(op->get_output_shape(2).size()),
+                CldnnTensorFromIEDims(op->get_output_shape(2)));
+
+            shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond));
+
+            cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
+            auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
+            p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_second] = { op->get_friendly_name() };
+            p.primitiveIDs[non_max_supression_mutable_id_w_second] = non_max_supression_mutable_id_w_second;
+            p.AddPrimitive(nms_mutable_prim_second);
+            // Appended to inputPrimitives only (not to reorderedInputs).
+            inputPrimitives.push_back(non_max_supression_mutable_id_w_second);
+        }
+        // No break above: case 3 continues into case 2, so the buffer for output 1 is created as well.
+        case 2: {
+            // Unlike output 2, the element type of output 1 is used without an i64 -> i32 substitution.
+            auto mutable_precision_first = op->get_output_element_type(1);
+
+            cldnn::layout mutableLayoutFirst = cldnn::layout(
+                DataTypeFromPrecision(mutable_precision_first),
+                cldnn::format::bfyx,
+                cldnn::tensor(outputIndices, 3, 1, 1));
+
+            shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutFirst));
+
+            cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
+            auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
+            p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_first] = { op->get_friendly_name() };
+            p.primitiveIDs[non_max_supression_mutable_id_w_first] = non_max_supression_mutable_id_w_first;
+            p.AddPrimitive(nms_mutable_prim_first);
+            inputPrimitives.push_back(non_max_supression_mutable_id_w_first);
+        }
+        // No break above: case 2 continues into case 1, which only leaves the switch.
+        case 1: break;
+        default: THROW_IE_EXCEPTION << "Incorrect number of output for layer: " << op->get_friendly_name();
+    }
+
+    // With several outputs the primitive itself gets the ".0" suffix.
+    auto nonMaxSupressionLayerName = num_output > 1 ? layer_type_name_ID(op) + ".0" : layer_type_name_ID(op);
+    auto prim = cldnn::non_max_suppression(
+            nonMaxSupressionLayerName,
+            reorderedInputs[0],
+            reorderedInputs[1],
+            static_cast<int>(outputIndices),
+            op->m_center_point_box,
+            op->m_sort_result_descending);
+
+    prim.output_data_type = DataTypeFromPrecision(out_type);
+
+    // reorderedInputs holds only the op's own inputs (the mutable_data ids were appended to
+    // inputPrimitives), so its size equals the number of op inputs. The cases have no break:
+    // each one sets its optional input and continues into the cases below it.
+    switch (reorderedInputs.size()) {
+        case 6: prim.soft_nms_sigma = reorderedInputs[5];
+        case 5: prim.score_threshold = reorderedInputs[4];
+        case 4: prim.iou_threshold = reorderedInputs[3];
+        case 3: prim.num_select_per_class = reorderedInputs[2];
+        case 2: break;
+        default: THROW_IE_EXCEPTION << "Incorrect number of input primitives for layer: " << op->get_friendly_name();
+    }
+
+    // The write-side mutable_data ids are the last entries of inputPrimitives: with 3 outputs
+    // the second-to-last one is "_md_write_second" and the last one is "_md_write_first".
+    // Case 3 has no break and continues into case 2.
+    switch (num_output) {
+        case 3: prim.third_output = inputPrimitives[inputPrimitives.size() - 2];
+        case 2: prim.second_output = inputPrimitives[inputPrimitives.size() - 1];
+        default: break;
+    }
+
+    p.AddPrimitive(prim);
+
+    // Read-side mutable_data primitives: they take the NMS primitive as their input and wrap
+    // the same buffers as the write-side ones. Case 3 has no break and continues into case 2.
+    switch (num_output) {
+        case 3: {
+            cldnn::primitive_id non_max_supression_id_r_second = layer_type_name_ID(op) + ".2";
+            // front() is the buffer of output 2 when there are 3 outputs.
+            auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second, { nonMaxSupressionLayerName }, shared_memory.front());
+            p.primitivesToIRLayersMap[non_max_supression_id_r_second] = { op->get_friendly_name() };
+            p.primitiveIDs[non_max_supression_id_r_second] = non_max_supression_id_r_second;
+            p.AddPrimitive(nms_mutable_prim_r_second);
+        }
+        case 2: {
+            cldnn::primitive_id non_max_supression_id_r_first = layer_type_name_ID(op) + ".1";
+            // back() is the buffer of output 1 for both 2 and 3 outputs.
+            auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, { nonMaxSupressionLayerName }, shared_memory.back());
+            p.primitivesToIRLayersMap[non_max_supression_id_r_first] = { op->get_friendly_name() };
+            p.primitiveIDs[non_max_supression_id_r_first] = non_max_supression_id_r_first;
+            p.AddPrimitive(nms_mutable_prim_r_first);
+        }
+        default: break;
+    }
+
+    p.AddPrimitiveToProfiler(nonMaxSupressionLayerName, op);
+}
+
+REGISTER_FACTORY_IMPL(internal, NonMaxSuppressionIEInternal);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp
+++ b/inference-engine/src/cldnn_engine/ops/normalize_l2.cpp
@ -0,0 +1,63 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/normalize_l2.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/normalize.hpp"
+#include "api/data.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0 NormalizeL2 operation into a cldnn::normalize primitive.
+// The axes input has to be a Constant. The clDNN primitive takes a scale input, so a
+// one-element constant holding 1.0 is created and attached as that input.
+void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::NormalizeL2>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    // params
+    auto axesNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    if (axesNode == nullptr) {
+        THROW_IE_EXCEPTION << "Unsupported axis node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    auto axes = axesNode->cast_vector<size_t>();
+    // Normalization is "across spatial" unless the only axis is axis 1.
+    const bool onlyAxisOne = (axes.size() == 1) && (axes[0] == 1);
+    const bool acrossSpatial = !onlyAxisOne;
+
+    // WA for MO outputting %.6f
+    float epsilon = op->get_eps();
+    if (epsilon == 0.0f) {
+        epsilon = 1e-10f;
+    }
+
+    // We create fake scale constant and fill it with ones to keep the same behavior as current primitive
+    auto unitScale = std::make_shared<ngraph::op::v0::Constant>(op->get_output_element_type(0), ngraph::Shape{1}, std::vector<float>{1.0});
+    cldnn::layout scaleLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), cldnn::format::bfyx, cldnn::tensor{1});
+    auto scaleMem = cldnn::memory::allocate(p.GetEngine(), scaleLayout, 0, false);
+    auto mappedScale = scaleMem.pointer<char>();  // implicitly maps buffer - unmap in destructor
+    auto dst = mappedScale.data();
+    auto scaleBytes = unitScale->get_output_tensor(0).size();
+
+    // The constant and the clDNN buffer must have the same byte size before copying.
+    if (scaleBytes != scaleLayout.bytes_count()) {
+        THROW_IE_EXCEPTION << "Invalid scales buffer in NormalizeL2 op " << op->get_friendly_name();
+    }
+
+    std::memcpy(&dst[0], unitScale->get_data_ptr(), scaleBytes);
+
+    auto scalesId = primitiveId + "_cldnn_input_scales";
+    p.AddPrimitive(cldnn::data(scalesId, scaleMem));
+    p.AddInnerPrimitiveToProfiler(scalesId, primitiveId, op);
+
+    auto primitive = cldnn::normalize(primitiveId, inputs[0], scalesId, acrossSpatial, epsilon);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, NormalizeL2);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/one_hot.cpp
+++ b/inference-engine/src/cldnn_engine/ops/one_hot.cpp
@ -0,0 +1,64 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+#include "transformations/utils/utils.hpp"
+
+#include "ngraph/op/one_hot.hpp"
+
+#include "api/one_hot.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v1 OneHot operation into a cldnn::one_hot primitive.
+// The on/off value inputs (inputs 2 and 3) have to be Constants that hold a single value.
+void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    int16_t axis = op->get_axis();
+    auto onNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
+    auto offNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
+
+    if (!onNode || !offNode) {
+        THROW_IE_EXCEPTION << "Unsupported on/off node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    float on_value;
+    float off_value;
+
+    // Both constants are read in order; the off value is only read if the on value was read.
+    const bool valuesRead = ngraph::op::util::get_single_value(onNode, on_value) &&
+                            ngraph::op::util::get_single_value(offNode, off_value);
+    if (!valuesRead) {
+        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    auto dims = op->get_input_shape(0);
+
+    const bool axisInRange = axis >= -1 && axis <= static_cast<int16_t>(dims.size());
+    if (!axisInRange) {
+        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect OneHot axis value: " << axis << ". Should be between -1 and " << dims.size();
+    }
+
+    if (axis == -1) {
+        // Start at the input rank and step back once for every trailing dimension equal to 1.
+        axis = dims.size();
+        for (auto it = dims.rbegin(); it != dims.rend() && *it == 1; ++it) {
+            axis--;
+        }
+    }
+
+    auto primitive = cldnn::one_hot(primitiveId,
+                                    inputs[0],
+                                    CldnnTensorFromIEDims(op->get_output_shape(0)),
+                                    DataTypeFromPrecision(op->get_output_element_type(0)),
+                                    static_cast<uint16_t>(axis),
+                                    on_value,
+                                    off_value);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, OneHot);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/pad.cpp
+++ b/inference-engine/src/cldnn_engine/ops/pad.cpp
@ -0,0 +1,75 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+#include "transformations/utils/utils.hpp"
+
+#include "ngraph/op/pad.hpp"
+
+#include "api/border.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps an ngraph pad mode onto the matching clDNN border type:
+// CONSTANT -> constant, EDGE -> edge, REFLECT -> mirror_101, SYMMETRIC -> mirror.
+// Any other mode raises an exception.
+static cldnn::border_type GetBorderType(ngraph::op::PadMode mode) {
+    using PadMode = ngraph::op::PadMode;
+
+    if (mode == PadMode::CONSTANT) {
+        return cldnn::border_type::constant;
+    }
+    if (mode == PadMode::EDGE) {
+        return cldnn::border_type::edge;
+    }
+    if (mode == PadMode::REFLECT) {
+        return cldnn::border_type::mirror_101;
+    }
+    if (mode == PadMode::SYMMETRIC) {
+        return cldnn::border_type::mirror;
+    }
+
+    THROW_IE_EXCEPTION << "Invalid border mode " << mode << " in layer ";
+    return cldnn::border_type::constant;
+}
+
+// Converts per-dimension pad amounts into the element order used to build a cldnn::tensor:
+// the first two entries stay in place and the remaining (spatial) entries are reversed.
+static std::vector<int32_t> GetPermuteOrder(const ngraph::CoordinateDiff& ie_order) {
+    std::vector<int32_t> order(ie_order.begin(), ie_order.end());
+
+    // 1. Align to min. 4 sizes (a single zero is appended when fewer than 4 entries are given)
+    if (order.size() < 4) {
+        order.push_back(0);
+    }
+
+    // 2. Swap spatial positions: mirror the entries from index 2 up to the last one
+    const size_t rank = order.size();
+    const size_t swapCount = (rank - 2) / 2;
+    for (size_t i = 0; i < swapCount; ++i) {
+        std::swap(order[2 + i], order[rank - 1 - i]);
+    }
+
+    return order;
+}
+
+// Translates an ngraph v1 Pad operation into a cldnn::border primitive.
+// The optional fourth input (pad value) has to be a Constant holding a single value;
+// when it is absent the pad value is 0.
+void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>& op) {
+    p.ValidateInputs(op, {3, 4});
+    auto inputs = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    auto padsBegin = cldnn::tensor(GetPermuteOrder(op->get_pads_begin()), 0);
+    auto padsEnd = cldnn::tensor(GetPermuteOrder(op->get_pads_end()), 0);
+
+    float padValue = 0.f;
+    const bool hasPadValueInput = op->get_input_size() == 4;
+    if (hasPadValueInput) {
+        auto padValueNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
+        if (padValueNode == nullptr) {
+            THROW_IE_EXCEPTION << "Unsupported const node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+        const bool valueRead = ngraph::op::util::get_single_value(padValueNode, padValue);
+        if (!valueRead) {
+            THROW_IE_EXCEPTION << "Unsupported pad value in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+    }
+
+    cldnn::border_type borderMode = GetBorderType(op->get_pad_mode());
+
+    auto borderPrim = cldnn::border(primitiveId, inputs[0], padsBegin, padsEnd, borderMode, padValue);
+
+    p.AddPrimitive(borderPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, Pad);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/parameter.cpp
+++ b/inference-engine/src/cldnn_engine/ops/parameter.cpp
@ -0,0 +1,257 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/parameter.hpp"
+
+#include "api/input_layout.hpp"
+#include "api/reorder.hpp"
+#include "api/data.hpp"
+
+using namespace InferenceEngine;
+
+namespace CLDNNPlugin {
+
+// Creates the cldnn primitives that represent one network input.
+//
+// Steps, in order:
+//   1. Look up the InputInfo registered under the Parameter's friendly name
+//      (throws when it is missing).
+//   2. Build the cldnn layout (format + tensor) from the input TensorDesc.
+//   3. Collect mean values / upload the mean image requested by the input's
+//      PreProcessInfo (stdScale other than 1 is rejected).
+//   4. Add the input_layout primitive(s) and a "reorder:<input><preProcessTag>"
+//      primitive that applies the mean subtraction. For NV12 inputs with the
+//      nv12_two_inputs option two input layouts ("_Y" and "_UV") are created.
+//   5. Map the input name to the preprocessing reorder in p.primitiveIDs.
+//
+// Throws via THROW_IE_EXCEPTION on unsupported layouts, ranks, precisions or
+// mean configurations.
+void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
+    auto networkInputs = p.GetNetworkInputs();
+    if (networkInputs.find(op->get_friendly_name()) == networkInputs.end()) {
+        THROW_IE_EXCEPTION << "Can't find input " << op->get_friendly_name() << " in InputsDataMap";
+    }
+
+    auto inputInfo = networkInputs.at(op->get_friendly_name());
+    // first create and add the input layout
+    const auto inputDesc = inputInfo->getTensorDesc();
+    const auto inputDims = inputDesc.getDims();
+    Layout l = inputDesc.getLayout();
+    Precision ip = inputDesc.getPrecision();
+
+    cldnn::format inputFormat = cldnn::format::bfyx;
+    if (Layout::BLOCKED == l && 6 == inputDims.size()) {
+        inputFormat = cldnn::format::bfwzyx;
+    } else {
+        inputFormat = FormatFromLayout(l);
+    }
+
+    cldnn::tensor dataTensor;
+    // With max batch <= 1 the batch comes from the input dims (only for rank > 3),
+    // otherwise the program's current batch is used.
+    cldnn::tensor::value_type batch = (p.m_max_batch <= 1)
+                                    ? (inputDims.size() > 3 ? TensorValue(inputDims[0]) : 1)
+                                    : TensorValue(p.m_curBatch);
+    switch (inputDims.size()) {
+    case 6:
+        dataTensor = cldnn::tensor(cldnn::batch(batch),
+                                   cldnn::feature(inputDims[1]),
+                                   cldnn::spatial(inputDims[5], inputDims[4], inputDims[3], inputDims[2]));
+        break;
+    case 5:
+        if (Layout::NCDHW == l) {
+            dataTensor = cldnn::tensor(cldnn::batch(batch),
+                                       cldnn::feature(inputDims[1]),
+                                       cldnn::spatial(inputDims[4], inputDims[3], inputDims[2]));
+        } else {
+            THROW_IE_EXCEPTION  << "Unsupported layout (" << l << ") in 5D input " << inputInfo->name();
+        }
+        break;
+    case 4:
+        // NCHW/CHW and NHWC previously had separate but identical branches; the
+        // tensor is built from inputDims in the same way for all of them.
+        if (Layout::NCHW == l || Layout::CHW == l || Layout::NHWC == l) {
+            dataTensor = cldnn::tensor(batch,
+                                       TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
+        } else {
+            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 4D input " + inputInfo->name();
+        }
+        break;
+    case 3:
+        if (Layout::CHW == l) {
+            dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[2]));
+        } else {
+            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 3D input " + inputInfo->name();
+        }
+        break;
+    case 2:
+        if (Layout::NCHW == l || NC == l) {
+            dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, 1);
+        } else {
+            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 2D input " << inputInfo->name();
+        }
+        break;
+    case 1:
+        dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1);
+        break;
+    case 0:
+        dataTensor = cldnn::tensor(1, 1, 1, 1);
+        break;
+    default: THROW_IE_EXCEPTION << "Invalid data dimensions";
+    }
+    cldnn::layout networkInputLayout(DataTypeFromPrecision(ip),
+                                     inputFormat,
+                                     dataTensor);
+
+    // look at the expected color format of this input
+    auto inputName = layer_type_name_ID(op);
+    auto preProcess = inputInfo->getPreProcess();
+    size_t meanChannels = preProcess.getNumberOfChannels();
+    networkInputLayout.format = inputFormat;
+    networkInputLayout.size = networkInputLayout.size.transform(inputFormat, 1);
+    // The layout consumed by the network uses the Parameter's element type; the
+    // user-facing input layout (created further below) keeps the blob precision `ip`.
+    networkInputLayout.data_type = DataTypeFromPrecision(op->get_output_element_type(0));
+    auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
+    cldnn::primitive_id meanBlobID = inputName + Program::m_meanValuesTag;
+    std::vector<float> meanValues;
+
+    if ((meanChannels > 0) &&
+        (meanChannels != networkInputLayout.size.feature[0])) {
+        THROW_IE_EXCEPTION << "Mismatched mean values channels in input " << inputName;
+    }
+
+    switch (preProcess.getMeanVariant()) {
+    case NONE:
+    case MEAN_VALUE: {
+        if (meanChannels > 0) {
+            for (size_t c = 0; c < meanChannels; c++) {
+                if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
+                    THROW_IE_EXCEPTION << "not supporting stdScale yet in input " << inputName;
+                meanValues.push_back(preProcess[c]->meanValue);
+            }
+        }
+        break;
+    }
+    case MEAN_IMAGE: {
+        IE_ASSERT(meanChannels);
+        // first merge all mean values to a single blob
+        // todo make sure mean blob precision is the same as the input precision
+        auto meanDims = inputDims;
+        // overwrite batches with 1
+        switch (meanDims.size()) {
+        case 4: meanDims[0] = 1;
+            break;
+        default:
+            THROW_IE_EXCEPTION << "Missing batch dimensions in input image";
+        }
+        const TensorDesc desc(Precision::FP32, meanDims, TensorDesc::getLayoutByDims(meanDims));
+        TBlob<float> meanBlob(desc);
+        meanBlob.allocate();
+        auto meanBlobData = meanBlob.data();
+        for (size_t c = 0; c < meanChannels; c++) {
+            if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
+                THROW_IE_EXCEPTION << "not supporting stdScale yet in input " << inputName;
+            auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
+            // The cast yields nullptr when the mean data is absent or is not a float blob;
+            // dereferencing it unchecked would crash.
+            if (!channelMeanBlob)
+                THROW_IE_EXCEPTION << "Missing or non-FP32 mean image for channel " << c << " in input " << inputName;
+            auto channelSize = channelMeanBlob->size();
+            // Guard the merged buffer against a per-channel blob that is larger than expected.
+            if ((c + 1) * channelSize > meanBlob.size())
+                THROW_IE_EXCEPTION << "Mean image size does not match dimensions of input " << inputName;
+            auto channelBlobData = channelMeanBlob->data();
+            for (size_t i = 0; i < channelSize; i++) {
+                meanBlobData[(c * channelSize) + i] = channelBlobData[i];
+            }
+        }
+        // then create a data primitive for the mean values
+        auto meanBlobPtr = std::make_shared<TBlob<float>>(meanBlob);
+
+        // mean values will use external format (sub in the input format before convert to new format)
+        cldnn::tensor meanBlobTensor(networkInputLayout.size);
+        meanBlobTensor.batch[0] = 1;  // mean values have no batches
+        cldnn::layout meanBlobLayout(cldnn::data_types::f32, cldnn::format::bfyx, meanBlobTensor);
+
+        auto data = static_cast<const char *>(meanBlobPtr->buffer());
+
+        // NOTE(review): the cache key is the address of a buffer owned by meanBlobPtr,
+        // which goes out of scope at the end of this case - confirm that a stale key
+        // left in blobMemCache cannot alias a later allocation.
+        auto bufIter = p.blobMemCache.find(data);
+        if (bufIter != p.blobMemCache.end()) {
+            meanBlobID = bufIter->second;
+        } else {
+            auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false);
+            auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
+            auto buf = tmpPointer.data();
+            auto bufSize = meanBlobLayout.bytes_count();
+
+            std::memcpy(&buf[0], &data[0], bufSize);
+
+            p.AddPrimitive(cldnn::data(meanBlobID, mem));
+            p.blobMemCache[data] = meanBlobID;
+        }
+        break;
+    }
+    default: THROW_IE_EXCEPTION << "Invalid mean variant in input " << inputName;
+        break;
+    }
+
+    if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
+        // for NV12, create two input layouts with reorder instead of one,
+        // and then would expect compound blob in inferRequest
+        //
+        // Reject the input when the layout is not NCHW OR the precision is neither
+        // I8 nor U8. The previous condition `(I8 != ip || U8 != ip)` was always true,
+        // so the precision was never validated.
+        if (Layout::NCHW != l ||
+            (Precision::I8 != ip && Precision::U8 != ip)) {
+            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") or precision ("
+                               << ip.name() << ") for NV12 input " + inputInfo->name();
+        }
+        int height = inputDims[2];
+        int width = inputDims[3];
+
+        std::string y_name = inputName + "_Y";
+        std::string uv_name = inputName + "_UV";
+
+        // Y plane: 1 feature at full resolution; UV plane: 2 features at half resolution.
+        cldnn::layout y_layout(DataTypeFromPrecision(ip),
+                                cldnn::format::nv12, { 1, 1, width, height });
+        cldnn::layout uv_layout(DataTypeFromPrecision(ip),
+                                cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
+        auto inputY = cldnn::input_layout(y_name, y_layout);
+        auto inputUV = cldnn::input_layout(uv_name, uv_layout);
+
+        p.AddPrimitive(inputY);
+        p.inputLayouts.insert({ inputInfo->name() + "_Y", y_layout });
+        p.AddPrimitive(inputUV);
+        p.inputLayouts.insert({ inputInfo->name() + "_UV", uv_layout });
+        switch (preProcess.getMeanVariant()) {
+        case NONE:
+        case MEAN_VALUE: {
+            p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
+            break;
+        }
+        case MEAN_IMAGE: {
+            p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
+            break;
+        }
+        default: THROW_IE_EXCEPTION << "Invalid mean variant in input " + inputName;
+            break;
+        }
+
+        p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
+        p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
+        p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
+        p.profilingIDs.push_back(preprocessPrimID);
+        p.InitProfileInfo(preprocessPrimID, "Reorder");
+    } else {
+        cldnn::layout inputLayout(networkInputLayout);
+        inputLayout.data_type = DataTypeFromPrecision(ip);
+        p.inputLayouts.insert({ inputInfo->name(), inputLayout });
+
+        p.AddPrimitive(cldnn::input_layout(inputName, inputLayout));
+        p.primitivesToIRLayersMap[inputName] = { inputInfo->name() };
+
+        switch (preProcess.getMeanVariant()) {
+        case NONE:
+        case MEAN_VALUE: {
+            p.AddPrimitive(cldnn::reorder(preprocessPrimID, inputName, networkInputLayout, meanValues));
+            break;
+        }
+        case MEAN_IMAGE: {
+            p.AddPrimitive(cldnn::reorder(preprocessPrimID,
+                                        inputName,
+                                        networkInputLayout,
+                                        meanBlobID));
+            break;
+        }
+        default: THROW_IE_EXCEPTION << "Invalid mean variant in input " << inputName;
+            break;
+        }
+        p.InitProfileInfo(preprocessPrimID, "reorder");
+        p.profilingIDs.push_back(preprocessPrimID);
+    }
+
+    // Consumers of this Parameter are wired to the preprocessing reorder, not to
+    // the raw input layout. (The non-NV12 branch used to repeat the second
+    // assignment; it is done once here for both paths.)
+    p.primitiveIDs[inputName] = preprocessPrimID;
+    p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
+}
+
+REGISTER_FACTORY_IMPL(v0, Parameter);  // NOTE(review): macro is defined outside this file; presumably registers CreateParameterOp for ngraph v0::Parameter - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/pooling.cpp
+++ b/inference-engine/src/cldnn_engine/ops/pooling.cpp
@ -0,0 +1,101 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/max_pool.hpp"
+#include "ngraph/op/avg_pool.hpp"
+
+#include "api/pooling.hpp"
+
+namespace CLDNNPlugin {
+
+struct PoolingParameters {  // pooling geometry expressed as cldnn tensors; filled by GetPoolingParameters
+    cldnn::tensor kernel;     // window size; batch and feature are 1, unused spatial dims are 1
+    cldnn::tensor stride;     // window step; batch and feature are 1, unused spatial dims are 1
+    cldnn::tensor pad_begin;  // negated ngraph pads_begin; batch and feature are 0, unused spatial dims are 0
+    cldnn::tensor pad_end;    // negated ngraph pads_end; batch and feature are 0, unused spatial dims are 0
+};
+
+// Packs ngraph pooling attributes (1, 2 or 3 spatial dims) into cldnn tensors.
+//
+// All four inputs must have the same number of elements. ngraph lists spatial
+// dims outermost-first while cldnn::spatial takes them x-first, so every list is
+// reversed. Missing spatial dims are filled with 1 for kernel/stride and 0 for
+// pads; pad values are negated.
+// Throws on mismatched sizes or an unsupported number of spatial dims.
+static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
+                                              const ngraph::Strides& strides,
+                                              const ngraph::Shape& pads_begin,
+                                              const ngraph::Shape& pads_end) {
+    using dim_t = cldnn::tensor::value_type;
+
+    const size_t rank = strides.size();
+    const bool sizes_match = (pads_begin.size() == rank) && (pads_end.size() == rank) && (kernel.size() == rank);
+    if (!sizes_match)
+        THROW_IE_EXCEPTION << "Strides, KernelSizes and Pads are supposed to have the same elements count";
+    if (rank < 1 || rank > 3)
+        THROW_IE_EXCEPTION << "Unsupported pooling parameters size. Only 1d, 2d, and 3d cases are supported";
+
+    // Narrow everything to the cldnn element type up front.
+    const std::vector<dim_t> kernel_dims(kernel.begin(), kernel.end());
+    const std::vector<dim_t> stride_dims(strides.begin(), strides.end());
+    const std::vector<dim_t> begin_dims(pads_begin.begin(), pads_begin.end());
+    const std::vector<dim_t> end_dims(pads_end.begin(), pads_end.end());
+
+    // Reverses `values` into {x, y, z}, applying `sign` and filling absent dims with `fill`.
+    const auto to_xyz = [rank](const std::vector<dim_t>& values, dim_t fill, dim_t sign) -> std::vector<dim_t> {
+        std::vector<dim_t> xyz(3, fill);
+        for (size_t i = 0; i < rank; ++i)
+            xyz[i] = sign * values[rank - 1 - i];
+        return xyz;
+    };
+    const auto window_tensor = [](const std::vector<dim_t>& xyz) -> cldnn::tensor {
+        return cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(xyz[0], xyz[1], xyz[2]));
+    };
+    const auto pad_tensor = [](const std::vector<dim_t>& xyz) -> cldnn::tensor {
+        return cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(xyz[0], xyz[1], xyz[2]));
+    };
+
+    return {window_tensor(to_xyz(kernel_dims, 1, 1)),
+            window_tensor(to_xyz(stride_dims, 1, 1)),
+            pad_tensor(to_xyz(begin_dims, 0, -1)),
+            pad_tensor(to_xyz(end_dims, 0, -1))};
+}
+
+// Translates an ngraph v1::AvgPool node into a cldnn::pooling primitive.
+// exclude_pad selects average_no_padding, otherwise plain average is used.
+void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::AvgPool>& op) {
+    p.ValidateInputs(op, {1});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primitiveName = layer_type_name_ID(op);
+
+    const auto geometry = GetPoolingParameters(op->get_kernel(), op->get_strides(), op->get_pads_begin(), op->get_pads_end());
+
+    cldnn::pooling_mode averaging = cldnn::pooling_mode::average;
+    if (op->get_exclude_pad())
+        averaging = cldnn::pooling_mode::average_no_padding;
+
+    const auto outputSize = CldnnTensorFromIEDims(op->get_output_shape(0));
+    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    auto pooling = cldnn::pooling(primitiveName,
+                                  inputIds[0],
+                                  averaging,
+                                  geometry.kernel,
+                                  geometry.stride,
+                                  geometry.pad_begin,
+                                  outputSize,
+                                  outputType);
+    // The end padding is not a constructor argument here; it is set on the primitive afterwards.
+    pooling.pad_end = geometry.pad_end;
+
+    p.AddPrimitive(pooling);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates an ngraph v1::MaxPool node into a cldnn::pooling primitive in max mode.
+void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::MaxPool>& op) {
+    p.ValidateInputs(op, {1});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primitiveName = layer_type_name_ID(op);
+
+    const auto geometry = GetPoolingParameters(op->get_kernel(), op->get_strides(), op->get_pads_begin(), op->get_pads_end());
+
+    const auto outputSize = CldnnTensorFromIEDims(op->get_output_shape(0));
+    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    auto pooling = cldnn::pooling(primitiveName,
+                                  inputIds[0],
+                                  cldnn::pooling_mode::max,
+                                  geometry.kernel,
+                                  geometry.stride,
+                                  geometry.pad_begin,
+                                  outputSize,
+                                  outputType);
+    // The end padding is not a constructor argument here; it is set on the primitive afterwards.
+    pooling.pad_end = geometry.pad_end;
+
+    p.AddPrimitive(pooling);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, MaxPool);  // NOTE(review): macro is defined outside this file; presumably registers CreateMaxPoolOp for ngraph v1::MaxPool - confirm
+REGISTER_FACTORY_IMPL(v1, AvgPool);  // likewise, presumably CreateAvgPoolOp for ngraph v1::AvgPool
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/prior_box.cpp
+++ b/inference-engine/src/cldnn_engine/ops/prior_box.cpp
@ -0,0 +1,115 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/prior_box.hpp"
+#include "ngraph/op/prior_box_clustered.hpp"
+
+#include "api/prior_box.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0::PriorBoxClustered node into a cldnn::prior_box primitive.
+//
+// The image size is taken from the last two dims of input 1. When both step
+// attributes are zero, the steps are derived as image size divided by the last
+// two dims of input 0.
+void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBoxClustered>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto attrs = op->get_attrs();
+
+    std::vector<float> width = attrs.widths;
+    std::vector<float> height = attrs.heights;
+    std::vector<float> variance = attrs.variances;
+    float offset = attrs.offset;
+    bool clip = attrs.clip;
+
+    auto inp_dims = op->get_input_shape(0);
+    auto img_dims = op->get_input_shape(1);
+
+    int img_w = static_cast<int>(img_dims.back());
+    int img_h = static_cast<int>(img_dims.at(img_dims.size() - 2));
+    cldnn::tensor img_size = (cldnn::tensor) cldnn::spatial(TensorValue(img_w), TensorValue(img_h));
+
+    auto step_w = attrs.step_widths;
+    auto step_h = attrs.step_heights;
+    // Steps that differ by less than 1e-5 are treated as equal and both take the width step.
+    if (std::abs(attrs.step_heights - attrs.step_widths) < 1e-5) {
+        step_w = attrs.step_widths;
+        step_h = attrs.step_widths;
+    }
+
+    if (step_w == 0.0f && step_h == 0.0f) {
+        step_w = static_cast<float>(img_w) / inp_dims.back();
+        // Index inp_dims by its own rank. Using img_dims.size() here picked the wrong
+        // dimension (or threw from at()) whenever the two inputs had different ranks.
+        step_h = static_cast<float>(img_h) / inp_dims.at(inp_dims.size() - 2);
+    }
+
+    auto priorBoxPrim = cldnn::prior_box(layerName,
+                                         inputPrimitives[0],
+                                         img_size,
+                                         clip,
+                                         variance,
+                                         step_w,
+                                         step_h,
+                                         offset,
+                                         width,
+                                         height,
+                                         DataTypeFromPrecision(op->get_output_element_type(0)));
+
+    p.AddPrimitive(priorBoxPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates an ngraph v0::PriorBox node into a cldnn::prior_box primitive.
+// The image size comes from the last two dims of input 1; the single `step`
+// attribute is used for both the horizontal and the vertical step.
+void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBox>& op) {
+    p.ValidateInputs(op, {2});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primitiveName = layer_type_name_ID(op);
+
+    const auto attrs = op->get_attrs();
+
+    // Box sizes, ratios and densities
+    const std::vector<float>& minSizes = attrs.min_size;
+    const std::vector<float>& maxSizes = attrs.max_size;
+    const std::vector<float>& aspectRatios = attrs.aspect_ratio;
+    const std::vector<float>& variances = attrs.variance;
+    const std::vector<float>& fixedSizes = attrs.fixed_size;
+    const std::vector<float>& fixedRatios = attrs.fixed_ratio;
+    const std::vector<float>& densities = attrs.density;
+
+    // Flags and scalars
+    const bool flipBoxes = attrs.flip;
+    const bool clipBoxes = attrs.clip;
+    const bool scaleAllSizes = attrs.scale_all_sizes;
+    const float boxOffset = attrs.offset;
+    const auto stepX = attrs.step;
+    const auto stepY = attrs.step;
+
+    const auto imageShape = op->get_input_shape(1);
+    const auto imageWidth = imageShape.back();
+    const auto imageHeight = imageShape.at(imageShape.size() - 2);
+    cldnn::tensor imageSize = (cldnn::tensor) cldnn::spatial(TensorValue(imageWidth), TensorValue(imageHeight));
+
+    auto priorBox = cldnn::prior_box(primitiveName,
+                                     inputIds[0],
+                                     imageSize,
+                                     minSizes,
+                                     maxSizes,
+                                     aspectRatios,
+                                     flipBoxes,
+                                     clipBoxes,
+                                     variances,
+                                     stepX,
+                                     stepY,
+                                     boxOffset,
+                                     scaleAllSizes,
+                                     fixedRatios,
+                                     fixedSizes,
+                                     densities);
+
+    p.AddPrimitive(priorBox);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, PriorBoxClustered);  // NOTE(review): macro is defined outside this file; presumably registers CreatePriorBoxClusteredOp for ngraph v0::PriorBoxClustered - confirm
+REGISTER_FACTORY_IMPL(v0, PriorBox);  // likewise, presumably CreatePriorBoxOp for ngraph v0::PriorBox
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/proposal.cpp
+++ b/inference-engine/src/cldnn_engine/ops/proposal.cpp
@ -0,0 +1,146 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/proposal.hpp"
+
+#include "api/proposal.hpp"
+#include "api/mutable_data.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph Proposal node into a cldnn::proposal primitive.
+//
+// Single-output nodes map to one primitive named after the layer. Two-output
+// nodes additionally get a shared cldnn memory buffer for the second output:
+// "<layer>_md_write" exposes it as an extra input of the proposal primitive
+// ("<layer>.0"), and "<layer>.1" reads it back with a dependency on "<layer>.0".
+// An i64 second output is stored as i32.
+void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
+    p.ValidateInputs(op, {3});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+
+    const auto attrs = op->get_attrs();
+    const float nmsThreshold = attrs.nms_thresh;
+    const int minBoxSize = static_cast<int>(attrs.min_size);
+    const int featStride = static_cast<int>(attrs.feat_stride);
+    const int preNmsTopN = static_cast<int>(attrs.pre_nms_topn);
+    const int postNmsTopN = static_cast<int>(attrs.post_nms_topn);
+    const std::vector<float> ratios = attrs.ratio;
+    const std::vector<float> scales = attrs.scale;
+    const float boxCoordScale = attrs.box_coordinate_scale;
+    const float boxSizeScale = attrs.box_size_scale;
+    const int baseSize = static_cast<int>(attrs.base_size);
+    const bool normalizeOutput = attrs.normalize;
+    const bool clipBeforeNms = attrs.clip_before_nms;
+    const bool clipAfterNms = attrs.clip_after_nms;
+
+    // The "tensorflow" framework flavour flips a fixed set of behaviours; every
+    // other framework string gets the opposite values.
+    const bool isTensorflow = (attrs.framework == "tensorflow");
+    const float coordinatesOffset = isTensorflow ? 0.0f : 1.0f;
+    const bool swapXY = isTensorflow;
+    const bool initialClip = isTensorflow;
+    const bool shiftAnchors = isTensorflow;
+    const bool roundRatios = !isTensorflow;
+
+    const std::string layerName = layer_type_name_ID(op);
+
+    if (op->get_output_size() != 2) {
+        auto proposal = cldnn::proposal(layerName,
+                                        inputIds[0],  // cls_score
+                                        inputIds[1],  // bbox_pred
+                                        inputIds[2],  // im_info
+                                        0,            // max_num_proposals is unused
+                                        nmsThreshold,
+                                        baseSize,
+                                        minBoxSize,
+                                        featStride,
+                                        preNmsTopN,
+                                        postNmsTopN,
+                                        ratios,
+                                        scales,
+                                        coordinatesOffset,
+                                        boxCoordScale,
+                                        boxSizeScale,
+                                        false,
+                                        swapXY,
+                                        initialClip,
+                                        clipBeforeNms,
+                                        clipAfterNms,
+                                        roundRatios,
+                                        shiftAnchors,
+                                        normalizeOutput);
+
+        p.AddPrimitive(proposal);
+        p.AddPrimitiveToProfiler(op);
+        return;
+    }
+
+    // Two outputs: allocate the buffer that carries the second output.
+    auto secondOutputType = op->get_output_element_type(1);
+    if (secondOutputType == ngraph::element::i64) {
+        secondOutputType = ngraph::element::i32;
+    }
+
+    const auto secondOutputShape = op->get_output_shape(1);
+    cldnn::layout secondOutputLayout = cldnn::layout(DataTypeFromPrecision(secondOutputType),
+                                                     DefaultFormatForDims(secondOutputShape.size()),
+                                                     CldnnTensorFromIEDims(secondOutputShape));
+
+    auto sharedBuffer = cldnn::memory::allocate(p.GetEngine(), secondOutputLayout);
+
+    // Writer side: passed to the proposal primitive as its 4th input.
+    const cldnn::primitive_id writerId = layerName + "_md_write";
+    auto writerPrim = cldnn::mutable_data(writerId, sharedBuffer);
+    p.primitivesToIRLayersMap[writerId] = { op->get_friendly_name() };
+    p.primitiveIDs[writerId] = writerId;
+    p.AddPrimitive(writerPrim);
+    inputIds.push_back(writerId);
+
+    const std::string firstOutputName = layerName + ".0";
+    auto proposal = cldnn::proposal(firstOutputName,
+                                    inputIds[0],  // cls_score
+                                    inputIds[1],  // bbox_pred
+                                    inputIds[2],  // im_info
+                                    inputIds[3],  // second_output
+                                    0,            // max_num_proposals is unused
+                                    nmsThreshold,
+                                    baseSize,
+                                    minBoxSize,
+                                    featStride,
+                                    preNmsTopN,
+                                    postNmsTopN,
+                                    ratios,
+                                    scales,
+                                    coordinatesOffset,
+                                    boxCoordScale,
+                                    boxSizeScale,
+                                    false,
+                                    swapXY,
+                                    initialClip,
+                                    clipBeforeNms,
+                                    clipAfterNms,
+                                    roundRatios,
+                                    shiftAnchors,
+                                    normalizeOutput);
+
+    p.AddPrimitive(proposal);
+
+    // Reader side: same buffer, made dependent on the proposal primitive.
+    const cldnn::primitive_id readerId = layerName + ".1";
+    auto readerPrim = cldnn::mutable_data(readerId, { firstOutputName }, sharedBuffer);
+    p.primitivesToIRLayersMap[readerId] = { op->get_friendly_name() };
+    p.primitiveIDs[readerId] = readerId;
+    p.AddPrimitive(readerPrim);
+
+    p.AddPrimitiveToProfiler(firstOutputName, op);
+}
+
+REGISTER_FACTORY_IMPL(v0, Proposal);  // NOTE(review): macro is defined outside this file; presumably registers CreateProposalOp for ngraph v0::Proposal - confirm
+REGISTER_FACTORY_IMPL(v4, Proposal);  // NOTE(review): presumably v4::Proposal is routed to the same CreateProposalOp (which takes v0::Proposal) - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/reduce.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reduce.cpp
@ -0,0 +1,146 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/reduce_sum.hpp"
+#include "ngraph/op/reduce_prod.hpp"
+#include "ngraph/op/reduce_mean.hpp"
+#include "ngraph/op/reduce_logical_or.hpp"
+#include "ngraph/op/reduce_logical_and.hpp"
+#include "ngraph/op/reduce_l1.hpp"
+#include "ngraph/op/reduce_l2.hpp"
+#include "ngraph/op/min.hpp"
+#include "ngraph/op/max.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/reduce.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Shared implementation for all Reduce* ops: creates a cldnn::reduce primitive
+// and, for inputs of rank > 4 reduced without keep_dims, a trailing reorder to
+// the format matching the reduced rank.
+//
+// Parameters:
+//   p         - program the primitives are added to
+//   op        - ngraph node; input 1 must be a Constant holding the axes
+//   mode      - cldnn reduction mode
+//   keep_dims - whether reduced dimensions are kept
+//
+// Negative axes are counted from the end. Throws when the axes input is not a
+// Constant or an axis lies outside [0, rank).
+void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    size_t rank = op->get_input_shape(0).size();
+
+    auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
+    if (!axes_constant) {
+        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+    std::vector<int32_t> rawAxes = axes_constant->cast_vector<int32_t>();
+
+    std::vector<uint16_t> axes;
+    for (size_t a = 0; a < rawAxes.size(); a++) {
+        if (rawAxes[a] < 0)
+            rawAxes[a] = rawAxes[a] + rank;
+        // Compare against rank directly: the former `> rank - 1` wrapped around for
+        // rank 0 and let every axis through.
+        if (rawAxes[a] < 0 || static_cast<size_t>(rawAxes[a]) >= rank)
+            THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect Reduce axis value: " << rawAxes[a];
+        if (rank == 6) {
+            switch (rawAxes[a]) {
+                case 0: axes.push_back(cldnn::reduce::along_b); break;
+                case 1: axes.push_back(cldnn::reduce::along_f); break;
+                case 2: axes.push_back(cldnn::reduce::along_w); break;
+                case 3: axes.push_back(cldnn::reduce::along_z); break;
+                case 4: axes.push_back(cldnn::reduce::along_y); break;
+                case 5: axes.push_back(cldnn::reduce::along_x); break;
+            }
+        } else if (rank == 5) {
+            switch (rawAxes[a]) {
+                case 0: axes.push_back(cldnn::reduce::along_b); break;
+                case 1: axes.push_back(cldnn::reduce::along_f); break;
+                case 2: axes.push_back(cldnn::reduce::along_z); break;
+                case 3: axes.push_back(cldnn::reduce::along_y); break;
+                case 4: axes.push_back(cldnn::reduce::along_x); break;
+            }
+        } else {
+            switch (rawAxes[a]) {
+                case 0: axes.push_back(cldnn::reduce::along_b); break;
+                case 1: axes.push_back(cldnn::reduce::along_f); break;
+                case 2: axes.push_back(cldnn::reduce::along_y); break;
+                case 3: axes.push_back(cldnn::reduce::along_x); break;
+            }
+        }
+    }
+
+    // Drop repeated axes (e.g. the same axis given once as positive and once as negative).
+    std::sort(axes.begin(), axes.end());
+    axes.erase(std::unique(axes.begin(), axes.end()), axes.end());
+
+    auto reducePrim = cldnn::reduce(layerName,
+                                    inputPrimitives[0],
+                                    mode,
+                                    axes,
+                                    static_cast<int32_t>(keep_dims));
+
+    p.AddPrimitive(reducePrim);
+
+    auto reorderLayerName = layerName + "_reorder";
+    cldnn::format out_format = cldnn::format::any;
+    auto out_dt = DataTypeFromPrecision(op->get_output_element_type(0));
+    if (!keep_dims && rank > 4) {
+        // Use the de-duplicated axis count: rawAxes may list an axis more than once,
+        // which previously produced a too-small rank (or an unsigned wrap-around)
+        // and therefore the wrong output format.
+        const size_t out_rank = rank - axes.size();
+        if (out_rank == 6)
+            out_format = cldnn::format::bfwzyx;
+        else if (out_rank == 5)
+            out_format = cldnn::format::bfzyx;
+        else if (out_rank <= 4)
+            out_format = cldnn::format::bfyx;
+
+        auto reorder_prim = cldnn::reorder(reorderLayerName, layerName, out_format, out_dt);
+        p.AddPrimitive(reorder_prim);
+        p.AddPrimitiveToProfiler(op, reorderLayerName);
+    } else {
+        p.AddPrimitiveToProfiler(op);
+    }
+}
+
+void CreateReduceMaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMax>& op) {  // v1::ReduceMax -> CreateReduceOp with reduce_mode::max
+    CreateReduceOp(p, op, cldnn::reduce_mode::max, op->get_keep_dims());
+}
+
+void CreateReduceLogicalAndOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceLogicalAnd>& op) {  // v1::ReduceLogicalAnd -> reduce_mode::logical_and
+    CreateReduceOp(p, op, cldnn::reduce_mode::logical_and, op->get_keep_dims());
+}
+
+void CreateReduceLogicalOrOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceLogicalOr>& op) {  // v1::ReduceLogicalOr -> reduce_mode::logical_or
+    CreateReduceOp(p, op, cldnn::reduce_mode::logical_or, op->get_keep_dims());
+}
+
+void CreateReduceMeanOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMean>& op) {  // v1::ReduceMean -> reduce_mode::mean
+    CreateReduceOp(p, op, cldnn::reduce_mode::mean, op->get_keep_dims());
+}
+
+void CreateReduceMinOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMin>& op) {  // v1::ReduceMin -> reduce_mode::min
+    CreateReduceOp(p, op, cldnn::reduce_mode::min, op->get_keep_dims());
+}
+
+void CreateReduceProdOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceProd>& op) {  // v1::ReduceProd -> reduce_mode::prod
+    CreateReduceOp(p, op, cldnn::reduce_mode::prod, op->get_keep_dims());
+}
+
+void CreateReduceSumOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceSum>& op) {  // v1::ReduceSum -> reduce_mode::sum
+    CreateReduceOp(p, op, cldnn::reduce_mode::sum, op->get_keep_dims());
+}
+
+void CreateReduceL1Op(Program& p, const std::shared_ptr<ngraph::op::v4::ReduceL1>& op) {  // v4::ReduceL1 -> reduce_mode::l1
+    CreateReduceOp(p, op, cldnn::reduce_mode::l1, op->get_keep_dims());
+}
+
+void CreateReduceL2Op(Program& p, const std::shared_ptr<ngraph::op::v4::ReduceL2>& op) {  // v4::ReduceL2 -> reduce_mode::l2
+    CreateReduceOp(p, op, cldnn::reduce_mode::l2, op->get_keep_dims());
+}
+
+REGISTER_FACTORY_IMPL(v1, ReduceMax);  // NOTE(review): macro is defined outside this file; presumably each line registers the matching Create<Name>Op for that ngraph op version - confirm
+REGISTER_FACTORY_IMPL(v1, ReduceLogicalAnd);
+REGISTER_FACTORY_IMPL(v1, ReduceLogicalOr);
+REGISTER_FACTORY_IMPL(v1, ReduceMean);
+REGISTER_FACTORY_IMPL(v1, ReduceMin);
+REGISTER_FACTORY_IMPL(v1, ReduceProd);
+REGISTER_FACTORY_IMPL(v1, ReduceSum);
+REGISTER_FACTORY_IMPL(v4, ReduceL1);  // the L1/L2 reductions come from the v4 op namespace, the rest from v1
+REGISTER_FACTORY_IMPL(v4, ReduceL2);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/region_yolo.cpp
+++ b/inference-engine/src/cldnn_engine/ops/region_yolo.cpp
@ -0,0 +1,39 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/region_yolo.hpp"
+
+#include "api/region_yolo.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0::RegionYolo node into a single cldnn::region_yolo primitive.
+// The node must have exactly one input. The coords/classes/regions counts, the do_softmax flag
+// and the number of mask entries are read from the op and forwarded to the primitive.
+void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::RegionYolo>& op) {
+    p.ValidateInputs(op, {1});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    const uint32_t numCoords = op->get_num_coords();
+    const uint32_t numClasses = op->get_num_classes();
+    const uint32_t numRegions = op->get_num_regions();
+    const bool applySoftmax = op->get_do_softmax();
+    // Only the number of mask entries is passed on, not the mask values themselves.
+    const uint32_t maskEntries = op->get_mask().size();
+
+    p.AddPrimitive(cldnn::region_yolo(primName, inputIds[0], numCoords, numClasses, numRegions, maskEntries, applySoftmax));
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, RegionYolo);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reorg_yolo.cpp
@ -0,0 +1,31 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/reorg_yolo.hpp"
+
+#include "api/reorg_yolo.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0::ReorgYolo node into a cldnn::reorg_yolo primitive.
+// The node must have exactly one input; only the first element of the op's strides is used.
+void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYolo>& op) {
+    p.ValidateInputs(op, {1});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    const uint32_t firstStride = op->get_strides()[0];
+
+    p.AddPrimitive(cldnn::reorg_yolo(primName, inputIds[0], firstStride));
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, ReorgYolo);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/reshape.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reshape.cpp
@ -0,0 +1,72 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/reshape.hpp"
+#include "ngraph/op/squeeze.hpp"
+#include "ngraph/op/unsqueeze.hpp"
+
+#include "api/reshape.hpp"
+#include "api/reorder.hpp"
+
+namespace CLDNNPlugin {
+
+// Shared lowering used by the Reshape, Squeeze and Unsqueeze factories: emits a cldnn::reshape
+// to the node's output shape. Accepts nodes with 1 or 2 inputs; only input 0 is consumed here.
+// When the input and output ranks differ, a cldnn::reorder to the output layout is inserted in
+// front of the reshape (bfzyx for rank 5, bfwzyx for rank 6, bfyx otherwise) and registered
+// in the profiling / primitive-id tables.
+void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
+    p.ValidateInputs(op, {1, 2});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    const auto inShape = op->get_input_shape(0);
+    const auto outShape = op->get_output_shape(0);
+    const auto outTensor = CldnnTensorFromIEDims(outShape);
+
+    // if we convert from or to 5D/6D, additional reorder also required to change format
+    cldnn::primitive_id reshapeSrcId = inputIds[0];
+    const bool rankChanges = inShape.size() != outShape.size();
+    if (rankChanges) {
+        const cldnn::primitive_id reorderId = "reorder:" + op->get_friendly_name() + "_reorder";
+
+        cldnn::format targetFormat = cldnn::format::bfyx;
+        if (outShape.size() == 5) {
+            targetFormat = cldnn::format::bfzyx;
+        } else if (outShape.size() == 6) {
+            targetFormat = cldnn::format::bfwzyx;
+        }
+
+        const cldnn::layout targetLayout(DataTypeFromPrecision(op->get_output_element_type(0)), targetFormat, outTensor);
+        p.AddPrimitive(cldnn::reorder(reorderId, reshapeSrcId, targetLayout));
+        p.InitProfileInfo(reorderId, "Reorder", false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, primName);
+        p.primitivesToIRLayersMap[reorderId] = { op->get_friendly_name() };
+        p.primitiveIDs[primName + "_reorder"] = reorderId;
+        p.primitiveIDs[reorderId] = reorderId;
+        p.profilingIDs.push_back(reorderId);
+        // The reshape now reads from the reorder instead of the original input.
+        reshapeSrcId = reorderId;
+    }
+
+    p.AddPrimitive(cldnn::reshape(primName, reshapeSrcId, outTensor));
+    p.AddPrimitiveToProfiler(op);
+}
+
+// ngraph v1::Reshape is lowered by the shared reshape path.
+void CreateReshapeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Reshape>& op) {
+    const std::shared_ptr<ngraph::Node> node = op;
+    CreateCommonReshapeOp(p, node);
+}
+
+// ngraph v0::Squeeze is lowered by the shared reshape path.
+void CreateSqueezeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Squeeze>& op) {
+    const std::shared_ptr<ngraph::Node> node = op;
+    CreateCommonReshapeOp(p, node);
+}
+
+// ngraph v0::Unsqueeze is lowered by the shared reshape path.
+void CreateUnsqueezeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Unsqueeze>& op) {
+    const std::shared_ptr<ngraph::Node> node = op;
+    CreateCommonReshapeOp(p, node);
+}
+
+// Factory registrations for the three ops that share CreateCommonReshapeOp.
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds the
+// matching Create<Name>Op function to the given ngraph opset version - confirm against its definition.
+REGISTER_FACTORY_IMPL(v1, Reshape);
+REGISTER_FACTORY_IMPL(v0, Squeeze);
+REGISTER_FACTORY_IMPL(v0, Unsqueeze);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/result.cpp
+++ b/inference-engine/src/cldnn_engine/ops/result.cpp
@ -0,0 +1,71 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/result.hpp"
+
+#include "api/reorder.hpp"
+
+using namespace InferenceEngine;
+
+namespace CLDNNPlugin {
+
+// Lowers an ngraph v0::Result node into a cldnn::reorder that converts the producer's output
+// to the layout and precision stored for the matching network output.
+// The network output is looked up by the producer's output tensor name; when that name is empty
+// the producer's friendly name is used instead (suffixed with ".<port>" for multi-output producers).
+// Throws if the output is missing from the OutputsDataMap or if its layout is not supported.
+void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& op) {
+    OutputsDataMap outputsInfo = p.GetNetworkOutputs();
+    p.ValidateInputs(op, {1});
+
+    const auto producer = op->get_input_node_shared_ptr(0);
+    auto outputKey = op->get_input_source_output(0).get_tensor().get_name();
+    if (outputKey.empty()) {
+        outputKey = producer->get_friendly_name();
+        const bool producerHasManyOutputs = producer->get_output_size() > 1;
+        if (producerHasManyOutputs) {
+            outputKey += "." + std::to_string(op->get_input_source_output(0).get_index());
+        }
+    }
+    const auto found = outputsInfo.find(outputKey);
+    if (found == outputsInfo.end()) {
+        THROW_IE_EXCEPTION << "Can't find output " << outputKey << " in OutputsDataMap";
+    }
+    const std::string originalOutName = found->first;
+    DataPtr outputData = found->second;
+
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const auto outputDesc = outputData->getTensorDesc();
+    const auto outputLayout = outputDesc.getLayout();
+
+    // TODO: add precision check once there's an outputInfo object
+    // TODO: change 6d case (currently covered by BLOCKED) once new layout added in IE
+    const bool layoutSupported = outputLayout == NCHW ||
+                                 outputLayout == BLOCKED ||
+                                 outputLayout == NCDHW ||
+                                 outputLayout == NHWC ||
+                                 outputLayout == CHW ||
+                                 outputLayout == NC ||
+                                 outputLayout == C ||
+                                 outputLayout == SCALAR;
+    if (!layoutSupported) {
+        THROW_IE_EXCEPTION << "Unsupported layout (" << outputLayout << ") in output: " << originalOutName;
+    }
+
+    const auto resultName = layer_type_name_ID(op);
+    const Precision outPrecision = outputData->getPrecision();
+    const std::string producerId = inputIds[0];
+
+    p.AddPrimitive(cldnn::reorder(resultName,
+                                  producerId,
+                                  FormatFromLayout(outputData->getLayout()),
+                                  DataTypeFromPrecision(outPrecision)));
+    p.InitProfileInfo(resultName, "reorder");
+    p.profilingIDs.push_back(resultName);
+    // The reorder is reachable both by its own name and by the original network output name.
+    p.primitiveIDs[resultName] = resultName;
+    p.primitiveIDs[originalOutName] = resultName;
+
+    p.outputDims[originalOutName] = outputDesc.getDims();
+    p.prevPrimitiveIDs[resultName] = {originalOutName};
+}
+
+REGISTER_FACTORY_IMPL(v0, Result);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp
+++ b/inference-engine/src/cldnn_engine/ops/reverse_sequence.cpp
@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/reverse_sequence.hpp"
+
+#include "api/reverse_sequence.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates an ngraph v0::ReverseSequence node into a cldnn::reverse_sequence primitive.
+// The node must have exactly two inputs, which are forwarded in order together with the
+// op's sequence axis and batch axis.
+void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReverseSequence>& op) {
+    p.ValidateInputs(op, {2});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    const size_t batchAxis = op->get_batch_axis();
+    const size_t sequenceAxis = op->get_sequence_axis();
+
+    // Note the argument order: the primitive takes the sequence axis before the batch axis.
+    p.AddPrimitive(cldnn::reverse_sequence(primName, inputIds[0], inputIds[1], sequenceAxis, batchAxis));
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, ReverseSequence);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/rnn.cpp
+++ b/inference-engine/src/cldnn_engine/ops/rnn.cpp
@ -0,0 +1,315 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/lstm_cell.hpp"
+#include "ngraph/op/lstm_sequence.hpp"
+
+#include "api/reshape.hpp"
+#include "api/reorder.hpp"
+#include "api/fully_connected.hpp"
+#include "api/lstm.hpp"
+#include "api/crop.hpp"
+#include "api/concatenation.hpp"
+
+namespace CLDNNPlugin {
+// Maps an activation name to the matching clDNN activation function.
+// Recognized names are "sigmoid", "tanh" and "relu"; any other name yields activation_func::none.
+cldnn::activation_func GetActivationFunc(std::string name) {
+    static const std::map<std::string, cldnn::activation_func> knownActivations = {
+        {"sigmoid", cldnn::activation_func::logistic},
+        {"tanh", cldnn::activation_func::hyperbolic_tan},
+        {"relu", cldnn::activation_func::relu},
+    };
+    const auto found = knownActivations.find(name);
+    return found == knownActivations.end() ? cldnn::activation_func::none : found->second;
+}
+
+// Fills `activations` and `activation_params` from the activation attributes of an LSTM-like op.
+//   activations       - starts as {logistic, hyperbolic_tan, hyperbolic_tan}; overwritten entry by
+//                       entry when the op lists exactly three supported activation names.
+//   activation_params - left empty unless the op provides alpha values, in which case three
+//                       {alpha, beta} pairs are appended.
+// Throws when the op lists a number of activations other than 0 or 3, names an activation that
+// GetActivationFunc does not recognize, or provides alpha values without exactly three alphas
+// and three betas. The error text always says "LSTMCell" regardless of T.
+template <typename T>
+void GetLSTMActivationParams(const std::shared_ptr<T>& op,
+                             std::vector<cldnn::activation_func>& activations,
+                             std::vector<cldnn::activation_additional_params>& activation_params) {
+    activations = { cldnn::activation_func::logistic,
+                    cldnn::activation_func::hyperbolic_tan,
+                    cldnn::activation_func::hyperbolic_tan };
+    activation_params.clear();
+
+    const auto requestedNames = op->get_activations();
+    if (!requestedNames.empty()) {
+        if (requestedNames.size() != 3)
+            THROW_IE_EXCEPTION << "Wrong number of activations for LSTMCell op " << op->get_friendly_name();
+        for (size_t idx = 0; idx < 3; ++idx) {
+            const auto func = GetActivationFunc(requestedNames[idx]);
+            if (func == cldnn::activation_func::none)
+                THROW_IE_EXCEPTION << "Wrong or unsupported activation type " << requestedNames[idx]
+                << " for LSTMCell op " << op->get_friendly_name();
+            activations[idx] = func;
+        }
+    }
+
+    const auto alphas = op->get_activations_alpha();
+    const auto betas = op->get_activations_beta();
+    // Betas are only inspected when alphas are present.
+    if (!alphas.empty()) {
+        if (alphas.size() != 3 || betas.size() != 3)
+            THROW_IE_EXCEPTION << "Wrong number of activation parameters for LSTMCell op " << op->get_friendly_name();
+        for (size_t idx = 0; idx < 3; ++idx) {
+            const cldnn::activation_additional_params alphaBeta = { alphas[idx], betas[idx] };
+            activation_params.push_back(alphaBeta);
+        }
+    }
+}
+
+// Lowers an ngraph v4::LSTMCell node into a chain of clDNN primitives:
+//   reshape + reorder of inputs 0..2 (data, hidden state, cell state)
+//   -> concat(data, hidden) along x
+//   -> fully_connected against concat(weights, recurrent weights) with bias
+//   -> reshape + reorder of the gate buffer -> lstm_elt
+//   -> two crops of the lstm_elt result: hidden state (".0") and cell state (".1").
+// Expects exactly 6 inputs; inputs 3, 4 and 5 are used as weights, recurrent weights and bias.
+// Throws if any of the first three inputs is not 2D or if the activation attributes are rejected
+// by GetLSTMActivationParams.
+void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
+    p.ValidateInputs(op, {6});
+    int lstm_batch_size, lstm_input_size, lstm_hidden_size;
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+
+    std::string layerName = layer_type_name_ID(op);
+    cldnn::primitive_id weightID = inputPrimitives[3];
+    cldnn::primitive_id recurrentID = inputPrimitives[4];
+    cldnn::primitive_id biasID = inputPrimitives[5];
+
+    /* check incoming shapes and setup required variables */
+    {
+        const auto in_dims0 = op->get_input_shape(0);
+        const auto out_dims0 = op->get_output_shape(0);
+
+        if (in_dims0.size() != 2 ||
+            op->get_input_shape(1).size() != 2 ||
+            op->get_input_shape(2).size() != 2)
+            THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell op " << op->get_friendly_name();
+
+        lstm_input_size = in_dims0.back();
+        lstm_batch_size = in_dims0.at(in_dims0.size()-2);
+        lstm_hidden_size = out_dims0.back();
+    }
+
+    std::vector<cldnn::activation_func> activations;
+    std::vector<cldnn::activation_additional_params> activation_params;
+    GetLSTMActivationParams(op, activations, activation_params);
+    float clip = op->get_clip();
+
+    // All intermediate layouts use the element type of the op's first output.
+    auto lstm_dtype = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
+    cldnn::primitive_id permuteID = layerName + "_inputReorder";
+    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
+    cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder";
+    cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape";
+    cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder";
+    cldnn::primitive_id input_concatID = layerName + "_inputConcat";
+
+    cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
+    cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
+    cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
+    cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape);
+    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
+    p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
+
+    p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
+
+    // Suffix "_1" is the hidden state path, "_2" the cell state path.
+    std::string hiddenInResh = inHiddenReshapeID + "_1";
+    std::string hiddenInStr = inHiddenReorderID + "_1";
+    std::string cellInResh = inHiddenReshapeID + "_2";
+    std::string cellInStr = inHiddenReorderID + "_2";
+    p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape));
+    p.AddPrimitive(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
+    p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape));
+    p.AddPrimitive(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
+    p.AddPrimitive(cldnn::concatenation(input_concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
+
+    p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(hiddenInStr, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(cellInResh, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(cellInStr, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(input_concatID, op->get_friendly_name(), op);
+
+    cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
+    cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz);
+    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
+    // Offset of the cell state inside the lstm_elt output (feature index 1).
+    cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
+
+    std::string lstm_fc_id = layerName + "_fully_connected";
+    std::string lstm_elt_id = layerName + "_lstm_elt";
+
+    cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f));
+    p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
+
+    // ValidateInputs above requires all 6 inputs, so the bias input is always present.
+    p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, biasID));
+    p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
+    p.AddPrimitive(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
+    p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
+                                 clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
+
+    p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(gemmReshapeID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(gemmReorderID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
+
+    cldnn::primitive_id outputHiddenID = layerName + ".0";
+    p.AddPrimitive(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
+    p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);
+    cldnn::primitive_id outputCellID = layerName + ".1";
+    p.AddPrimitive(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
+    p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);
+
+    // output primitive IDs
+    p.primitiveIDs[outputHiddenID] = outputHiddenID;     // "<layerName>.0" - hidden state
+    p.primitiveIDs[layerName] = outputHiddenID;          // "<layerName>"   - alias of the hidden state
+    p.primitiveIDs[outputCellID] = outputCellID;         // "<layerName>.1" - cell state
+
+    p.AddPrimitiveToProfiler(layerName, op, outputHiddenID);
+}
+
+// Lowers an ngraph v5::LSTMSequence node by unrolling it over the sequence length.
+// For every step the matching slice of the input is cropped, concatenated with the current hidden
+// state, passed through fully_connected (against the reshaped concat of weights and recurrent
+// weights, with bias) and lstm_elt, and the hidden / cell states are cropped from the result.
+// Steps are visited in forward order only when the op's direction is FORWARD, otherwise in reverse.
+// Registered outputs: "<layerName>.0" / "<layerName>" - concatenation of all hidden states,
+// "<layerName>.1" - hidden state of the last processed step, "<layerName>.2" - its cell state.
+// Expects exactly 7 inputs; inputs 4, 5 and 6 are used as weights, recurrent weights and bias
+// (input 3 is not read here). Throws if any of the first three inputs is not 3D or if the
+// activation attributes are rejected by GetLSTMActivationParams.
+void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
+    p.ValidateInputs(op, {7});
+
+    std::string layerName = layer_type_name_ID(op);
+    int lstm_batch_size, lstm_input_size, lstm_hidden_size, lstm_sequence_len;
+
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    cldnn::primitive_id weightID = inputPrimitives[4];
+    cldnn::primitive_id recurrentID = inputPrimitives[5];
+    cldnn::primitive_id biasID = inputPrimitives[6];
+
+    {
+        const auto in_dims0 = op->get_input_shape(0);
+        const auto out_dims0 = op->get_output_shape(0);
+
+        if (in_dims0.size() != 3 ||
+            op->get_input_shape(1).size() != 3 ||
+            op->get_input_shape(2).size() != 3)
+            THROW_IE_EXCEPTION << "Wrong input shapes for LSTMSequence op " << op->get_friendly_name();
+
+        lstm_input_size = in_dims0.back();
+        lstm_sequence_len = in_dims0.at(in_dims0.size() - 2);
+        lstm_batch_size = in_dims0.at(in_dims0.size() - 3);
+        lstm_hidden_size = out_dims0.back();
+    }
+
+    std::vector<cldnn::activation_func> activations;
+    std::vector<cldnn::activation_additional_params> activation_params;
+    GetLSTMActivationParams(op, activations, activation_params);
+    float clip = op->get_clip();
+    bool isForward = op->get_direction() == ngraph::op::RecurrentSequenceDirection::FORWARD;
+
+    // All intermediate layouts use the element type of the op's first output.
+    auto lstm_dtype = DataTypeFromPrecision(op->get_output_element_type(0));
+
+    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
+    cldnn::primitive_id permuteID = layerName + "_inputReorder";
+    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
+    cldnn::primitive_id inHiddenStateID = inHiddenReshapeID + "_1";
+    cldnn::primitive_id inCellStateID = inHiddenReshapeID + "_2";
+
+    std::vector<cldnn::primitive_id> output_ids_offsets;
+
+    cldnn::tensor inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
+    cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
+    cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
+    p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
+    p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
+
+    p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape));
+    p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape));
+
+    p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(inHiddenStateID, op->get_friendly_name(), op);
+    p.AddInnerPrimitiveToProfiler(inCellStateID, op->get_friendly_name(), op);
+
+    cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
+    cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz);
+    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
+    // Offset of the cell state inside the lstm_elt output (feature index 1).
+    cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
+    // Running hidden / cell state ids: start at the reshaped initial states and are
+    // re-pointed to the crops produced by each unrolled step.
+    cldnn::primitive_id hiddenStr = inHiddenStateID;
+    cldnn::primitive_id cellStr = inCellStateID;
+    cldnn::primitive_id inputCropID = layerName + "_inputCrop";
+
+    cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
+    p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
+
+    std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
+    cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
+    auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize));
+    p.AddPrimitive(reshapeInPrim);
+    p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);
+
+    // Size of a single-step slice of the input; identical for every step.
+    const cldnn::tensor crop_tensor{ inputShape.batch[0], 1, inputShape.spatial[0], inputShape.spatial[1] };
+
+    for (int i = 0; i < lstm_sequence_len; ++i) {
+        const std::string id_str = std::to_string(i);
+        cldnn::primitive_id concatID = layerName + "_inputConcat" + id_str;
+        cldnn::primitive_id lstm_fc_id = layerName + "_fully_connected" + id_str;
+        cldnn::primitive_id lstm_fc_resh_id = layerName + "_gemmReshape" + id_str;
+        cldnn::primitive_id lstm_fc_reor_id = layerName + "_gemmReorder" + id_str;
+        cldnn::primitive_id lstm_elt_id = layerName + "_lstm_elt" + id_str;
+        cldnn::primitive_id crop_id = layerName + "_crop" + id_str;
+
+        // Position inside the sequence that this unrolled step reads from.
+        int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
+        const std::string seqIdx_str = std::to_string(seqIdx);
+
+        cldnn::tensor offset_tensor{ 0, static_cast<cldnn::tensor::value_type>(seqIdx), 0, 0 };
+        cldnn::primitive_id inputCrop_id = inputCropID + ":" + seqIdx_str;
+        p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor));
+        p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);
+
+        p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x));
+        p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
+        p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID));
+        p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
+
+        p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
+        p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
+        p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr,
+                                     clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
+        p.AddInnerPrimitiveToProfiler(lstm_fc_resh_id, op->get_friendly_name(), op);
+        p.AddInnerPrimitiveToProfiler(lstm_fc_reor_id, op->get_friendly_name(), op);
+        p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
+
+        hiddenStr = crop_id + ":hidden";
+        cellStr = crop_id + ":cell";
+        p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
+        p.AddInnerPrimitiveToProfiler(hiddenStr, op->get_friendly_name(), op);
+        output_ids_offsets.push_back(hiddenStr);
+
+        if (i < lstm_sequence_len - 1) {
+            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
+            p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
+        } else {
+            // last hidden state crop (output 2)
+            cldnn::primitive_id outputHiddenID = layerName + ".1";
+            p.primitiveIDs[hiddenStr] = hiddenStr;
+            p.primitiveIDs[outputHiddenID] = hiddenStr;
+
+            // last cell state crop (output 3)
+            p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
+            cldnn::primitive_id outputCellID = layerName + ".2";
+            p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
+            p.primitiveIDs[outputCellID] = cellStr;
+        }
+    }
+
+    // Hidden states were collected in processing order; restore sequence order for non-forward runs.
+    if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
+    // concatenated hidden state (output 1)
+    cldnn::primitive_id outputConcatID = layerName + ".0";
+    cldnn::primitive_id concatStr = layerName + ":hiddenConcat";
+    p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f));
+
+    p.primitiveIDs[outputConcatID] = concatStr;
+    p.primitiveIDs[layerName] = concatStr;
+    p.AddPrimitiveToProfiler(layerName, op);
+}
+
+// Factory registrations for the LSTM ops defined in this file.
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds the
+// matching Create<Name>Op function to the given ngraph opset version - confirm against its definition.
+REGISTER_FACTORY_IMPL(v4, LSTMCell);
+REGISTER_FACTORY_IMPL(v5, LSTMSequence);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp
+++ b/inference-engine/src/cldnn_engine/ops/roi_pooling.cpp
@ -0,0 +1,122 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/roi_pooling.hpp"
+#include "ngraph/op/psroi_pooling.hpp"
+#include "ngraph/op/deformable_psroi_pooling.hpp"
+
+#include "api/roi_pooling.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps a pooling method/mode name to the clDNN pooling mode.
+// "max", "average" and "bilinear" map to their namesakes; every other name
+// falls through to deformable_bilinear.
+static cldnn::pooling_mode GetPoolingMode(std::string method) {
+    if (method == "max") {
+        return cldnn::pooling_mode::max;
+    }
+    if (method == "average") {
+        return cldnn::pooling_mode::average;
+    }
+    if (method == "bilinear") {
+        return cldnn::pooling_mode::bilinear;
+    }
+    return cldnn::pooling_mode::deformable_bilinear;
+}
+
+// Translates an ngraph v1::DeformablePSROIPooling node into a position-sensitive cldnn::roi_pooling
+// primitive. The node may have 2 or 3 inputs; with only 2 inputs the primitive is created with
+// no_trans = true. All input primitives are forwarded as-is.
+void CreateDeformablePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v1::DeformablePSROIPooling>& op) {
+    p.ValidateInputs(op, {2, 3});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    cldnn::pooling_mode mode = GetPoolingMode(op->get_mode());
+    float trans_std = op->get_trans_std();
+    int part_size = op->get_part_size();
+    bool no_trans = op->get_input_size() == 2;
+
+    // temporary workaround due to incorrect usage of group_size in the nGraph operation for the DeformablePSROIPooling
+    int pooled_width = op->get_group_size();
+    int pooled_height = op->get_group_size();
+    int group_size = op->get_group_size();
+    int output_dim = op->get_output_dim();
+    float spatial_scale = op->get_spatial_scale();
+    int spatial_bins_x = op->get_spatial_bins_x();
+    int spatial_bins_y = op->get_spatial_bins_y();
+    bool position_sensitive = true;
+
+    auto psROIPoolingPrim = cldnn::roi_pooling(layerName,
+                                                inputPrimitives,
+                                                mode,
+                                                position_sensitive,
+                                                pooled_width,
+                                                pooled_height,
+                                                spatial_scale,
+                                                trans_std,
+                                                no_trans,
+                                                part_size,
+                                                group_size,
+                                                output_dim,
+                                                spatial_bins_x,
+                                                spatial_bins_y);
+    p.AddPrimitive(psROIPoolingPrim);
+    // Register the primitive for this op like the other ROI pooling factories do;
+    // this call was missing here.
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates an ngraph v0::PSROIPooling node into a position-sensitive cldnn::roi_pooling primitive.
+// The node must have exactly two inputs (data, rois). The op's group size is passed for both
+// pooled-size arguments of the primitive.
+void CreatePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::PSROIPooling>& op) {
+    p.ValidateInputs(op, {2});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    const cldnn::pooling_mode poolMode = GetPoolingMode(op->get_mode());
+    const int groupSize = op->get_group_size();
+    const int outputDim = op->get_output_dim();
+    const float spatialScale = op->get_spatial_scale();
+    const int binsX = op->get_spatial_bins_x();
+    const int binsY = op->get_spatial_bins_y();
+    const bool positionSensitive = true;
+
+    p.AddPrimitive(cldnn::roi_pooling(primName,
+                                      inputIds[0],  // input data
+                                      inputIds[1],  // input rois
+                                      poolMode,
+                                      positionSensitive,
+                                      groupSize,
+                                      groupSize,
+                                      spatialScale,
+                                      outputDim,
+                                      binsX,
+                                      binsY));
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Translates an ngraph v0::ROIPooling node into a non-position-sensitive cldnn::roi_pooling primitive.
+// The node must have exactly two inputs (data, rois). Element 0 of the op's output size is used
+// as the pooled height and element 1 as the pooled width.
+void CreateROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::ROIPooling>& op) {
+    p.ValidateInputs(op, {2});
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primName = layer_type_name_ID(op);
+
+    // params
+    const auto pooledSize = op->get_output_size();
+    const int pooledH = pooledSize[0];
+    const int pooledW = pooledSize[1];
+    const float spatialScale = op->get_spatial_scale();
+    const bool positionSensitive = false;
+    const cldnn::pooling_mode poolMode = GetPoolingMode(op->get_method());
+
+    // The primitive takes width before height.
+    p.AddPrimitive(cldnn::roi_pooling(primName,
+                                      inputIds[0],  // input data
+                                      inputIds[1],  // input rois
+                                      poolMode,
+                                      positionSensitive,
+                                      pooledW,
+                                      pooledH,
+                                      spatialScale));
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Factory registrations for the ROI pooling ops defined in this file.
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds the
+// matching Create<Name>Op function to the given ngraph opset version - confirm against its definition.
+REGISTER_FACTORY_IMPL(v1, DeformablePSROIPooling);
+REGISTER_FACTORY_IMPL(v0, PSROIPooling);
+REGISTER_FACTORY_IMPL(v0, ROIPooling);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/scatter_update.cpp
+++ b/inference-engine/src/cldnn_engine/ops/scatter_update.cpp
@ -0,0 +1,68 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/scatter_update.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/scatter_update.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps an IE/ngraph ScatterUpdate axis of a tensor with `rank` dimensions onto the matching
+// clDNN scatter_update axis. A negative axis is wrapped around `rank` first.
+// Throws when the axis is outside [0, rank) after wrapping or has no clDNN counterpart.
+static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(int axis, unsigned rank) {
+    using cldnn_axis_t = cldnn::scatter_update::scatter_update_axis;
+
+    if (axis < 0)
+        axis += rank;
+    if (axis < 0 || static_cast<unsigned>(axis) >= rank)
+        THROW_IE_EXCEPTION << "ScatterUpdate axis is not correspond to number of dimensions";
+
+    // Batch and feature keep their positions. Spatial dimensions are ordered the other way
+    // round in clDNN, and a shape is treated as having at least 4 dimensions.
+    unsigned mapped = axis;
+    if (axis >= 2) {
+        const unsigned spatialCount = std::max(rank, 4u) - 2;
+        const unsigned spatialIdx = axis - 2;
+        mapped = 2 + (spatialCount - 1 - spatialIdx);
+    }
+
+    // Position in this table == clDNN axis index.
+    static const cldnn_axis_t kAxisTable[] = {
+        cldnn_axis_t::along_b,
+        cldnn_axis_t::along_f,
+        cldnn_axis_t::along_x,
+        cldnn_axis_t::along_y,
+        cldnn_axis_t::along_z,
+        cldnn_axis_t::along_w,
+    };
+    const unsigned kAxisCount = sizeof(kAxisTable) / sizeof(kAxisTable[0]);
+
+    if (mapped >= kAxisCount)
+        THROW_IE_EXCEPTION << "Unsupported ScatterUpdate axis: " << axis;
+
+    return kAxisTable[mapped];
+}
+
+// Converts an ngraph v3 ScatterUpdate node into a cldnn::scatter_update primitive.
+// Input #3 (the axis) has to be a Constant; only its first element is used.
+void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::ScatterUpdate>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    size_t dataRank = op->get_input_shape(0).size();
+
+    // The axis must be known while the program is built, so a non-constant axis is rejected.
+    auto axisNode = op->get_input_node_shared_ptr(3);
+    auto axisConst = std::dynamic_pointer_cast<ngraph::op::Constant>(axisNode);
+    if (axisConst == nullptr) {
+        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+    int32_t axis = axisConst->cast_vector<int32_t>()[0];
+    auto cldnnAxis = GetScatterUpdateAxis(axis, dataRank);
+
+    auto scatterPrim = cldnn::scatter_update(primitiveId,
+                                             inputIds[0],
+                                             inputIds[1],
+                                             inputIds[2],
+                                             cldnnAxis);
+
+    p.AddPrimitive(scatterPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v3, ScatterUpdate);  // NOTE(review): macro is defined outside this file; presumably registers CreateScatterUpdateOp for v3::ScatterUpdate - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/select.cpp
+++ b/inference-engine/src/cldnn_engine/ops/select.cpp
@ -0,0 +1,85 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/select.hpp"
+
+#include "api/select.hpp"
+#include "api/reorder.hpp"
+#include "api/reshape.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph v1 Select node into a cldnn::select primitive.
+// Only the NONE and NUMPY auto-broadcast types are accepted. With NUMPY every input is
+// pre-processed: a reorder is inserted when the default format for the input rank differs from
+// the one for the output rank, and a reshape that prepends 1s is inserted when the ranks differ
+// or the input rank is below 4.
+void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& op) {
+    p.ValidateInputs(op, {3});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    const auto outRank = op->get_output_shape(0).size();
+
+    const auto autob = op->get_auto_broadcast();
+    const bool isNumpy = autob.m_type == ngraph::op::AutoBroadcastType::NUMPY;
+
+    if (!isNumpy && autob.m_type != ngraph::op::AutoBroadcastType::NONE) {
+        THROW_IE_EXCEPTION << "Unsupported broadcast type (" << autob.m_type << ") in layer " + op->get_friendly_name();
+    }
+
+    if (isNumpy) {
+        for (size_t idx = 0; idx < inputIds.size(); ++idx) {
+            ngraph::Shape inShape = op->get_input_shape(idx);
+            const auto inRank = inShape.size();
+            const std::string innerPrefix = primitiveId + "_cldnn_in" + std::to_string(idx);
+
+            // Going to the output rank may imply another default format; reorder in that case.
+            auto outFormat = DefaultFormatForDims(outRank);
+            if (outFormat.value != DefaultFormatForDims(inRank).value) {
+                auto reorderId = innerPrefix + "_reorder";
+                auto dataType = DataTypeFromPrecision(op->get_input_element_type(idx));
+                auto reorderPrim = cldnn::reorder(reorderId, inputIds[idx], outFormat, dataType);
+
+                p.AddPrimitive(reorderPrim);
+                p.AddInnerPrimitiveToProfiler(reorderId, primitiveId, op);
+
+                inputIds[idx] = reorderId;
+            }
+
+            // Inputs of a different rank, or of a rank below 4, get an explicit reshape.
+            if (inRank != outRank || inRank < 4) {
+                auto reshapeId = innerPrefix + "_reshape";
+
+                // Left-pad the input shape with ones up to the output rank.
+                inShape.insert(inShape.begin(), outRank - inRank, 1ul);
+
+                auto reshapePrim = cldnn::reshape(reshapeId, inputIds[idx], CldnnTensorFromIEDims(inShape));
+
+                p.AddPrimitive(reshapePrim);
+                p.AddInnerPrimitiveToProfiler(reshapeId, primitiveId, op);
+
+                inputIds[idx] = reshapeId;
+            }
+        }
+    }
+
+    std::string broadcastSpec = isNumpy ? "numpy" : "none";
+
+    auto selectPrim = cldnn::select(primitiveId,
+                                    inputIds[0],
+                                    inputIds[1],
+                                    inputIds[2],
+                                    cldnn::padding(),
+                                    broadcastSpec);
+
+    p.AddPrimitive(selectPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, Select);  // NOTE(review): macro is defined outside this file; presumably registers CreateSelectOp for v1::Select - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp
+++ b/inference-engine/src/cldnn_engine/ops/shuffle_channels.cpp
@ -0,0 +1,47 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/shuffle_channels.hpp"
+
+#include "api/shuffle_channels.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph v0 ShuffleChannels node into a cldnn::shuffle_channels primitive.
+// A negative axis is wrapped around the input rank before use.
+// Throws when the axis is out of range, when the group is smaller than 1, or when the group
+// does not evenly divide the input dimension selected by the axis.
+void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::ShuffleChannels>& op) {
+    p.ValidateInputs(op, {1, 2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto in_rank = op->get_input_shape(0).size();
+
+    int32_t group = op->get_group();
+    int32_t axis = op->get_axis();
+    // Kept for diagnostics: the axis exactly as it was read from the op, before wrapping.
+    const int32_t requested_axis = axis;
+
+    if (axis < 0)
+        axis += in_rank;
+
+    // Each message below reports the value that actually failed the check.
+    if (axis < 0 || static_cast<size_t>(axis) >= in_rank)
+        THROW_IE_EXCEPTION << "Incorrect axis value! Actual axis is " << std::to_string(requested_axis);
+
+    if (group < 1)
+        THROW_IE_EXCEPTION << "Invalid group size value (should equal at least one). Actual group size is " << std::to_string(group);
+
+    if (op->get_input_shape(0)[axis] % group != 0)
+        THROW_IE_EXCEPTION << "Group parameter must evenly divide the channel dimension. Actual group size is " << std::to_string(group);
+
+    auto shuffleChannelsPrim = cldnn::shuffle_channels(layerName,
+                                                       inputPrimitives[0],
+                                                       group,
+                                                       axis);
+
+    p.AddPrimitive(shuffleChannelsPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, ShuffleChannels);  // NOTE(review): macro is defined outside this file; presumably registers CreateShuffleChannelsOp for v0::ShuffleChannels - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/softmax.cpp
+++ b/inference-engine/src/cldnn_engine/ops/softmax.cpp
@ -0,0 +1,74 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/softmax.hpp"
+#include "ngraph/op/log_softmax.hpp"
+
+#include "api/softmax.hpp"
+#include "api/activation.hpp"
+
+namespace CLDNNPlugin {
+
+// Picks the cldnn::softmax normalization dimension for a non-negative `axis` of an input with
+// `rank` dimensions. Axes 2 and 3 resolve differently depending on whether rank exceeds 4.
+// Throws for any axis outside 0..4.
+static cldnn::softmax::dimension_t GetSoftmaxAxis(int64_t axis, size_t rank) {
+    const bool moreThan4d = rank > 4;
+
+    // FIXME: it seems that axis=0 should correspond to normalize_b;
+    if (axis == 0)
+        return cldnn::softmax::normalize_all;
+    if (axis == 1)
+        return cldnn::softmax::normalize_f;
+    if (axis == 2)
+        return moreThan4d ? cldnn::softmax::normalize_z : cldnn::softmax::normalize_y;
+    if (axis == 3)
+        return moreThan4d ? cldnn::softmax::normalize_y : cldnn::softmax::normalize_x;
+    if (axis == 4)
+        return cldnn::softmax::normalize_x;
+
+    THROW_IE_EXCEPTION << "Invalid softmax axis " << axis;
+    return cldnn::softmax::normalize_fyx;  // not expected to be reached
+}
+
+// Converts an ngraph v1 Softmax node into a cldnn::softmax primitive; the normalization
+// dimension is derived from the op's axis and the rank of input 0.
+void CreateSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::Softmax>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    const size_t inputRank = op->get_input_shape(0).size();
+    const auto dimension = GetSoftmaxAxis(op->get_axis(), inputRank);
+
+    auto primitive = cldnn::softmax(primitiveId, inputIds[0], dimension);
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers an ngraph v5 LogSoftmax node to two primitives: a cldnn::softmax (id suffixed with
+// "_softmax") followed by a log activation that carries the layer's own id.
+// A negative axis is wrapped around the input rank first.
+void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5::LogSoftmax>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string logId = layer_type_name_ID(op);
+    std::string softmaxId = layer_type_name_ID(op) + "_softmax";
+
+    const size_t inputRank = op->get_input_shape(0).size();
+
+    auto axis = op->get_axis();
+    if (axis < 0)
+        axis += inputRank;
+
+    auto softmaxPrim = cldnn::softmax(softmaxId,
+                                      inputIds[0],
+                                      GetSoftmaxAxis(static_cast<size_t>(axis), inputRank));
+
+    // The log is applied to the softmax output, not to the original input.
+    auto logPrim = cldnn::activation(logId, softmaxId, cldnn::activation_func::log);
+
+    p.AddPrimitive(softmaxPrim);
+    p.AddPrimitive(logPrim);
+    p.AddPrimitiveToProfiler(softmaxId, op);
+    p.AddPrimitiveToProfiler(logId, op);
+}
+
+REGISTER_FACTORY_IMPL(v1, Softmax);  // NOTE(review): macro is defined outside this file; presumably registers CreateSoftmaxOp for v1::Softmax - confirm
+REGISTER_FACTORY_IMPL(v5, LogSoftmax);  // same registration, for v5::LogSoftmax (CreateLogSoftmaxOp, presumably)
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp
+++ b/inference-engine/src/cldnn_engine/ops/space_to_batch.cpp
@ -0,0 +1,53 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/space_to_batch.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/space_to_batch.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph v1 SpaceToBatch node into a cldnn::space_to_batch primitive.
+// Inputs 1..3 must be Constants. Each one becomes a cldnn::tensor whose value list is padded
+// up to the data rank with 1 (input 1) or 0 (inputs 2 and 3).
+void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::SpaceToBatch>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    auto dataRank = op->get_input_shape(0).size();
+    auto dataFormat = DefaultFormatForDims(dataRank);
+
+    std::vector<cldnn::tensor> constArgs;
+    constArgs.reserve(3);
+
+    for (size_t port = 1; port < 4; ++port) {
+        auto constNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(port));
+        if (constNode == nullptr)
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+
+        // Neutral filler: 1 for the first constant input, 0 for the other two.
+        const int32_t filler = (port == 1) ? 1 : 0;
+
+        std::vector<int32_t> values = constNode->cast_vector<int32_t>();
+        if (values.size() < dataRank)
+            values.resize(dataRank, filler);
+
+        constArgs.emplace_back(dataFormat, values, filler);
+    }
+    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    auto spaceToBatchPrim = cldnn::space_to_batch(primitiveId,
+                                                  inputIds[0],   // input
+                                                  constArgs[0],  // block_shape
+                                                  constArgs[1],  // pads_begin
+                                                  constArgs[2],  // pads_end
+                                                  outTensor);
+
+    p.AddPrimitive(spaceToBatchPrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, SpaceToBatch);  // NOTE(review): macro is defined outside this file; presumably registers CreateSpaceToBatchOp for v1::SpaceToBatch - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp
+++ b/inference-engine/src/cldnn_engine/ops/space_to_depth.cpp
@ -0,0 +1,38 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/space_to_depth.hpp"
+
+#include "api/space_to_depth.hpp"
+
+namespace CLDNNPlugin {
+
+// Translates the ngraph SpaceToDepth mode into the cldnn::space_to_depth depth mode.
+// Throws for any mode other than BLOCKS_FIRST and DEPTH_FIRST.
+static cldnn::space_to_depth::depth_mode GetDepthMode(ngraph::op::v0::SpaceToDepth::SpaceToDepthMode mode) {
+    using ng_mode = ngraph::op::v0::SpaceToDepth::SpaceToDepthMode;
+
+    if (mode == ng_mode::BLOCKS_FIRST)
+        return cldnn::space_to_depth::blocks_first;
+    if (mode == ng_mode::DEPTH_FIRST)
+        return cldnn::space_to_depth::depth_first;
+
+    THROW_IE_EXCEPTION << "Unsupported SpaceToDepthMode value: " << static_cast<int>(mode);
+    return cldnn::space_to_depth::blocks_first;  // not expected to be reached
+}
+
+// Converts an ngraph v0 SpaceToDepth node into a cldnn::space_to_depth primitive, forwarding
+// the translated mode and the op's block size.
+void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v0::SpaceToDepth>& op) {
+    p.ValidateInputs(op, {1});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    auto primitive = cldnn::space_to_depth(primitiveId,
+                                           inputIds[0],
+                                           GetDepthMode(op->get_mode()),
+                                           op->get_block_size());
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, SpaceToDepth);  // NOTE(review): macro is defined outside this file; presumably registers CreateSpaceToDepthOp for v0::SpaceToDepth - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/split.cpp
+++ b/inference-engine/src/cldnn_engine/ops/split.cpp
@ -0,0 +1,73 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/split.hpp"
+#include "ngraph/op/variadic_split.hpp"
+
+#include "api/crop.hpp"
+
+namespace CLDNNPlugin {
+
+// Lowers a split-like node to one cldnn::crop per output of `op`.
+//
+// All crops read from input 0. `startOffset` holds the crop origin for the current output and
+// is advanced, after each output, along every dimension in which that output's extent differs
+// from the input's. A crop id is the layer id itself for a single-output op and
+// "<layer id>.<output index>" otherwise.
+// Throws when an output's rank differs from the input's, or when an output placed at the
+// current offset would exceed the input along any dimension.
+// The split node itself is recorded in the profiler as OPTIMIZED_OUT.
+void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    auto inputDims = op->get_input_shape(0);
+    InferenceEngine::SizeVector startOffset(inputDims.size());
+
+    bool is_single_out_split = op->get_output_size() == 1;
+
+    for (size_t i = 0; i < op->get_output_size(); i++) {
+        std::string outLayerName = layerName + (is_single_out_split ? "" : "." + std::to_string(i));
+        const auto outLayerDims = op->get_output_shape(i);
+        if (outLayerDims.size() != startOffset.size()) {
+            THROW_IE_EXCEPTION << "Invalid dimesions in split layer: " << op->get_friendly_name()
+                               << " output: " <<  op->get_output_tensor_name(i);
+        }
+        // The dimension index gets its own name so that the message below still refers to the
+        // output index `i` (it must not be shadowed by the dimension counter).
+        for (size_t d = 0; d < inputDims.size(); d++) {
+            if ((outLayerDims[d] + startOffset[d]) > inputDims[d]) {
+                THROW_IE_EXCEPTION << "Invalid dimesions in split layer: " << op->get_friendly_name()
+                                   << " output: " <<  op->get_output_tensor_name(i);
+            }
+        }
+
+        auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
+        auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);
+
+        auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
+        p.primitivesToIRLayersMap[outLayerName] = { op->get_friendly_name() };
+        p.primitiveIDs[outLayerName] = outLayerName;
+
+        p.AddPrimitive(cropPrim);
+        p.profilingIDs.push_back(outLayerName);
+        p.InitProfileInfo(outLayerName, "Crop");
+
+        // Move the origin past this output along the dimension(s) being split.
+        for (size_t d = 0; d < inputDims.size(); d++) {
+            if (outLayerDims[d] != inputDims[d]) {
+                startOffset[d] += outLayerDims[d];
+            }
+        }
+    }
+
+    // set split as not_run
+    p.InitProfileInfo(op->get_friendly_name(), op->get_type_name(), false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+}
+
+// Entry point for ngraph v1 Split: validates against an allowed input count of 2, then hands
+// the node to the shared crop-based lowering.
+void CreateSplitOp(Program& p, const std::shared_ptr<ngraph::op::v1::Split>& op) {
+    p.ValidateInputs(op, {2});
+
+    CreateCommonSplitOp(p, op);
+}
+
+// Entry point for ngraph v1 VariadicSplit: validates against an allowed input count of 3, then
+// hands the node to the shared crop-based lowering.
+void CreateVariadicSplitOp(Program& p, const std::shared_ptr<ngraph::op::v1::VariadicSplit>& op) {
+    p.ValidateInputs(op, {3});
+
+    CreateCommonSplitOp(p, op);
+}
+
+REGISTER_FACTORY_IMPL(v1, Split);  // NOTE(review): macro is defined outside this file; presumably registers CreateSplitOp for v1::Split - confirm
+REGISTER_FACTORY_IMPL(v1, VariadicSplit);  // same registration, for v1::VariadicSplit (CreateVariadicSplitOp, presumably)
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/strided_slice.cpp
+++ b/inference-engine/src/cldnn_engine/ops/strided_slice.cpp
@ -0,0 +1,276 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/strided_slice.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/strided_slice.hpp"
+#include "api/reshape.hpp"
+#include "api/crop.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph v1 StridedSlice node.
+//
+// Crop path (the do/while body): taken when begin, end and stride are Constants, the input
+// shape is static, every stride equals 1 and the slice expands to exactly 4 axes. The op then
+// becomes a cldnn::crop, preceded by a reshape when new_axis_mask is set and followed by a
+// reshape to the output shape when shrink_axis_mask is set.
+// Generic path: every `break` out of the do/while lands here; a cldnn::strided_slice primitive
+// is created with the begin/end masks inverted.
+void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::StridedSlice>& op) {
+    p.ValidateInputs(op, {4});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    do {
+        auto begin_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(1).get_node_shared_ptr());
+        auto end_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(2).get_node_shared_ptr());
+        auto stride_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(3).get_node_shared_ptr());
+
+        auto partial_input_shape = op->get_input_partial_shape(0);
+
+        if (!begin_node || !end_node || !stride_node || partial_input_shape.is_dynamic()) {
+            break;
+        }
+
+        // No up-front check of begin_mask / end_mask is made: both are applied per axis below.
+
+        auto input_shape = op->get_input_shape(0);
+        auto output_shape = op->get_output_shape(0);
+
+        auto begin = begin_node->cast_vector<int64_t>();
+        auto end = end_node->cast_vector<int64_t>();
+        auto strides = stride_node->cast_vector<int64_t>();
+
+        // The crop path only handles unit strides.
+        bool ones_stride = true;
+        for (auto & s : strides) {
+            if (s != 1)
+                ones_stride = false;
+        }
+
+        if (!ones_stride)
+            break;
+
+        // Collects the positions whose mask value is 1.
+        auto convert_to_set = [](const std::vector<int64_t>& mask) {
+            ngraph::AxisSet axis_set{};
+            for (size_t i = 0; i < static_cast<size_t>(mask.size()); ++i) {
+                if (mask[i] == 1) {
+                    axis_set.emplace(i);
+                }
+            }
+            return axis_set;
+        };
+
+        auto shrink_axis_mask = convert_to_set(op->get_shrink_axis_mask());
+        auto new_axis_mask = convert_to_set(op->get_new_axis_mask());
+        auto ellipsis_mask = convert_to_set(op->get_ellipsis_mask());
+        auto begin_mask = convert_to_set(op->get_begin_mask());
+        auto end_mask = convert_to_set(op->get_end_mask());
+
+        // Parallel per-axis vectors describing the crop:
+        //   reshape_pattern - shape fed to the optional reshape placed before the crop
+        //   axes            - running axis ids (0, 1, 2, ...)
+        //   offset          - crop origin
+        //   dim             - crop extent
+        std::vector<size_t> reshape_pattern,
+                            axes,
+                            offset,
+                            dim;
+
+        size_t input_shape_idx = 0;
+        uint64_t uniq_id = 0;
+        for (size_t axis = 0; axis < begin.size(); ++axis) {
+            // add dimensions hidden under the ellipsis mask if ellipsis mask is set
+            if (ellipsis_mask.count(axis)) {
+                // only one bit in ellipsis mask is allowed
+                int num_new_axis_after_ellipses = 0;
+                int num_input_axis_before_ellipses = 0;
+                for (size_t i = 0; i < axis; ++i) {
+                    if (!new_axis_mask.count(i))
+                        num_input_axis_before_ellipses++;
+                }
+                for (size_t i = axis + 1; i < begin.size(); ++i) {
+                    if (new_axis_mask.count(i))
+                        num_new_axis_after_ellipses++;
+                }
+
+                // -1 because it's a position of ellipses
+                unsigned long num_input_axis_after_ellipses = (begin.size() - axis - num_new_axis_after_ellipses - 1);
+                unsigned long num_of_hidden_dims = input_shape.size() - num_input_axis_after_ellipses
+                                                    - num_input_axis_before_ellipses;
+                for (size_t i = 0; i < num_of_hidden_dims; ++i) {
+                    axes.emplace_back(uniq_id);
+                    uniq_id++;
+                    reshape_pattern.emplace_back(input_shape[input_shape_idx]);
+                    offset.emplace_back(0);
+
+                    dim.emplace_back(input_shape[input_shape_idx]);
+                    input_shape_idx++;
+                }
+            } else {
+                // add new single dimension if new_axis_mask is set
+                if (new_axis_mask.count(axis)) {
+                    reshape_pattern.emplace_back(1);
+                    dim.emplace_back(1);
+                    offset.emplace_back(0);
+                } else if (shrink_axis_mask.count(axis)) {
+                    // skip this dimension if shrink_axis_mask is set (input_shape_idx++)
+                    dim.emplace_back(1);
+
+                    // A negative begin counts from the end of the dimension. Resolve it before
+                    // it goes into the unsigned offset vector, where it would otherwise wrap.
+                    int64_t shrink_begin = begin_mask.count(axis) ? 0 : begin[axis];
+                    if (shrink_begin < 0)
+                        shrink_begin = std::max(static_cast<int64_t>(input_shape[input_shape_idx]) + shrink_begin,
+                                                static_cast<int64_t>(0));
+                    offset.emplace_back(shrink_begin);
+
+                    reshape_pattern.emplace_back(1);
+                    input_shape_idx++;
+                } else {
+                    // calculate dimension using begin, end, begin_mask, end_mask, stride
+                    reshape_pattern.emplace_back(input_shape[input_shape_idx]);
+
+                    int64_t lb = begin[axis];
+                    int64_t ub = end[axis];
+
+                    // convert negative indexes to positive
+                    if (lb < 0)
+                        lb = std::max(static_cast<int64_t>(input_shape[input_shape_idx]) + lb,
+                                        static_cast<int64_t>(0));
+                    if (ub < 0)
+                        ub = std::max(static_cast<int64_t>(input_shape[input_shape_idx]) + ub,
+                                        static_cast<int64_t>(0));
+
+                    // apply restrictions when begin or end values more/less than max/min possible values.
+                    lb = std::min(static_cast<int64_t>(input_shape[input_shape_idx]), lb);
+                    ub = std::min(static_cast<int64_t>(input_shape[input_shape_idx]), ub);
+
+                    offset.emplace_back(lb);
+
+                    // set default value for stride or use given value
+                    int64_t stride = 1;
+                    if (strides.size() > axis)
+                        stride = strides[axis];
+
+                    int64_t dimension = 0;
+                    if (stride < 0) {
+                        // apply masks
+                        if (begin_mask.count(axis))
+                            lb = static_cast<int64_t>(input_shape[input_shape_idx]) - 1;
+                        if (end_mask.count(axis))
+                            ub = -1;
+
+                        lb = std::min(lb, static_cast<int64_t>(input_shape[input_shape_idx]) - 1);
+                        lb -= 1;  // we always get 1st element, so we need decrease range
+                        if (ub <= lb)
+                            dimension = (ub - lb) / stride + 1;
+                    } else {
+                        // apply masks
+                        if (begin_mask.count(axis))
+                            lb = 0;
+                        if (end_mask.count(axis))
+                            ub = static_cast<int64_t>(input_shape[input_shape_idx]);
+
+                        lb += 1;  // we always get 1st element, so we need decrease range
+                        if (ub >= lb)
+                            dimension = (ub - lb) / stride + 1;
+                    }
+
+                    dim.emplace_back(dimension);
+                    input_shape_idx++;
+                }
+                axes.emplace_back(uniq_id);
+                uniq_id++;
+            }
+        }
+
+        // Input dimensions not addressed by begin/end are taken whole.
+        for (; input_shape_idx < input_shape.size(); ++input_shape_idx) {
+            reshape_pattern.emplace_back(input_shape[input_shape_idx]);
+            offset.emplace_back(0);
+            dim.emplace_back(input_shape[input_shape_idx]);
+            axes.emplace_back(uniq_id);
+            uniq_id++;
+        }
+
+        // The crop path is limited to slices that expand to exactly 4 axes.
+        if (axes.size() != 4) {
+            break;
+        }
+
+        auto inPrimitive = inputPrimitives[0];
+        // Reshape in case of new axis
+        if (!new_axis_mask.empty()) {
+            auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
+            auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
+            auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape);
+            p.AddPrimitive(reshapePrim);
+            p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
+            inPrimitive = reshapeInName;
+        }
+
+        // Crop extent: start from the reshape pattern and overwrite each axis with its extent.
+        ngraph::Shape crop_shape(reshape_pattern);
+        for (size_t i = 0; i < axes.size(); ++i) {
+            crop_shape[axes[i]] = dim[i];
+        }
+
+        cldnn::tensor refSize = CldnnTensorFromIEDims(crop_shape);
+        cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);
+
+        auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize);
+        p.AddPrimitive(cropPrim);
+        p.AddPrimitiveToProfiler(layerName, op);
+
+        // Reshape in case of deleting of axis
+        if (!shrink_axis_mask.empty()) {
+            auto targetShape = CldnnTensorFromIEDims(output_shape);
+            auto reshapeOutName = op->get_friendly_name() + "/Crop";
+            auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape);
+            p.AddPrimitive(reshapePrim);
+            p.AddInnerPrimitiveToProfiler(reshapeOutName, layerName, op);
+        }
+        return;
+    } while (false);
+
+    // Generic path: hand the slice over to the strided_slice primitive.
+    auto end_mask_ = op->get_end_mask();
+    auto begin_mask_ = op->get_begin_mask();
+    auto new_axis_mask_ = op->get_new_axis_mask();
+    auto shrink_axis_mask_ = op->get_shrink_axis_mask();
+    std::vector<uint8_t> begin_mask(begin_mask_.begin(), begin_mask_.end());
+    std::vector<uint8_t> end_mask(end_mask_.begin(), end_mask_.end());
+    std::vector<uint8_t> new_axis_mask(new_axis_mask_.begin(), new_axis_mask_.end());
+    std::vector<uint8_t> shrink_axis_mask(shrink_axis_mask_.begin(), shrink_axis_mask_.end());
+
+    // Plugin requires inverted mask values. Consider changing primitive impl to be aligned with the spec.
+    for (auto& b : begin_mask) {
+        b = 1 - b;
+    }
+    for (auto& e : end_mask) {
+        e = 1 - e;
+    }
+
+    auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
+
+    auto stridedSlicePrim = cldnn::strided_slice(layerName,
+                                                 inputPrimitives[0],
+                                                 inputPrimitives[1],
+                                                 inputPrimitives[2],
+                                                 inputPrimitives[3],
+                                                 begin_mask,
+                                                 end_mask,
+                                                 new_axis_mask,
+                                                 shrink_axis_mask,
+                                                 out_size);
+
+    p.AddPrimitive(stridedSlicePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v1, StridedSlice);  // NOTE(review): macro is defined outside this file; presumably registers CreateStridedSliceOp for v1::StridedSlice - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/tile.cpp
+++ b/inference-engine/src/cldnn_engine/ops/tile.cpp
@ -0,0 +1,29 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/tile.hpp"
+
+#include "api/tile.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts an ngraph v0 Tile node into a cldnn::tile primitive. Only input 0 is wired into
+// the primitive; the target size is taken from the op's output shape.
+void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputIds = p.GetInputPrimitiveIDs(op);
+    std::string primitiveId = layer_type_name_ID(op);
+
+    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
+    auto primitive = cldnn::tile(primitiveId, inputIds[0], outTensor);
+
+    p.AddPrimitive(primitive);
+    p.AddPrimitiveToProfiler(op);
+}
+
+REGISTER_FACTORY_IMPL(v0, Tile);  // NOTE(review): macro is defined outside this file; presumably registers CreateTileOp for v0::Tile - confirm
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/topk.cpp
+++ b/inference-engine/src/cldnn_engine/ops/topk.cpp
@ -0,0 +1,123 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/topk.hpp"
+
+#include "api/arg_max_min.hpp"
+#include "api/mutable_data.hpp"
+
+namespace CLDNNPlugin {
+
+// Maps a (possibly negative) IE axis index to the cldnn arg_max_min axis name.
+// Rank-5 inputs use the batch/feature/z/y/x table, every other rank uses batch/feature/y/x.
+// A negative axis in [-rank, -1] is wrapped by adding the rank first; any index that is still
+// outside the table afterwards falls back to the batch axis.
+static cldnn::arg_max_min::axis_name GetAxis(int32_t axis, size_t in_rank) {
+    using axis_name = cldnn::arg_max_min::axis_name;
+    static const axis_name axes_5d[] = {axis_name::batch, axis_name::feature, axis_name::z,
+                                        axis_name::y, axis_name::x};
+    static const axis_name axes_4d[] = {axis_name::batch, axis_name::feature,
+                                        axis_name::y, axis_name::x};
+
+    const bool is_5d = (in_rank == 5);
+    const int32_t rank = static_cast<int32_t>(in_rank);
+    if (axis <= -1 && axis >= -rank)
+        axis += rank;
+
+    const axis_name* table = is_5d ? axes_5d : axes_4d;
+    const int32_t table_size = is_5d ? 5 : 4;
+    if (axis >= 0 && axis < table_size)
+        return table[axis];
+
+    return axis_name::batch;
+}
+
+// Translates an ngraph v1 TopK node into cldnn::arg_max_min.
+// With one output a single arg_max_min primitive is created. With two outputs the second
+// output is routed through a shared cldnn memory buffer: a mutable_data primitive wrapping the
+// buffer is appended to the arg_max_min inputs, and a second mutable_data that depends on the
+// arg_max_min primitive exposes the same buffer under the "<layerName>.1" id.
+// Throws for any other number of outputs.
+void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
+    p.ValidateInputs(op, {2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    cldnn::arg_max_min::out_type otype;
+    cldnn::arg_max_min::sort_type stype;
+
+    // Any mode other than MAX is treated as min.
+    if (op->get_mode() == ngraph::op::v1::TopK::Mode::MAX)
+        otype = cldnn::arg_max_min::out_type::max;
+    else
+        otype = cldnn::arg_max_min::out_type::min;
+
+    // Any sort type other than SORT_VALUES is treated as sort-by-indices.
+    if (op->get_sort_type() == ngraph::op::v1::TopK::SortType::SORT_VALUES)
+        stype = cldnn::arg_max_min::sort_type::sort_by_values;
+    else
+        stype = cldnn::arg_max_min::sort_type::sort_by_indices;
+
+    uint32_t top_k = op->get_k();
+    cldnn::arg_max_min::axis_name chosen_axis = GetAxis(static_cast<int32_t>(op->get_axis()),
+                                                        op->get_input_shape(0).size());
+
+    if (op->get_output_size() == 2) {
+        // The buffer for the second output is allocated as i32 when the node declares i64.
+        auto mutable_precision = op->get_output_element_type(1);
+        if (mutable_precision == ngraph::element::i64) {
+            mutable_precision = ngraph::element::i32;
+        }
+
+        cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision),
+                                                    DefaultFormatForDims(op->get_output_shape(1).size()),
+                                                    CldnnTensorFromIEDims(op->get_output_shape(1)));
+
+        auto shared_memory = cldnn::memory::allocate(p.GetEngine(), mutableLayout);
+
+        // "Write" side: mutable_data over the shared buffer, passed to arg_max_min as an extra input.
+        cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(op) + "_md_write";
+        auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory);
+        p.primitivesToIRLayersMap[argmax_mutable_id_w] = {op->get_friendly_name()};
+        p.primitiveIDs[argmax_mutable_id_w] = argmax_mutable_id_w;
+        p.AddPrimitive(argmax_mutable_prim);
+        inputPrimitives.push_back(argmax_mutable_id_w);
+
+        // The arg_max_min primitive itself is registered as "<layerName>.0".
+        std::string ArgMaxLayerName = layerName + ".0";
+        auto argmaxPrim = cldnn::arg_max_min(ArgMaxLayerName,
+                                             inputPrimitives,
+                                             otype,
+                                             top_k,
+                                             chosen_axis,
+                                             stype,
+                                             true,
+                                             cldnn::padding({0, 0, 0, 0}, 0),
+                                             DataTypeFromPrecision(op->get_output_element_type(0)));
+
+        p.AddPrimitive(argmaxPrim);
+
+        // "Read" side: a second mutable_data over the same buffer that lists the arg_max_min
+        // primitive as its input, registered as "<layerName>.1".
+        cldnn::primitive_id argmax_mutable_id_r = layerName + ".1";
+        auto argmax_mutable_prim_r = cldnn::mutable_data(argmax_mutable_id_r, {ArgMaxLayerName}, shared_memory);
+        p.primitivesToIRLayersMap[argmax_mutable_id_r] = {op->get_friendly_name()};
+        p.primitiveIDs[argmax_mutable_id_r] = argmax_mutable_id_r;
+        p.AddPrimitive(argmax_mutable_prim_r);
+        p.InitProfileInfo(ArgMaxLayerName, layer_type_lower(op));
+        p.AddPrimitiveToProfiler(ArgMaxLayerName, op);
+    } else if (op->get_output_size() == 1) {
+        auto argmaxPrim = cldnn::arg_max_min(layerName,
+                                             inputPrimitives,
+                                             otype,
+                                             top_k,
+                                             chosen_axis,
+                                             stype,
+                                             true,
+                                             cldnn::padding({0, 0, 0, 0}, 0),
+                                             DataTypeFromPrecision(op->get_output_element_type(0)));
+
+        p.AddPrimitive(argmaxPrim);
+        p.AddPrimitiveToProfiler(op);
+    } else {
+        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect TopK outputs number";
+    }
+}
+
+// NOTE(review): REGISTER_FACTORY_IMPL is defined elsewhere; presumably it registers CreateTopKOp
+// as the factory for ngraph::op::v1::TopK - confirm.
+REGISTER_FACTORY_IMPL(v1, TopK);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/transpose.cpp
+++ b/inference-engine/src/cldnn_engine/ops/transpose.cpp
@ -0,0 +1,80 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+
+#include "ngraph/op/transpose.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "api/permute.hpp"
+
+namespace CLDNNPlugin {
+
+// Converts a permutation order from IE dimension layout to the cldnn one.
+//   ie_order       - order in IE layout (batch, feature, spatial dims...)
+//   value_to_align - value used to pad the order when it has fewer than 4 entries
+// Returns a copy of ie_order padded to at least 4 entries, with the spatial part
+// (everything from index 2 on) reversed.
+template<class Type>
+std::vector<Type> GetPermuteOrder(const std::vector<Type>& ie_order, Type value_to_align = 0) {
+    static_assert(std::is_integral<Type>::value, "Integeral required.");
+    std::vector<Type> cldnn_order = ie_order;
+
+    // 1. Align to min. 4 sizes.
+    // resize() pads as many entries as needed; a single push_back left orders with fewer
+    // than 3 entries still shorter than 4.
+    if (cldnn_order.size() < 4)
+        cldnn_order.resize(4, value_to_align);
+
+    // 2. Swap spatial positions (mirror the tail that starts at index 2).
+    // size >= 4 is guaranteed above, so (size - 2) cannot underflow.
+    const size_t size = cldnn_order.size();
+    for (size_t i = 0; i < (size - 2) / 2; i++) {
+        std::swap(cldnn_order[2 + i], cldnn_order[size - 1 - i]);
+    }
+
+    return cldnn_order;
+}
+
+// Translates an ngraph v1 Transpose node into cldnn::permute.
+// The order is read from the second input, which must be a Constant (throws otherwise).
+// A missing or empty order means "reverse all dimensions". The order is padded up to rank 4
+// with identity entries and then converted from IE to cldnn dimension ordering.
+void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
+    p.ValidateInputs(op, {1, 2});
+    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
+    std::string layerName = layer_type_name_ID(op);
+
+    std::vector<uint16_t> ie_order;
+    if (op->get_input_size() == 2) {
+        auto order_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
+        if (!order_constant) {
+            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+        ie_order = order_constant->cast_vector<uint16_t>();
+    }
+
+    // Working rank is the input rank, but never less than 4.
+    int rank = std::max(4, static_cast<int>(op->get_input_shape(0).size()));
+    if (ie_order.empty()) {
+        // no explicit order: default to the fully reversed order rank-1, ..., 1, 0
+        for (int o = rank - 1; o >= 0; o--)
+            ie_order.push_back((uint16_t)o);
+    }
+
+    // if order size is less than 4 - fill the rest with just copy
+    for (auto o = ie_order.size(); o < rank; o++)
+        ie_order.push_back((uint16_t)o);
+
+    /*
+        Because of the cldnn ordering: bfxy, and IE ordering: bfyx
+        we need to adjust the permute order.
+    */
+    std::vector<uint16_t> cldnn_permute_order;
+    // 1. Switch permute order values for spatial dims
+    //    (values >= 2 are mirrored: o -> 1 + size - o; batch/feature values stay as is)
+    for (auto const& o : ie_order) {
+        if (o >= 2)
+            cldnn_permute_order.push_back(1 + ie_order.size() - o);
+        else
+            cldnn_permute_order.push_back(o);
+    }
+    // 2. Switch the positions of the spatial entries as well
+    cldnn_permute_order = GetPermuteOrder(cldnn_permute_order);
+
+    auto permutePrim = cldnn::permute(layerName,
+                                      inputPrimitives[0],
+                                      cldnn_permute_order);
+
+    p.AddPrimitive(permutePrim);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// NOTE(review): REGISTER_FACTORY_IMPL is defined elsewhere; presumably it registers
+// CreateTransposeOp as the factory for ngraph::op::v1::Transpose - confirm.
+REGISTER_FACTORY_IMPL(v1, Transpose);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/cldnn_engine/ops/unary.cpp
+++ b/inference-engine/src/cldnn_engine/ops/unary.cpp
@ -0,0 +1,312 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "transformations/utils/utils.hpp"
+
+#include "ngraph/op/tanh.hpp"
+#include "ngraph/op/elu.hpp"
+#include "ngraph/op/sigmoid.hpp"
+#include "ngraph/op/relu.hpp"
+#include "ngraph/op/prelu.hpp"
+#include "ngraph/op/clamp.hpp"
+#include "ngraph/op/exp.hpp"
+#include "ngraph/op/not.hpp"
+#include "ngraph/op/asin.hpp"
+#include "ngraph/op/asinh.hpp"
+#include "ngraph/op/acos.hpp"
+#include "ngraph/op/acosh.hpp"
+#include "ngraph/op/atan.hpp"
+#include "ngraph/op/atanh.hpp"
+#include "ngraph/op/abs.hpp"
+#include "ngraph/op/floor.hpp"
+#include "ngraph/op/ceiling.hpp"
+#include "ngraph/op/erf.hpp"
+#include "ngraph/op/hard_sigmoid.hpp"
+#include "ngraph/op/log.hpp"
+#include "ngraph/op/negative.hpp"
+#include "ngraph/op/selu.hpp"
+#include "ngraph/op/softplus.hpp"
+#include "ngraph/op/tan.hpp"
+#include "ngraph/op/sin.hpp"
+#include "ngraph/op/sinh.hpp"
+#include "ngraph/op/cos.hpp"
+#include "ngraph/op/cosh.hpp"
+#include "ngraph/op/swish.hpp"
+#include "ngraph/op/hswish.hpp"
+#include "ngraph/op/mish.hpp"
+#include "ngraph/op/gelu.hpp"
+#include "ngraph/op/sign.hpp"
+#include "ngraph/op/hsigmoid.hpp"
+#include "ngraph/op/round.hpp"
+
+#include "api/activation.hpp"
+
+namespace CLDNNPlugin {
+
+// Shared helper for all unary activations in this file: builds one cldnn::activation
+// primitive named after `op`, fed by the node's first input primitive, adds it to the
+// program and calls AddPrimitiveToProfiler for the node.
+void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
+                          cldnn::activation_func func, cldnn::activation_additional_params params) {
+    const auto inputIds = p.GetInputPrimitiveIDs(op);
+    const std::string primitiveName = layer_type_name_ID(op);
+
+    auto activation = cldnn::activation(primitiveName, inputIds[0], func, params);
+    p.AddPrimitive(activation);
+    p.AddPrimitiveToProfiler(op);
+}
+
+// Lowers ngraph v0 Tanh to a cldnn activation with activation_func::hyperbolic_tan.
+void CreateTanhOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tanh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hyperbolic_tan, {});
+}
+
+// Lowers ngraph v0 Elu to a cldnn activation with activation_func::elu.
+// The node's alpha attribute is narrowed to float and passed as the first additional parameter.
+void CreateEluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Elu>& op) {
+    auto alpha = static_cast<float>(op->get_alpha());
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::elu, {alpha});
+}
+
+// Lowers ngraph v0 Sigmoid to a cldnn activation with activation_func::logistic.
+void CreateSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sigmoid>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::logistic, {});
+}
+
+// Lowers ngraph v0 Relu to a cldnn activation with activation_func::relu.
+void CreateReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Relu>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu, {});
+}
+
+// Lowers ngraph v0 PRelu to a cldnn activation with activation_func::relu_negative_slope.
+// Two slope layouts are supported:
+//   * a Constant slope holding exactly one element - passed as an activation parameter;
+//   * a slope whose element count equals the output's dimension 1 - passed as a second
+//     input primitive.
+// Any other slope shape throws instead of silently creating no primitive for the node.
+void CreatePReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::PRelu>& op) {
+    p.ValidateInputs(op, {2});
+
+    auto slope_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    auto slope_shape = op->get_input_shape(1);
+    auto out_shape = op->get_output_shape(0);
+
+    if (slope_node && ngraph::shape_size(slope_shape) == 1) {
+        float slope;
+        if (!ngraph::op::util::get_single_value(slope_node, slope))
+            THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu_negative_slope, {slope});
+    } else if (out_shape.size() >= 2 && ngraph::shape_size(slope_shape) == out_shape[1]) {
+        auto inputs = p.GetInputPrimitiveIDs(op);
+        std::string layerName = layer_type_name_ID(op);
+        auto activationPrimitive = cldnn::activation(layerName, inputs[0], inputs[1], cldnn::activation_func::relu_negative_slope);
+        p.AddPrimitive(activationPrimitive);
+        p.AddPrimitiveToProfiler(op);
+    } else {
+        // Without this branch the node was skipped and no primitive was created for it.
+        THROW_IE_EXCEPTION << "Unsupported slope shape in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+}
+
+// Lowers ngraph v0 Clamp to a cldnn activation with activation_func::clamp.
+// The node's min/max attributes are narrowed to float and passed as {min, max}.
+void CreateClampOp(Program& p, const std::shared_ptr<ngraph::op::v0::Clamp>& op) {
+    float min = static_cast<float>(op->get_min());
+    float max = static_cast<float>(op->get_max());
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::clamp, {min, max});
+}
+
+// Lowers ngraph v0 Exp to a cldnn activation with activation_func::exp.
+void CreateExpOp(Program& p, const std::shared_ptr<ngraph::op::v0::Exp>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::exp, {});
+}
+
+// Lowers ngraph v1 LogicalNot to a cldnn activation with activation_func::negation.
+void CreateLogicalNotOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalNot>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::negation, {});
+}
+
+// Lowers ngraph v0 Asin to a cldnn activation with activation_func::asin.
+void CreateAsinOp(Program& p, const std::shared_ptr<ngraph::op::v0::Asin>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::asin, {});
+}
+
+// Lowers ngraph v3 Asinh to a cldnn activation with activation_func::asinh.
+void CreateAsinhOp(Program& p, const std::shared_ptr<ngraph::op::v3::Asinh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::asinh, {});
+}
+
+// Lowers ngraph v0 Acos to a cldnn activation with activation_func::acos.
+void CreateAcosOp(Program& p, const std::shared_ptr<ngraph::op::v0::Acos>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::acos, {});
+}
+
+// Lowers ngraph v3 Acosh to a cldnn activation with activation_func::acosh.
+void CreateAcoshOp(Program& p, const std::shared_ptr<ngraph::op::v3::Acosh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::acosh, {});
+}
+
+// Lowers ngraph v0 Atan to a cldnn activation with activation_func::atan.
+void CreateAtanOp(Program& p, const std::shared_ptr<ngraph::op::v0::Atan>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::atan, {});
+}
+
+// Lowers ngraph v3 Atanh to a cldnn activation with activation_func::atanh.
+void CreateAtanhOp(Program& p, const std::shared_ptr<ngraph::op::v3::Atanh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::atanh, {});
+}
+
+// Lowers ngraph v0 Abs to a cldnn activation with activation_func::abs.
+void CreateAbsOp(Program& p, const std::shared_ptr<ngraph::op::v0::Abs>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::abs, {});
+}
+
+// Lowers ngraph v0 Floor to a cldnn activation with activation_func::floor.
+void CreateFloorOp(Program& p, const std::shared_ptr<ngraph::op::v0::Floor>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::floor, {});
+}
+
+// Lowers ngraph v0 Ceiling to a cldnn activation with activation_func::ceil.
+void CreateCeilingOp(Program& p, const std::shared_ptr<ngraph::op::v0::Ceiling>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::ceil, {});
+}
+
+// Lowers ngraph v0 Sqrt to a cldnn activation with activation_func::sqrt.
+// NOTE(review): unlike the other ops here, "ngraph/op/sqrt.hpp" is not included by this file
+// directly; the declaration presumably arrives through another header - confirm.
+void CreateSqrtOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sqrt>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sqrt, {});
+}
+
+// Lowers ngraph v0 Erf to a cldnn activation with activation_func::erf.
+void CreateErfOp(Program& p, const std::shared_ptr<ngraph::op::v0::Erf>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::erf, {});
+}
+
+// Lowers ngraph v0 HardSigmoid to a cldnn activation with activation_func::hard_sigmoid.
+// Inputs 1 (alpha) and 2 (beta) must be Constants holding exactly one element each; their
+// values are passed as the activation parameters {alpha, beta}.
+// Throws when the parameter nodes are not constants, are not single-element, or cannot be
+// read as a single float value.
+void CreateHardSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v0::HardSigmoid>& op) {
+    p.ValidateInputs(op, {3});
+    auto alpha_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    auto beta_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
+    if (!alpha_node || !beta_node) {
+        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    if (ngraph::shape_size(alpha_node->get_output_shape(0)) == 1 &&
+        ngraph::shape_size(beta_node->get_output_shape(0)) == 1)  {
+        float alpha, beta;
+        if (!ngraph::op::util::get_single_value(alpha_node, alpha) || !ngraph::op::util::get_single_value(beta_node, beta)) {
+            THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+        }
+        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hard_sigmoid, {alpha, beta});
+    } else {
+        // Same handling as CreateSeluOp: previously the node was skipped and no primitive was created.
+        THROW_IE_EXCEPTION << "Unsupported shapes of parameter nodes in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+}
+
+// Lowers ngraph v0 Log to a cldnn activation with activation_func::log.
+void CreateLogOp(Program& p, const std::shared_ptr<ngraph::op::v0::Log>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::log, {});
+}
+
+// Lowers ngraph v0 Negative to a cldnn activation with activation_func::negative.
+void CreateNegativeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Negative>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::negative, {});
+}
+
+// Lowers ngraph v0 Selu to a cldnn activation with activation_func::selu.
+// Inputs 1 (alpha) and 2 (lambda) must be single-element Constants; their values become the
+// activation parameters {alpha, lambda}. Every unsupported configuration throws.
+void CreateSeluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Selu>& op) {
+    p.ValidateInputs(op, {3});
+
+    auto alpha_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    auto lambda_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
+    const bool both_constant = alpha_node && lambda_node;
+    if (!both_constant) {
+        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    // Only scalar-like parameters can be folded into the activation parameters.
+    const bool both_scalar = ngraph::shape_size(alpha_node->get_output_shape(0)) == 1 &&
+                             ngraph::shape_size(lambda_node->get_output_shape(0)) == 1;
+    if (!both_scalar) {
+        THROW_IE_EXCEPTION << "Unsupported shapes of parameter nodes in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    float alpha, lambda;
+    if (!ngraph::op::util::get_single_value(alpha_node, alpha) || !ngraph::op::util::get_single_value(lambda_node, lambda)) {
+        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::selu, {alpha, lambda});
+}
+
+// Lowers ngraph v4 SoftPlus to a cldnn activation with activation_func::softplus.
+void CreateSoftPlusOp(Program& p, const std::shared_ptr<ngraph::op::v4::SoftPlus>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::softplus, {});
+}
+
+// Lowers ngraph v0 Tan to a cldnn activation with activation_func::tan.
+void CreateTanOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tan>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::tan, {});
+}
+
+// Lowers ngraph v0 Sin to a cldnn activation with activation_func::sin.
+void CreateSinOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sin>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sin, {});
+}
+
+// Lowers ngraph v0 Sinh to a cldnn activation with activation_func::sinh.
+void CreateSinhOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sinh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sinh, {});
+}
+
+// Lowers ngraph v0 Cos to a cldnn activation with activation_func::cos.
+void CreateCosOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cos>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::cos, {});
+}
+
+// Lowers ngraph v0 Cosh to a cldnn activation with activation_func::cosh.
+void CreateCoshOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cosh>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::cosh, {});
+}
+
+// Lowers ngraph v4 Swish to a cldnn activation with activation_func::swish.
+// With a single input, beta is 1.0f. With two inputs, the second one must be a Constant
+// holding exactly one element whose value is used as beta; anything else throws.
+void CreateSwishOp(Program& p, const std::shared_ptr<ngraph::op::v4::Swish>& op) {
+    p.ValidateInputs(op, {1, 2});
+
+    // No explicit beta input: fall back to the fixed beta of 1.
+    if (op->get_input_size() != 2) {
+        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::swish, {1.0f});
+        return;
+    }
+
+    auto beta_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
+    if (!beta_node) {
+        THROW_IE_EXCEPTION << "Unsupported parameter type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    if (ngraph::shape_size(beta_node->get_output_shape(0)) != 1) {
+        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    float beta;
+    if (!ngraph::op::util::get_single_value(beta_node, beta)) {
+        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
+    }
+
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::swish, {beta});
+}
+
+// Lowers ngraph v4 HSwish to a cldnn activation with activation_func::hswish.
+void CreateHSwishOp(Program& p, const std::shared_ptr<ngraph::op::v4::HSwish>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hswish, {});
+}
+
+// Lowers ngraph v4 Mish to a cldnn activation with activation_func::mish.
+void CreateMishOp(Program& p, const std::shared_ptr<ngraph::op::v4::Mish>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::mish, {});
+}
+
+// Lowers ngraph v0 Gelu to a cldnn activation with activation_func::gelu.
+void CreateGeluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Gelu>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::gelu, {});
+}
+
+// Lowers ngraph v0 Sign to a cldnn activation with activation_func::sign.
+void CreateSignOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sign>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sign, {});
+}
+
+// Lowers ngraph v5 HSigmoid to a cldnn activation with activation_func::hsigmoid.
+void CreateHSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v5::HSigmoid>& op) {
+    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hsigmoid, {});
+}
+
+// Lowers ngraph v5 Round to a cldnn activation. The rounding mode selects the function:
+// HALF_TO_EVEN -> round_half_to_even, HALF_AWAY_FROM_ZERO -> round_half_away_from_zero.
+// Any other mode throws.
+void CreateRoundOp(Program& p, const std::shared_ptr<ngraph::op::v5::Round>& op) {
+    using RoundMode = ngraph::op::v5::Round::RoundMode;
+
+    const auto mode = op->get_mode();
+    auto func = cldnn::activation_func::none;
+    if (mode == RoundMode::HALF_TO_EVEN) {
+        func = cldnn::activation_func::round_half_to_even;
+    } else if (mode == RoundMode::HALF_AWAY_FROM_ZERO) {
+        func = cldnn::activation_func::round_half_away_from_zero;
+    } else {
+        THROW_IE_EXCEPTION << "Unsupported round mode in " << op->get_friendly_name() << ": " << static_cast<int>(mode);
+    }
+
+    CreateUnaryEltwiseOp(p, op, func, {});
+}
+
+// Factory registrations: one entry per Create<Name>Op function defined above, tagged with the
+// opset version of the ngraph op type that the function accepts.
+// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this file; presumably it binds each
+// Create<Name>Op to ngraph::op::<version>::<Name> in the plugin's op factory - confirm.
+REGISTER_FACTORY_IMPL(v0, Tanh);
+REGISTER_FACTORY_IMPL(v0, Elu);
+REGISTER_FACTORY_IMPL(v0, Sigmoid);
+REGISTER_FACTORY_IMPL(v0, Relu);
+REGISTER_FACTORY_IMPL(v0, PRelu);
+REGISTER_FACTORY_IMPL(v0, Clamp);
+REGISTER_FACTORY_IMPL(v0, Exp);
+REGISTER_FACTORY_IMPL(v1, LogicalNot);
+REGISTER_FACTORY_IMPL(v0, Asin);
+REGISTER_FACTORY_IMPL(v3, Asinh);
+REGISTER_FACTORY_IMPL(v0, Acos);
+REGISTER_FACTORY_IMPL(v3, Acosh);
+REGISTER_FACTORY_IMPL(v0, Atan);
+REGISTER_FACTORY_IMPL(v3, Atanh);
+REGISTER_FACTORY_IMPL(v0, Abs);
+REGISTER_FACTORY_IMPL(v0, Floor);
+REGISTER_FACTORY_IMPL(v0, Ceiling);
+REGISTER_FACTORY_IMPL(v0, Sqrt);
+REGISTER_FACTORY_IMPL(v0, Erf);
+REGISTER_FACTORY_IMPL(v0, HardSigmoid);
+REGISTER_FACTORY_IMPL(v0, Log);
+REGISTER_FACTORY_IMPL(v0, Negative);
+REGISTER_FACTORY_IMPL(v0, Selu);
+REGISTER_FACTORY_IMPL(v4, SoftPlus);
+REGISTER_FACTORY_IMPL(v0, Tan);
+REGISTER_FACTORY_IMPL(v0, Sin);
+REGISTER_FACTORY_IMPL(v0, Sinh);
+REGISTER_FACTORY_IMPL(v0, Cos);
+REGISTER_FACTORY_IMPL(v0, Cosh);
+REGISTER_FACTORY_IMPL(v4, Swish);
+REGISTER_FACTORY_IMPL(v4, HSwish);
+REGISTER_FACTORY_IMPL(v4, Mish);
+REGISTER_FACTORY_IMPL(v0, Gelu);
+REGISTER_FACTORY_IMPL(v0, Sign);
+REGISTER_FACTORY_IMPL(v5, HSigmoid);
+REGISTER_FACTORY_IMPL(v5, Round);
+
+}  // namespace CLDNNPlugin
--- a/inference-engine/src/transformations/include/ngraph_ops/nms_ie_internal.hpp
+++ b/inference-engine/src/transformations/include/ngraph_ops/nms_ie_internal.hpp
@ -0,0 +1,59 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include <transformations_visibility.hpp>
+
+#include "ngraph/coordinate_diff.hpp"
+#include "ngraph/op/op.hpp"
+
+namespace ngraph {
+namespace op {
+namespace internal {
+
+// Internal NonMaxSuppression operation with three outputs.
+// It is constructed either with five inputs (boxes, scores, max_output_boxes_per_class,
+// iou_threshold, score_threshold) or with six (the same plus soft_nms_sigma).
+class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op {
+public:
+    static constexpr NodeTypeInfo type_info{"NonMaxSuppressionIEInternal", 0};
+    const NodeTypeInfo& get_type_info() const override { return type_info; }
+
+    // Five-input form (no soft_nms_sigma). output_type defaults to i64.
+    NonMaxSuppressionIEInternal(const Output<Node>& boxes,
+                                const Output<Node>& scores,
+                                const Output<Node>& max_output_boxes_per_class,
+                                const Output<Node>& iou_threshold,
+                                const Output<Node>& score_threshold,
+                                int center_point_box,
+                                bool sort_result_descending,
+                                const ngraph::element::Type& output_type = ngraph::element::i64);
+
+    // Six-input form: same as above with an additional soft_nms_sigma input.
+    NonMaxSuppressionIEInternal(const Output<Node>& boxes,
+                                const Output<Node>& scores,
+                                const Output<Node>& max_output_boxes_per_class,
+                                const Output<Node>& iou_threshold,
+                                const Output<Node>& score_threshold,
+                                const Output<Node>& soft_nms_sigma,
+                                int center_point_box,
+                                bool sort_result_descending,
+                                const ngraph::element::Type& output_type = ngraph::element::i64);
+
+    // Sets the element types and (possibly dynamic) shapes of the three outputs.
+    void validate_and_infer_types() override;
+
+    // Exposes center_point_box, sort_result_descending and output_type to the visitor.
+    bool visit_attributes(AttributeVisitor& visitor) override;
+
+    // Re-creates the op on top of new inputs; 5 or 6 inputs are accepted.
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector & new_args) const override;
+
+    // Attributes; public so that users of the op can read them directly.
+    // m_center_point_box has no in-class default - both constructors always set it.
+    int m_center_point_box;
+    bool m_sort_result_descending = true;
+    // Element type used for outputs 0 and 2 (output 1 is always f32).
+    element::Type m_output_type;
+
+private:
+    // Reads the max_output_boxes_per_class value from the corresponding constant input.
+    int64_t max_boxes_output_from_input() const;
+};
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ngraph
--- a/inference-engine/src/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp
@ -0,0 +1,26 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <utility>
+#include <memory>
+
+#include <transformations_visibility.hpp>
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+// Forward declaration; the class itself is defined below, outside the namespace blocks.
+class TRANSFORMATIONS_API ConvertNMSToNMSIEInternal;
+
+}  // namespace pass
+}  // namespace ngraph
+
+// Matcher pass whose callback is set up in the constructor.
+// NOTE(review): judging by the name, it replaces NonMaxSuppression nodes with
+// NonMaxSuppressionIEInternal; the replacement logic lives in the .cpp - confirm there.
+class ngraph::pass::ConvertNMSToNMSIEInternal: public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    ConvertNMSToNMSIEInternal();
+};
--- a/inference-engine/src/transformations/src/ngraph_ops/nms_ie_internal.cpp
+++ b/inference-engine/src/transformations/src/ngraph_ops/nms_ie_internal.cpp
@ -0,0 +1,106 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+
+#include <ngraph/opsets/opset5.hpp>
+#include "ngraph_ops/nms_ie_internal.hpp"
+
+using namespace std;
+using namespace ngraph;
+
+// Out-of-class definition of the static constexpr member declared in the header
+// (needed before C++17 whenever the member is odr-used, e.g. bound to a reference).
+constexpr NodeTypeInfo op::internal::NonMaxSuppressionIEInternal::type_info;
+
+// Five-input constructor: stores the attributes, registers the inputs in the order
+// boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold and runs
+// validation / type inference right away.
+op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Output<Node>& boxes,
+                                                                       const Output<Node>& scores,
+                                                                       const Output<Node>& max_output_boxes_per_class,
+                                                                       const Output<Node>& iou_threshold,
+                                                                       const Output<Node>& score_threshold,
+                                                                       int center_point_box,
+                                                                       bool sort_result_descending,
+                                                                       const ngraph::element::Type& output_type)
+        : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}),
+          m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type) {
+    constructor_validate_and_infer_types();
+}
+
+// Six-input constructor: identical to the five-input one, with soft_nms_sigma appended
+// as the last input.
+op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Output<Node>& boxes,
+                                                                       const Output<Node>& scores,
+                                                                       const Output<Node>& max_output_boxes_per_class,
+                                                                       const Output<Node>& iou_threshold,
+                                                                       const Output<Node>& score_threshold,
+                                                                       const Output<Node>& soft_nms_sigma,
+                                                                       int center_point_box,
+                                                                       bool sort_result_descending,
+                                                                       const ngraph::element::Type& output_type)
+        : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}),
+          m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type) {
+    constructor_validate_and_infer_types();
+}
+
+// Creates a copy of this op on top of `new_args`, keeping the current attributes.
+// The number of arguments selects the constructor (5 or 6 inputs); any other count
+// raises ngraph_error.
+std::shared_ptr<Node> op::internal::NonMaxSuppressionIEInternal::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
+    switch (new_args.size()) {
+    case 6:
+        return make_shared<NonMaxSuppressionIEInternal>(new_args.at(0), new_args.at(1), new_args.at(2),
+                                                        new_args.at(3), new_args.at(4), new_args.at(5),
+                                                        m_center_point_box, m_sort_result_descending, m_output_type);
+    case 5:
+        return make_shared<NonMaxSuppressionIEInternal>(new_args.at(0), new_args.at(1), new_args.at(2),
+                                                        new_args.at(3), new_args.at(4),
+                                                        m_center_point_box, m_sort_result_descending, m_output_type);
+    default:
+        throw ngraph::ngraph_error("Unsupported number of inputs: " + std::to_string(new_args.size()));
+    }
+}
+
+// Reports the three attributes of the op to the visitor under the names
+// "center_point_box", "sort_result_descending" and "output_type". Always returns true.
+bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisitor& visitor) {
+    visitor.on_attribute("center_point_box", m_center_point_box);
+    visitor.on_attribute("sort_result_descending", m_sort_result_descending);
+    visitor.on_attribute("output_type", m_output_type);
+    return true;
+}
+
+// Input port indices, matching the order in which the constructors pass inputs to Op.
+static constexpr size_t boxes_port = 0;
+static constexpr size_t scores_port = 1;
+static constexpr size_t max_output_boxes_per_class_port = 2;
+
+// Returns the first value of the max_output_boxes_per_class input, which must be a Constant.
+// Returns 0 when the op has fewer than three inputs.
+// Throws ngraph_error when the input is not a Constant instead of dereferencing a null pointer.
+int64_t op::internal::NonMaxSuppressionIEInternal::max_boxes_output_from_input() const {
+    if (inputs().size() < 3) {
+        return 0;
+    }
+
+    const auto max_output_boxes_input =
+        as_type_ptr<op::Constant>(input_value(max_output_boxes_per_class_port).get_node_shared_ptr());
+    // as_type_ptr yields nullptr for any non-Constant producer.
+    if (!max_output_boxes_input) {
+        throw ngraph::ngraph_error("max_output_boxes_per_class input of NonMaxSuppressionIEInternal must be a Constant");
+    }
+
+    return max_output_boxes_input->cast_vector<int64_t>().at(0);
+}
+
+// Infers the output types and shapes.
+// Outputs 0 and 1 share the shape {N, 3}: output 0 has m_output_type, output 1 is f32.
+// Output 2 is a single element of m_output_type. N stays dynamic unless both input ranks are
+// static, the needed dimensions are static and max_output_boxes_per_class is a constant; then
+// N = min(num_boxes, max_output_boxes_per_class) * num_classes * scores_ps[0].
+void op::internal::NonMaxSuppressionIEInternal::validate_and_infer_types() {
+    const auto boxes_ps = get_input_partial_shape(boxes_port);
+    const auto scores_ps = get_input_partial_shape(scores_port);
+
+    // NonMaxSuppression produces triplets
+    // that have the following format: [batch_index, class_index, box_index]
+    PartialShape out_shape = {Dimension::dynamic(), 3};
+
+    const bool ranks_known = boxes_ps.rank().is_static() && scores_ps.rank().is_static();
+    if (ranks_known) {
+        const auto boxes_dim = boxes_ps[1];
+        const auto limit_node = input_value(max_output_boxes_per_class_port).get_node_shared_ptr();
+
+        const bool dims_known = boxes_dim.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static();
+        if (dims_known && op::is_constant(limit_node)) {
+            const auto num_boxes = boxes_dim.get_length();
+            const auto num_classes = scores_ps[1].get_length();
+            const auto per_class_limit = max_boxes_output_from_input();
+
+            // Upper bound of selected boxes over all classes and all entries of scores dim 0.
+            out_shape[0] = std::min(num_boxes, per_class_limit) * num_classes *
+                           scores_ps[0].get_length();
+        }
+    }
+
+    set_output_type(0, m_output_type, out_shape);
+    set_output_type(1, element::f32, out_shape);
+    set_output_type(2, m_output_type, Shape{1});
+}
--- a/inference-engine/src/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp
@ -0,0 +1,123 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset5.hpp>
+
+#include <ngraph/rt_info.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+
+#include "ngraph_ops/nms_ie_internal.hpp"
+#include "transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp"
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertNMSToNMSIEInternal, "ConvertNMSToNMSIEInternal", 0);
+
+// Registers a matcher that replaces opset5::NonMaxSuppression with
+// op::internal::NonMaxSuppressionIEInternal:
+//   * inputs 2..4 (and input 5 when a non-zero soft_nms_sigma is supplied) are reshaped to shape {1};
+//   * the internal op is created with element::i32 as its output type;
+//   * outputs 0 and 2 get a Convert back to the original element type when the types differ;
+//   * output 1 is forwarded unchanged.
+ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() {
+    auto nms = ngraph::pattern::wrap_type<ngraph::opset5::NonMaxSuppression>();
+
+    ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
+        auto nms_5 = std::dynamic_pointer_cast<ngraph::opset5::NonMaxSuppression>(m.get_match_root());
+        if (!nms_5) {
+            return false;
+        }
+
+        const auto new_args = nms_5->input_values();
+        const std::size_t num_of_inputs = new_args.size();
+
+        // Optional inputs that are not connected are substituted with scalar zero constants.
+        const auto& arg2 = num_of_inputs > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i32, Shape{}, {0});
+        const auto& arg3 = num_of_inputs > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f});
+        const auto& arg4 = num_of_inputs > 4 ? new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f});
+
+        // Nodes created by this callback; runtime info of the matched NMS is copied onto them.
+        NodeVector new_ops;
+
+        // Each Reshape gets its own {1} target-shape constant. All four constants are created
+        // up front, in this order, so the sequence of node creation is unchanged.
+        Output<Node> new_shape_for_max_per_class = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
+        Output<Node> new_shape_for_iou_threshold = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
+        Output<Node> new_shape_for_score_threshold = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
+        Output<Node> new_shape_for_soft_nms_sigma = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
+
+        Output<Node> new_max_per_class = std::make_shared<opset1::Reshape>(arg2, new_shape_for_max_per_class, true);
+        new_ops.emplace_back(new_max_per_class.get_node_shared_ptr());
+
+        Output<Node> new_iou_threshold = std::make_shared<opset1::Reshape>(arg3, new_shape_for_iou_threshold, true);
+        new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr());
+
+        Output<Node> new_score_threshold = std::make_shared<opset1::Reshape>(arg4, new_shape_for_score_threshold, true);
+        new_ops.emplace_back(new_score_threshold.get_node_shared_ptr());
+
+        // The internal op takes the box encoding as an integer flag: 1 for CENTER, 0 for CORNER.
+        int center_point_box = 0;
+        switch (nms_5->get_box_encoding()) {
+            case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER:
+                center_point_box = 1;
+                break;
+            case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER:
+                center_point_box = 0;
+                break;
+            default:
+                throw ngraph_error("NonMaxSuppression layer " + nms_5->get_friendly_name() +
+                                   " has unsupported box encoding");
+        }
+
+        std::shared_ptr<op::internal::NonMaxSuppressionIEInternal> nms_legacy{nullptr};
+
+        // The six-input form is used only when a soft_nms_sigma input exists and is non-zero.
+        if (num_of_inputs > 5 && nms_5->soft_nms_sigma_from_input() != 0.0f) {
+            Output<Node> new_soft_nms_sigma =
+                    std::make_shared<opset1::Reshape>(new_args.at(5), new_shape_for_soft_nms_sigma, true);
+            new_ops.emplace_back(new_soft_nms_sigma.get_node_shared_ptr());
+            nms_legacy = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(
+                    new_args.at(0),
+                    new_args.at(1),
+                    new_max_per_class,
+                    new_iou_threshold,
+                    new_score_threshold,
+                    new_soft_nms_sigma,
+                    center_point_box,
+                    nms_5->get_sort_result_descending(),
+                    element::i32);
+        } else {
+            nms_legacy = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(
+                    new_args.at(0),
+                    new_args.at(1),
+                    new_max_per_class,
+                    new_iou_threshold,
+                    new_score_threshold,
+                    center_point_box,
+                    nms_5->get_sort_result_descending(),
+                    element::i32);
+        }
+        new_ops.push_back(nms_legacy);
+
+        // Returns the given output of the new op, wrapped in a Convert when its element type
+        // differs from the matching output of the original NMS.
+        const auto restore_element_type = [&](std::size_t port, const char* name_suffix) -> Output<Node> {
+            Output<Node> result = nms_legacy->output(port);
+            const element::Type original_type = nms_5->output(port).get_element_type();
+            if (original_type != result.get_element_type()) {
+                result = std::make_shared<opset1::Convert>(result, original_type);
+                result.get_node_shared_ptr()->set_friendly_name(nms_5->get_friendly_name() + name_suffix);
+                new_ops.emplace_back(result.get_node_shared_ptr());
+            }
+            return result;
+        };
+
+        Output<Node> output_0 = restore_element_type(0, "/convert.0");
+        Output<Node> output_2 = restore_element_type(2, "/convert.2");
+
+        nms_legacy->set_friendly_name(nms_5->get_friendly_name());
+        ngraph::copy_runtime_info(nms_5, new_ops);
+        ngraph::replace_node(nms_5, {output_0, nms_legacy->output(1), output_2});
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(nms, "ConvertNMSToNMSIEInternal");
+    this->register_matcher(m, callback);
+}
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_internal_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_nms_to_nms_ie_internal_test.cpp
@ -0,0 +1,192 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <memory>
+#include <queue>
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset1.hpp>
+#include <ngraph/opsets/opset3.hpp>
+#include <ngraph/opsets/opset5.hpp>
+#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
+#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
+#include <transformations/init_node_info.hpp>
+#include <transformations/utils/utils.hpp>
+#include <ngraph_ops/nms_ie_internal.hpp>
+#include <ngraph/pass/constant_folding.hpp>
+#include <ngraph/pass/manager.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ngraph;
+
+// Converts an opset1 NMS via ConvertNMS1ToNMS5 + ConvertNMSToNMSIEInternal and compares the
+// result with a hand-built NonMaxSuppressionIEInternal followed by a Convert to i64 on output 0.
+TEST(TransformationTests, ConvertNMS1ToNMSIEInternal) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+    {
+        // Function under test: scalar (Shape{}) constants feed the opset1 NMS.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
+        auto nms = std::make_shared<opset1::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, op::v1::NonMaxSuppression::BoxEncodingType::CORNER, true);
+
+        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+        ngraph::pass::Manager manager;
+        manager.register_pass<ngraph::pass::InitNodeInfo>();
+        manager.register_pass<ngraph::pass::ConvertNMS1ToNMS5>();
+        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
+        manager.register_pass<ngraph::pass::ConstantFolding>();
+        manager.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
+    }
+
+    {
+        // Reference function: the constants already have Shape{1}, and output 0 is converted to i64.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
+        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, 0, true, element::i32);
+        auto convert = std::make_shared<opset1::Convert>(nms->output(0), element::i64);
+
+        f_ref = std::make_shared<Function>(NodeVector{convert}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Converts an opset3 NMS (i32 output type) via ConvertNMS3ToNMS5 + ConvertNMSToNMSIEInternal
+// and compares the result with a hand-built NonMaxSuppressionIEInternal.
+TEST(TransformationTests, ConvertNMS3ToNMSIEInternal) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+    {
+        // Function under test: scalar (Shape{}) constants feed the opset3 NMS.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
+        auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);
+
+        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+        ngraph::pass::Manager manager;
+        manager.register_pass<ngraph::pass::InitNodeInfo>();
+        manager.register_pass<ngraph::pass::ConvertNMS3ToNMS5>();
+        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
+        manager.register_pass<ngraph::pass::ConstantFolding>();
+        manager.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
+    }
+
+    {
+        // Reference function: the constants already have Shape{1}; no Convert is added.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
+        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, 0, true, element::i32);
+
+        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Converts an opset4 NMS (i32 output type) via ConvertNMS4ToNMS5 + ConvertNMSToNMSIEInternal
+// and compares the result with a hand-built NonMaxSuppressionIEInternal.
+TEST(TransformationTests, ConvertNMS4ToNMSIEInternal) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+    {
+        // Function under test: scalar (Shape{}) constants feed the opset4 NMS.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
+        auto nms = std::make_shared<opset4::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, opset4::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);
+
+        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+        ngraph::pass::Manager manager;
+        manager.register_pass<ngraph::pass::InitNodeInfo>();
+        manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
+        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
+        manager.register_pass<ngraph::pass::ConstantFolding>();
+        manager.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
+    }
+
+    {
+        // Reference function: the constants already have Shape{1}; no Convert is added.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
+        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, 0, true, element::i32);
+
+        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+// Converts an opset5 NMS with a non-zero soft_nms_sigma (0.5) via ConvertNMSToNMSIEInternal
+// and compares the result with a hand-built six-input NonMaxSuppressionIEInternal.
+TEST(TransformationTests, ConvertNMS5ToNMSIEInternal) {
+    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
+    {
+        // Function under test: scalar (Shape{}) constants feed the opset5 NMS.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
+        auto soft_nms_sigma = opset1::Constant::create(element::f32, Shape{}, {0.5});
+        auto nms = std::make_shared<opset5::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, soft_nms_sigma, opset5::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);
+
+        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+
+        ngraph::pass::Manager manager;
+        manager.register_pass<ngraph::pass::InitNodeInfo>();
+        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
+        manager.register_pass<ngraph::pass::ConstantFolding>();
+        manager.run_passes(f);
+        ASSERT_NO_THROW(check_rt_info(f));
+        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
+    }
+
+    {
+        // Reference function: all four constants already have Shape{1}; no Convert is added.
+        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
+        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
+        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
+        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
+        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
+        auto soft_nms_sigma = opset1::Constant::create(element::f32, Shape{1}, {0.5});
+        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
+                iou_threshold, score_threshold, soft_nms_sigma, 0, true, element::i32);
+
+        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
+        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
+    }
+
+    auto res = compare_functions(f, f_ref);
+    ASSERT_TRUE(res.first) << res.second;
+}
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_threading_tests.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_threading_tests.cpp
@ -23,15 +23,7 @@ TEST_P(CoreThreadingTestsWithIterations, smoke_LoadNetwork_RemoteContext) {
    InferenceEngine::Core ie;
    std::atomic<unsigned int> counter{0u};

-    const FuncTestUtils::TestModel::TestModel models[] = {
-        FuncTestUtils::TestModel::convReluNormPoolFcModelFP32,
-        FuncTestUtils::TestModel::convReluNormPoolFcModelFP16
-    };
    std::vector<InferenceEngine::CNNNetwork> networks;
-    for (auto & model : models) {
-        networks.emplace_back(ie.ReadNetwork(model.model_xml_str, model.weights_blob));
-    }
-
    networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::make2InputSubtract()));
    networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeMultiSingleConv()));
    networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSingleConv()));
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/activation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/activation.cpp
@ -9,7 +9,13 @@
 using namespace LayerTestsDefinitions;
 using namespace ngraph::helpers;
 namespace {
-
+// Common params
+// Candidate input precisions for the activation tests.
+// NOTE(review): none of the Combine() sets visible in this diff reference this list
+// (they use netPrecisions and Precision::UNSPECIFIED) - confirm it is used elsewhere in the file.
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I16,
+        InferenceEngine::Precision::U8
+};

 const std::vector<InferenceEngine::Precision> netPrecisions = {
        InferenceEngine::Precision::FP32,
@ -46,15 +52,26 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
        {HSwish,                {}},
        {SoftPlus,              {}},
        {HSigmoid,              {}},
+        {Swish,                 {{0.5f}}},
        {RoundHalfToEven,       {}},
        {RoundHalfAwayFromZero, {}}
 };

+// Activations exercised by basicPreluCases, each mapped to its list of constant-value sets
+// (a single one-element set per activation here).
+const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = {
+    {PReLu, {{-0.01f}}},
+    {LeakyRelu, {{0.01f}}}
+};
+
 std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> basic = {
        {{1, 50}, {{}}},
        {{1, 128}, {{}}},
 };

+// Input shape -> list of secondary shapes combined with it by basicPreluCases.
+// Each input shape is paired with {1} and with a shape equal to its last dimension.
+// NOTE(review): presumably the secondary shape is the slope/parameter tensor shape - confirm
+// against ActivationLayerTest.
+std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> preluBasic = {
+        {{1, 50}, {{1}, {50}}},
+        {{1, 128}, {{1}, {128}}},
+};
+
 const auto basicCases = ::testing::Combine(
        ::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)),
        ::testing::ValuesIn(netPrecisions),
@ -66,6 +83,21 @@ const auto basicCases = ::testing::Combine(
        ::testing::Values(CommonTestUtils::DEVICE_GPU)
 );

+// Parameter set for the PReLu/LeakyRelu instantiations: every (activation, constants) pair from
+// activationParamTypes x netPrecisions x every (shape, secondary shape) pair from preluBasic,
+// with unspecified input/output precision, ANY layouts and the GPU device.
+const auto basicPreluCases = ::testing::Combine(
+        ::testing::ValuesIn(CommonTestUtils::combineParams(activationParamTypes)),
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        ::testing::Values(InferenceEngine::Layout::ANY),
+        ::testing::Values(InferenceEngine::Layout::ANY),
+        ::testing::ValuesIn(CommonTestUtils::combineParams(preluBasic)),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+
 INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationLayerTest, basicCases, ActivationLayerTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName);

 }  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/broadcast.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/broadcast.cpp
@ -0,0 +1,174 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/broadcast.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+// Input precisions applied to every broadcast parameter set in this file.
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::BOOL
+};
+
+// NUMPY MODE
+
+// Input shapes; entry i is paired with targetShapesNumpy[i] by the parameter sets below.
+std::vector<std::vector<size_t>> inShapesNumpy = {
+        {3, 1},
+        {1, 4, 1}
+};
+
+// Target shapes; entry i is paired with inShapesNumpy[i].
+std::vector<std::vector<size_t>> targetShapesNumpy = {
+        {2, 3, 6},
+        {1, 4, 4}
+};
+
+// Case 1: {3, 1} -> {2, 3, 6}, run for every precision in inputPrecisions on the GPU device.
+const auto numpyBroadcastParams1 = ::testing::Combine(
+        ::testing::Values(targetShapesNumpy[0]),
+        ::testing::Values(ngraph::AxisSet{}), //not used in numpy mode
+        ::testing::Values(ngraph::op::BroadcastType::NUMPY),
+        ::testing::Values(inShapesNumpy[0]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestNumpyBroadcast1,
+        BroadcastLayerTest,
+        numpyBroadcastParams1,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// Case 2: {1, 4, 1} -> {1, 4, 4}, run for every precision in inputPrecisions on the GPU device.
+const auto numpyBroadcastParams2 = ::testing::Combine(
+        ::testing::Values(targetShapesNumpy[1]),
+        ::testing::Values(ngraph::AxisSet{}), //not used in numpy mode
+        ::testing::Values(ngraph::op::BroadcastType::NUMPY),
+        ::testing::Values(inShapesNumpy[1]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestNumpyBroadcast2,
+        BroadcastLayerTest,
+        numpyBroadcastParams2,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// BIDIRECTIONAL MODE
+
+// Input shapes; entry i is paired with targetShapesBidi[i] by the parameter sets below.
+std::vector<std::vector<size_t>> inShapesBidi = {
+        {4, 1},
+        {1, 4, 1},
+        {4, 1, 1}
+};
+
+// Target shapes; entry i is paired with inShapesBidi[i].
+std::vector<std::vector<size_t>> targetShapesBidi = {
+        {2, 1, 4},
+        {1, 4, 4},
+        {1, 1, 2, 2}
+};
+
+// Case 1: input {4, 1} with target {2, 1, 4}.
+const auto bidirectionalBroadcastParams1 = ::testing::Combine(
+        ::testing::Values(targetShapesBidi[0]),
+        ::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
+        ::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
+        ::testing::Values(inShapesBidi[0]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestBidirectionalBroadcast1,
+        BroadcastLayerTest,
+        bidirectionalBroadcastParams1,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// Case 2: input {1, 4, 1} with target {1, 4, 4}.
+const auto bidirectionalBroadcastParams2 = ::testing::Combine(
+        ::testing::Values(targetShapesBidi[1]),
+        ::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
+        ::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
+        ::testing::Values(inShapesBidi[1]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestBidirectionalBroadcast2,
+        BroadcastLayerTest,
+        bidirectionalBroadcastParams2,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// Case 3: input {4, 1, 1} with target {1, 1, 2, 2}.
+const auto bidirectionalBroadcastParams3 = ::testing::Combine(
+        ::testing::Values(targetShapesBidi[2]),
+        ::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
+        ::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
+        ::testing::Values(inShapesBidi[2]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestBidirectionalBroadcast3,
+        BroadcastLayerTest,
+        bidirectionalBroadcastParams3,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// EXPLICIT MODE
+
+// Input shapes; entry i is paired with targetShapesExplicit[i] and axes[i] below.
+std::vector<std::vector<size_t>> inShapesExplicit = {
+        {3, 1},
+        {2, 4}
+};
+
+// Target shapes; entry i is paired with inShapesExplicit[i].
+std::vector<std::vector<size_t>> targetShapesExplicit = {
+        {2, 3, 1},
+        {2, 3, 4}
+};
+
+// Axis sets passed as the second Combine() argument in explicit mode; entry i belongs to case i.
+std::vector<ngraph::AxisSet> axes = {
+        {1, 2},
+        {0, 2}
+};
+
+// Case 1: input {3, 1}, target {2, 3, 1}, axes {1, 2}.
+const auto explicitBroadcastParams1 = ::testing::Combine(
+        ::testing::Values(targetShapesExplicit[0]),
+        ::testing::Values(axes[0]),
+        ::testing::Values(ngraph::op::BroadcastType::EXPLICIT),
+        ::testing::Values(inShapesExplicit[0]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestExplicitBroadcast1,
+        BroadcastLayerTest,
+        explicitBroadcastParams1,
+        BroadcastLayerTest::getTestCaseName
+);
+
+// Case 2: input {2, 4}, target {2, 3, 4}, axes {0, 2}.
+const auto explicitBroadcastParams2 = ::testing::Combine(
+        ::testing::Values(targetShapesExplicit[1]),
+        ::testing::Values(axes[1]),
+        ::testing::Values(ngraph::op::BroadcastType::EXPLICIT),
+        ::testing::Values(inShapesExplicit[1]),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_TestExplicitBroadcast2,
+        BroadcastLayerTest,
+        explicitBroadcastParams2,
+        BroadcastLayerTest::getTestCaseName
+);
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/detection_output.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/detection_output.cpp
@ -0,0 +1,85 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "single_layer_tests/detection_output.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+// Attribute values shared by the 3-input and 5-input DetectionOutput instantiations.
+// Scalars are passed with Values(); vectors are swept with ValuesIn().
+const int numClasses = 11;
+const int backgroundLabelId = 0;
+const std::vector<int> topK = {75};
+const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
+const std::vector<std::string> codeType = {"caffe.PriorBoxParameter.CORNER", "caffe.PriorBoxParameter.CENTER_SIZE"};
+const float nmsThreshold = 0.5f;
+const float confidenceThreshold = 0.3f;
+const std::vector<bool> clipAfterNms = {true, false};
+const std::vector<bool> clipBeforeNms = {true, false};
+const std::vector<bool> decreaseLabelId = {true, false};
+// Used only by the 5-input parameter set; the 3-input set passes 0.0f in its place.
+const float objectnessScore = 0.4f;
+const std::vector<size_t> numberBatch = {1, 2};
+
+// Cartesian product of the attribute values above, in the order they are listed here.
+const auto commonAttributes = ::testing::Combine(
+        ::testing::Values(numClasses),
+        ::testing::Values(backgroundLabelId),
+        ::testing::ValuesIn(topK),
+        ::testing::ValuesIn(keepTopK),
+        ::testing::ValuesIn(codeType),
+        ::testing::Values(nmsThreshold),
+        ::testing::Values(confidenceThreshold),
+        ::testing::ValuesIn(clipAfterNms),
+        ::testing::ValuesIn(clipBeforeNms),
+        ::testing::ValuesIn(decreaseLabelId)
+);
+
+/* =============== 3 inputs cases =============== */
+
+// Shape-dependent parameters for the 3-input cases: the last two brace lists are left empty.
+// NOTE(review): field meanings come from ParamsWhichSizeDepends in
+// single_layer_tests/detection_output.hpp, which is not visible here - confirm before editing values.
+const std::vector<ParamsWhichSizeDepends> specificParams3In = {
+    ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {}, {}},
+    ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {}, {}},
+    ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {}, {}},
+    ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {}, {}},
+
+    ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {}, {}},
+    ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {}, {}},
+    ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {}, {}},
+    ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {}, {}}
+};
+
+// commonAttributes x specificParams3In x numberBatch, with 0.0f in the slot where the
+// 5-input set passes objectnessScore, on the GPU device.
+const auto params3Inputs = ::testing::Combine(
+        commonAttributes,
+        ::testing::ValuesIn(specificParams3In),
+        ::testing::ValuesIn(numberBatch),
+        ::testing::Values(0.0f),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
+
+/* =============== 5 inputs cases =============== */
+
+// Shape-dependent parameters for the 5-input cases: same rows as the 3-input table, but the
+// last two brace lists are filled in.
+// NOTE(review): field meanings come from ParamsWhichSizeDepends in
+// single_layer_tests/detection_output.hpp, which is not visible here - confirm before editing values.
+const std::vector<ParamsWhichSizeDepends> specificParams5In = {
+    ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 60}},
+    ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 660}},
+    ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 60}},
+    ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 660}},
+
+    ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 60}},
+    ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 660}},
+    ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 60}},
+    ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 660}}
+};
+
+// commonAttributes x specificParams5In x numberBatch, with objectnessScore, on the GPU device.
+const auto params5Inputs = ::testing::Combine(
+        commonAttributes,
+        ::testing::ValuesIn(specificParams5In),
+        ::testing::ValuesIn(numberBatch),
+        ::testing::Values(objectnessScore),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/eltwise.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/eltwise.cpp
@ -16,6 +16,8 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
        {{1, 10, 100}},
        {{4, 4, 16}},
        {{1, 1, 1, 3}},
+        {{2, 17, 5, 4}, {1, 17, 1, 1}},
+        {{2, 17, 5, 1}, {1, 17, 1, 4}},
        {{1, 2, 4}},
        {{1, 4, 4}},
        {{1, 4, 4, 1}},
@ -40,10 +42,14 @@ std::vector<CommonTestUtils::OpType> opTypes = {
 };

 std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
+        ngraph::helpers::EltwiseTypes::ADD,
        ngraph::helpers::EltwiseTypes::MULTIPLY,
        ngraph::helpers::EltwiseTypes::SUBTRACT,
-        ngraph::helpers::EltwiseTypes::ADD,
-        ngraph::helpers::EltwiseTypes::POWER
+        ngraph::helpers::EltwiseTypes::DIVIDE,
+        ngraph::helpers::EltwiseTypes::FLOOR_MOD,
+        ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
+        ngraph::helpers::EltwiseTypes::POWER,
+        ngraph::helpers::EltwiseTypes::MOD
 };

 std::map<std::string, std::string> additional_config = {};
@ -61,4 +67,17 @@ const auto multiply_params = ::testing::Combine(
        ::testing::Values(additional_config));

 INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs, EltwiseLayerTest, multiply_params, EltwiseLayerTest::getTestCaseName);
-}  // namespace
+
+
+std::vector<std::vector<std::vector<size_t>>> inShapesSingleThread = {  // NOTE(review): not referenced anywhere in the visible hunk — confirm a test instantiation consumes it
+        {{1, 2, 3, 4}},  // each entry holds a single shape
+        {{2, 2, 2, 2}},
+        {{2, 1, 2, 1, 2, 2}}  // six-dimensional shape
+};
+
+std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesSingleThread = {  // NOTE(review): not referenced anywhere in the visible hunk — confirm a test instantiation consumes it
+        ngraph::helpers::EltwiseTypes::ADD,
+        ngraph::helpers::EltwiseTypes::POWER,
+};
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp
@ -0,0 +1,48 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/fake_quantize.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {  // precisions swept by the smoke_FakeQuantize instantiation
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6}};  // two 4D input shapes
+const std::vector<std::vector<size_t>> constShapes = {{1}};  // one single-element shape; presumably the shape of the FQ range constants — confirm in fake_quantize.hpp
+const std::vector<size_t> levels = {16, 255, 256};  // values swept for the levels parameter
+
+const std::pair<std::string, std::map<std::string, std::string>> config = {};  // value-initialized: empty string, empty map
+const std::vector<float> fqArgs = {};  // intentionally empty — NOTE(review): presumably lets the test pick default ranges; verify
+const std::vector<float> inputParams = {};  // intentionally empty — same caveat as fqArgs
+
+
+const auto fqParams = ::testing::Combine(  // FakeQuantize-specific tuple: product of the four generators below
+        ::testing::ValuesIn(levels),  // ValuesIn: one test value per element
+        ::testing::ValuesIn(constShapes),
+        ::testing::Values(fqArgs),  // Values: the whole vector is a single test value
+        ::testing::Values(inputParams)  // likewise passed as one vector value
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,  // GPU instantiation of FakeQuantizeLayerTest
+                        ::testing::Combine(
+                                fqParams,  // nested tuple built above
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),  // NOTE(review): the two UNSPECIFIED precisions and two ANY layouts look like input/output settings — confirm order in fake_quantize.hpp
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputShapes),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU),  // target device
+                                ::testing::Values(config)),  // the empty pair declared above
+                        FakeQuantizeLayerTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp
@ -0,0 +1,129 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/group_convolution_backprop_data.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {  // only FP32 is swept in this file
+        InferenceEngine::Precision::FP32
+};
+
+const std::vector<size_t> numOutChannels = {16, 32};  // shared by the 2D and 3D parameter sets
+const std::vector<size_t> numGroups = {2, 8, 16};  // shared by the 2D and 3D parameter sets; each value divides both 16 and 32
+
+/* ============= 2D GroupConvolution ============= */
+const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10},  // 4D inputs; second dimension is 16 or 32
+                                                         {1, 32, 10, 10}};
+const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
+const std::vector<std::vector<size_t >> strides2D = {{1, 1}};  // unit stride only
+const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};  // zero padding only
+const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};  // zero padding only
+const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};  // unit dilation only
+
+const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine(  // 2D attribute sweep with PadType::EXPLICIT
+        ::testing::ValuesIn(kernels2D),
+        ::testing::ValuesIn(strides2D),
+        ::testing::ValuesIn(padBegins2D),
+        ::testing::ValuesIn(padEnds2D),
+        ::testing::ValuesIn(dilations2D),
+        ::testing::ValuesIn(numOutChannels),
+        ::testing::ValuesIn(numGroups),
+        ::testing::Values(ngraph::op::PadType::EXPLICIT)  // pads come from padBegins2D/padEnds2D
+);
+const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(  // 2D attribute sweep with PadType::VALID
+        ::testing::ValuesIn(kernels2D),
+        ::testing::ValuesIn(strides2D),
+        ::testing::Values(std::vector<ptrdiff_t>({0, 0})),  // literal zero pad-begin (same value as padBegins2D)
+        ::testing::Values(std::vector<ptrdiff_t>({0, 0})),  // literal zero pad-end (same value as padEnds2D)
+        ::testing::ValuesIn(dilations2D),
+        ::testing::ValuesIn(numOutChannels),
+        ::testing::ValuesIn(numGroups),
+        ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData2D_ExplicitPadding, GroupConvBackpropDataLayerTest,  // 2D cases, explicit padding
+                        ::testing::Combine(
+                                groupConvBackpropData2DParams_ExplicitPadding,  // nested tuple of op attributes
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),  // NOTE(review): the two UNSPECIFIED precisions and two ANY layouts look like input/output settings — confirm order in group_convolution_backprop_data.hpp
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputShapes2D),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        GroupConvBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData2D_AutoPadValid, GroupConvBackpropDataLayerTest,  // 2D cases, PadType::VALID
+                        ::testing::Combine(
+                                groupConvBackpropData2DParams_AutoPadValid,  // nested tuple of op attributes
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),  // NOTE(review): the two UNSPECIFIED precisions and two ANY layouts look like input/output settings — confirm order in group_convolution_backprop_data.hpp
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputShapes2D),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        GroupConvBackpropDataLayerTest::getTestCaseName);
+
+/* ============= 3D GroupConvolution ============= */
+const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5},  // 5D inputs; second dimension is 16 or 32
+                                                         {1, 32, 5, 5, 5}};
+const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
+const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};  // unit stride only
+const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};  // zero padding only
+const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};  // zero padding only
+const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};  // unit dilation only
+
+const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine(  // 3D attribute sweep with PadType::EXPLICIT
+        ::testing::ValuesIn(kernels3D),
+        ::testing::ValuesIn(strides3D),
+        ::testing::ValuesIn(padBegins3D),
+        ::testing::ValuesIn(padEnds3D),
+        ::testing::ValuesIn(dilations3D),
+        ::testing::ValuesIn(numOutChannels),
+        ::testing::ValuesIn(numGroups),
+        ::testing::Values(ngraph::op::PadType::EXPLICIT)  // pads come from padBegins3D/padEnds3D
+);
+const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(  // 3D attribute sweep with PadType::VALID
+        ::testing::ValuesIn(kernels3D),
+        ::testing::ValuesIn(strides3D),
+        ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),  // literal zero pad-begin (same value as padBegins3D)
+        ::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),  // literal zero pad-end (same value as padEnds3D)
+        ::testing::ValuesIn(dilations3D),
+        ::testing::ValuesIn(numOutChannels),
+        ::testing::ValuesIn(numGroups),
+        ::testing::Values(ngraph::op::PadType::VALID)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData3D_ExplicitPadding, GroupConvBackpropDataLayerTest,  // 3D cases, explicit padding
+                        ::testing::Combine(
+                                groupConvBackpropData3DParams_ExplicitPadding,  // nested tuple of op attributes
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),  // NOTE(review): the two UNSPECIFIED precisions and two ANY layouts look like input/output settings — confirm order in group_convolution_backprop_data.hpp
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputShapes3D),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        GroupConvBackpropDataLayerTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData3D_AutoPadValid, GroupConvBackpropDataLayerTest,  // 3D cases, PadType::VALID
+                        ::testing::Combine(
+                                groupConvBackpropData3DParams_AutoPadValid,  // nested tuple of op attributes
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),  // NOTE(review): the two UNSPECIFIED precisions and two ANY layouts look like input/output settings — confirm order in group_convolution_backprop_data.hpp
+                                ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::Values(InferenceEngine::Layout::ANY),
+                                ::testing::ValuesIn(inputShapes3D),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        GroupConvBackpropDataLayerTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/gru_cell.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/gru_cell.cpp
@ -0,0 +1,37 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/gru_cell.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+    std::vector<bool> should_decompose{false, true};  // both settings of the decomposition flag are swept
+    std::vector<size_t> batch{5};
+    std::vector<size_t> hidden_size{1, 10};
+    std::vector<size_t> input_size{1, 30};
+    std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},  // each entry is a pair of activation names
+                                                         {"tanh", "relu"}};
+    std::vector<float> clip = {0.0f, 0.7f};  // 0.0f presumably means "no clipping" — confirm in gru_cell.hpp
+    std::vector<bool> linear_before_reset = {true, false};
+    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                             InferenceEngine::Precision::FP16};
+
+    INSTANTIATE_TEST_CASE_P(GRUCellCommon, GRUCellTest,  // NOTE(review): no smoke_ prefix, unlike most instantiations added in this change — confirm that is intended
+            ::testing::Combine(
+            ::testing::ValuesIn(should_decompose),
+            ::testing::ValuesIn(batch),
+            ::testing::ValuesIn(hidden_size),
+            ::testing::ValuesIn(input_size),
+            ::testing::ValuesIn(activations),
+            ::testing::ValuesIn(clip),
+            ::testing::ValuesIn(linear_before_reset),
+            ::testing::ValuesIn(netPrecisions),
+            ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+            GRUCellTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/gru_sequence.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/gru_sequence.cpp
@ -0,0 +1,65 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <ngraph/op/util/attr_types.hpp>
+#include "single_layer_tests/gru_sequence.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+    std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,  // sequence test modes swept by both instantiations
+                                                         ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
+                                                         ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
+                                                         ngraph::helpers::SequenceTestsMode::PURE_SEQ};
+    // output values increase rapidly without clip, so the zero-clip case uses only seq_lengths = 2
+    std::vector<size_t> seq_lengths_zero_clip{2};
+    std::vector<size_t> seq_lengths_clip_non_zero{20};  // longer sequence, paired with clip_non_zeros
+    std::vector<size_t> batch{10};
+    std::vector<size_t> hidden_size{1, 10};
+    // std::vector<size_t> input_size{10};  (disabled: input size is hardcoded to 10, see the notes in the Combine calls below)
+    std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},  // each entry is a pair of activation names
+                                                         {"tanh", "relu"}};
+    std::vector<bool> linear_before_reset = {true, false};
+    std::vector<float> clip{0.f};  // used by GRUSequenceCommonZeroClip
+    std::vector<float> clip_non_zeros{0.7f};  // used by GRUSequenceCommonClip
+    std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
+                                                                     ngraph::op::RecurrentSequenceDirection::REVERSE,
+                                                                     ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL
+    };
+    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                             InferenceEngine::Precision::FP16};
+
+    INSTANTIATE_TEST_CASE_P(GRUSequenceCommonZeroClip, GRUSequenceTest,  // clip = 0 cases, short sequences only
+                            ::testing::Combine(
+                                    ::testing::ValuesIn(mode),
+                                    ::testing::ValuesIn(seq_lengths_zero_clip),
+                                    ::testing::ValuesIn(batch),
+                                    ::testing::ValuesIn(hidden_size),
+                                    // ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
+                                    ::testing::ValuesIn(activations),
+                                    ::testing::ValuesIn(clip),
+                                    ::testing::ValuesIn(linear_before_reset),
+                                    ::testing::ValuesIn(direction),
+                                    ::testing::ValuesIn(netPrecisions),
+                                    ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                            GRUSequenceTest::getTestCaseName);
+
+    INSTANTIATE_TEST_CASE_P(GRUSequenceCommonClip, GRUSequenceTest,  // non-zero clip cases with the longer sequence length
+                            ::testing::Combine(
+                                    ::testing::ValuesIn(mode),
+                                    ::testing::ValuesIn(seq_lengths_clip_non_zero),
+                                    ::testing::ValuesIn(batch),
+                                    ::testing::ValuesIn(hidden_size),
+                                    // ::testing::ValuesIn(input_size),  // hardcoded to 10 due to Combine supports up to 10 args
+                                    ::testing::ValuesIn(activations),
+                                    ::testing::ValuesIn(clip_non_zeros),
+                                    ::testing::ValuesIn(linear_before_reset),
+                                    ::testing::ValuesIn(direction),
+                                    ::testing::ValuesIn(netPrecisions),
+                                    ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                            GRUSequenceTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/lstm_cell.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/lstm_cell.cpp
@ -0,0 +1,49 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/lstm_cell.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+std::vector<bool> should_decompose{false, true};  // both settings of the decomposition flag are swept
+std::vector<size_t> batch{5};
+std::vector<size_t> hidden_size{1, 10};  // full sweep (LSTMCellCommon)
+std::vector<size_t> hidden_size_smoke{1};  // reduced sweep (smoke_LSTMCellCommon)
+std::vector<size_t> input_size{1, 30};
+std::vector<std::vector<std::string>> activations_smoke = {{"relu", "sigmoid", "tanh"}};  // reduced sweep: a single activation triple
+std::vector<std::vector<std::string>> activations = {{"relu", "sigmoid", "tanh"}, {"sigmoid", "tanh", "tanh"},  // full sweep: six triples of activation names
+                                                     {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"},
+                                                     {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}};
+std::vector<float> clip{0.f, 0.7f};  // 0.f presumably means "no clipping" — confirm in lstm_cell.hpp
+std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                        InferenceEngine::Precision::FP16};
+
+INSTANTIATE_TEST_CASE_P(LSTMCellCommon, LSTMCellTest,  // full sweep: uses hidden_size and activations
+                        ::testing::Combine(
+                                ::testing::ValuesIn(should_decompose),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        LSTMCellTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCommon, LSTMCellTest,  // reduced sweep: hidden_size_smoke and activations_smoke replace the full lists
+                        ::testing::Combine(
+                                ::testing::ValuesIn(should_decompose),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size_smoke),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations_smoke),
+                                ::testing::ValuesIn(clip),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        LSTMCellTest::getTestCaseName);
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/lstm_sequence.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/lstm_sequence.cpp
@ -0,0 +1,79 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <ngraph/op/util/attr_types.hpp>
+#include "single_layer_tests/lstm_sequence.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {
+std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,  // sequence test modes swept by every instantiation below
+                                                     ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
+                                                     ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
+                                                     ngraph::helpers::SequenceTestsMode::PURE_SEQ};
+// output values increase rapidly without clip, so the zero-clip case uses only seq_lengths = 2
+std::vector<size_t> seq_lengths_zero_clip{2};
+std::vector<size_t> seq_lengths_clip_non_zero{20};  // longer sequence, paired with clip_non_zeros
+std::vector<size_t> batch{10};
+std::vector<size_t> hidden_size{1, 10};  // full sweep
+std::vector<size_t> hidden_size_smoke{1};  // reduced sweep (smoke_LSTMSequenceCommonClip)
+std::vector<size_t> input_size{10};
+std::vector<std::vector<std::string>> activations = {{"relu", "sigmoid", "tanh"}, {"sigmoid", "tanh", "tanh"},  // full sweep: six triples of activation names
+                                                     {"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"},
+                                                     {"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}};
+std::vector<std::vector<std::string>> activations_smoke = {{"relu", "sigmoid", "tanh"}};  // reduced sweep: a single activation triple
+std::vector<float> clip{0.f};  // used by LSTMSequenceCommonZeroClip
+std::vector<float> clip_non_zeros{0.7f};  // used by the two *CommonClip instantiations
+std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
+                                                                 ngraph::op::RecurrentSequenceDirection::REVERSE,
+                                                                 ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL
+};
+std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                         InferenceEngine::Precision::FP16};
+
+INSTANTIATE_TEST_CASE_P(LSTMSequenceCommonZeroClip, LSTMSequenceTest,  // clip = 0 cases, short sequences only
+                        ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(seq_lengths_zero_clip),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip),
+                                ::testing::ValuesIn(direction),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        LSTMSequenceTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(LSTMSequenceCommonClip, LSTMSequenceTest,  // non-zero clip cases, full hidden_size/activations sweep
+                        ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(seq_lengths_clip_non_zero),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip_non_zeros),
+                                ::testing::ValuesIn(direction),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        LSTMSequenceTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCommonClip, LSTMSequenceTest,  // reduced non-zero-clip sweep: hidden_size_smoke and activations_smoke replace the full lists
+                        ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(seq_lengths_clip_non_zero),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size_smoke),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations_smoke),
+                                ::testing::ValuesIn(clip_non_zeros),
+                                ::testing::ValuesIn(direction),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),  // target device
+                        LSTMSequenceTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/non_max_suppression.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/non_max_suppression.cpp
@ -0,0 +1,42 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/non_max_suppression.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace InferenceEngine;
+using namespace ngraph;
+
+const std::vector<InputShapeParams> inShapeParams = {  // NOTE(review): three integers per entry; their meaning is defined by InputShapeParams in non_max_suppression.hpp
+    InputShapeParams{3, 100, 5},
+    InputShapeParams{1, 10, 50},
+    InputShapeParams{2, 50, 50}
+};
+
+const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
+const std::vector<float> threshold = {0.3f, 0.7f};  // passed twice to nmsParams, so two parameter slots share these values
+const std::vector<float> sigmaThreshold = {0.0f, 0.5f};
+const std::vector<op::v5::NonMaxSuppression::BoxEncodingType> encodType = {op::v5::NonMaxSuppression::BoxEncodingType::CENTER,  // both box encodings; op:: resolves through `using namespace ngraph`
+                                                                           op::v5::NonMaxSuppression::BoxEncodingType::CORNER};
+const std::vector<bool> sortResDesc = {true, false};
+const std::vector<element::Type> outType = {element::i32, element::i64};  // element:: resolves through `using namespace ngraph`
+
+const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),  // full parameter tuple for NmsLayerTest
+                                          ::testing::Combine(::testing::Values(Precision::FP32),  // nested tuple of three fixed precisions
+                                                             ::testing::Values(Precision::I32),
+                                                             ::testing::Values(Precision::FP32)),
+                                          ::testing::ValuesIn(maxOutBoxPerClass),
+                                          ::testing::ValuesIn(threshold),  // first threshold slot
+                                          ::testing::ValuesIn(threshold),  // second threshold slot, same value list — presumably IOU and score thresholds; confirm order in non_max_suppression.hpp
+                                          ::testing::ValuesIn(sigmaThreshold),
+                                          ::testing::ValuesIn(encodType),
+                                          ::testing::ValuesIn(sortResDesc),
+                                          ::testing::ValuesIn(outType),
+                                          ::testing::Values(CommonTestUtils::DEVICE_GPU)  // target device
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_NmsLayerTest, NmsLayerTest, nmsParams, NmsLayerTest::getTestCaseName);  // registers nmsParams for NmsLayerTest; getTestCaseName supplies the per-case names
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/normalize_l2.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/normalize_l2.cpp
@ -35,7 +35,7 @@ const auto normL2params = testing::Combine(
 );

 INSTANTIATE_TEST_CASE_P(
-        NormalizeL2,
+        smoke_NormalizeL2,
        NormalizeL2LayerTest,
        normL2params,
        NormalizeL2LayerTest::getTestCaseName
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/prior_box_clustered.cpp
@ -64,8 +64,8 @@ INSTANTIATE_TEST_CASE_P(smoke_PriorBoxClustered_Basic, PriorBoxClusteredLayerTes
                            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
                            ::testing::Values(InferenceEngine::Layout::ANY),
                            ::testing::Values(InferenceEngine::Layout::ANY),
-                            ::testing::Values(std::vector<size_t>({ 1, 16, 4, 4 })),
-                            ::testing::Values(std::vector<size_t>({ 1, 3, 50, 50 })),
+                            ::testing::Values(std::vector<size_t>({ 4, 4 })),
+                            ::testing::Values(std::vector<size_t>({ 50, 50 })),
                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
                        PriorBoxClusteredLayerTest::getTestCaseName
 );
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reduce_ops.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/reduce_ops.cpp
@ -1,4 +1,4 @@
-// Copyright (C) 20120 Intel Corporation
+// Copyright (C) 2020 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //

@ -10,67 +10,230 @@
 using namespace LayerTestsDefinitions;

 namespace {
-    const std::vector<InferenceEngine::Precision> netPrecisions = {
+const std::vector<InferenceEngine::Precision> netPrecisions = {
        InferenceEngine::Precision::FP32,
-    };
+        InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::U8,
+        InferenceEngine::Precision::I8,
+};

-    const std::vector<std::vector<size_t>> inputShapes = {
-        std::vector<size_t>{1, 2, 4, 4},
-        std::vector<size_t>{3, 2, 5, 6},
-    };
+const std::vector<bool> keepDims = {
+        true,
+        false,
+};

-    const std::vector<std::vector<int>> axes = {
+const std::vector<std::vector<size_t>> inputShapes = {
+        std::vector<size_t>{10, 20, 30, 40},
+        std::vector<size_t>{3, 5, 7, 9},
+};
+
+const std::vector<std::vector<size_t>> inputShapesOneAxis = {
+        std::vector<size_t>{10, 20, 30, 40},
+        std::vector<size_t>{3, 5, 7, 9},
+        std::vector<size_t>{10},
+};
+
+const std::vector<std::vector<int>> axes = {
+        {0},
+        {1},
+        {2},
+        {3},
+        {0, 1},
        {0, 2},
-        {1, 3}
-    };
+        {0, 3},
+        {1, 2},
+        {1, 3},
+        {2, 3},
+        {0, 1, 2},
+        {0, 1, 3},
+        {0, 2, 3},
+        {1, 2, 3},
+        {0, 1, 2, 3},
+        {1, -1}
+};

-    std::vector<CommonTestUtils::OpType> opTypes = {
+std::vector<CommonTestUtils::OpType> opTypes = {
        CommonTestUtils::OpType::SCALAR,
        CommonTestUtils::OpType::VECTOR,
-    };
+};

-    const std::vector<ngraph::helpers::ReductionType> reductionTypes = {
+const std::vector<ngraph::helpers::ReductionType> reductionTypes = {
        ngraph::helpers::ReductionType::Mean,
        ngraph::helpers::ReductionType::Min,
        ngraph::helpers::ReductionType::Max,
        ngraph::helpers::ReductionType::Sum,
        ngraph::helpers::ReductionType::Prod,
-    };
+        ngraph::helpers::ReductionType::L1,
+        ngraph::helpers::ReductionType::L2,
+};

-    const auto paramsOneAxis = testing::Combine(
+const std::vector<ngraph::helpers::ReductionType> reductionLogicalTypes = {
+        ngraph::helpers::ReductionType::LogicalOr,
+        ngraph::helpers::ReductionType::LogicalAnd
+};
+
+const auto paramsOneAxis = testing::Combine(
        testing::Values(std::vector<int>{0}),
        testing::ValuesIn(opTypes),
        testing::Values(true, false),
        testing::ValuesIn(reductionTypes),
-        testing::ValuesIn(netPrecisions),
+        testing::Values(InferenceEngine::Precision::FP32),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::ValuesIn(inputShapesOneAxis),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto paramsOneAxisLogical = testing::Combine(
+        testing::Values(std::vector<int>{0}),
+        testing::ValuesIn(opTypes),
+        testing::Values(true, false),
+        testing::ValuesIn(reductionLogicalTypes),
+        testing::Values(InferenceEngine::Precision::BOOL),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
        testing::Values(InferenceEngine::Layout::ANY),
        testing::ValuesIn(inputShapes),
-        testing::Values(CommonTestUtils::DEVICE_GPU));
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);

-    INSTANTIATE_TEST_CASE_P(
+const auto params_Precisions = testing::Combine(
+        testing::Values(std::vector<int>{1, 3}),
+        testing::Values(opTypes[1]),
+        testing::ValuesIn(keepDims),
+        testing::Values(ngraph::helpers::ReductionType::Sum),
+        testing::Values(InferenceEngine::Precision::FP32,
+                        InferenceEngine::Precision::I32),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{2, 2, 2, 2}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto params_InputShapes = testing::Combine(
+        testing::Values(std::vector<int>{0}),
+        testing::Values(opTypes[1]),
+        testing::ValuesIn(keepDims),
+        testing::Values(ngraph::helpers::ReductionType::Mean),
+        testing::Values(InferenceEngine::Precision::FP32),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{3},
+                        std::vector<size_t>{3, 5},
+                        std::vector<size_t>{2, 4, 6},
+                        std::vector<size_t>{2, 4, 6, 8},
+                        std::vector<size_t>{2, 2, 2, 2, 2},
+                        std::vector<size_t>{2, 2, 2, 2, 2, 2}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto params_Axes = testing::Combine(
+        testing::ValuesIn(axes),
+        testing::Values(opTypes[1]),
+        testing::ValuesIn(keepDims),
+        testing::Values(ngraph::helpers::ReductionType::Mean),
+        testing::Values(InferenceEngine::Precision::FP32),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::ValuesIn(inputShapes),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto params_ReductionTypes = testing::Combine(
+        testing::Values(std::vector<int>{0, 1, 3}),
+        testing::Values(opTypes[1]),
+        testing::ValuesIn(keepDims),
+        testing::ValuesIn(reductionTypes),
+        testing::Values(InferenceEngine::Precision::FP32),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{2, 9, 2, 9}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+const auto params_ReductionTypesLogical = testing::Combine(
+        testing::Values(std::vector<int>{0, 1, 3}),
+        testing::Values(opTypes[1]),
+        testing::ValuesIn(keepDims),
+        testing::ValuesIn(reductionLogicalTypes),
+        testing::Values(InferenceEngine::Precision::BOOL),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+        testing::Values(InferenceEngine::Layout::ANY),
+        testing::Values(std::vector<size_t>{2, 9, 2, 9}),
+        testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(
        smoke_ReduceOneAxis,
        ReduceOpsLayerTest,
        paramsOneAxis,
-        ReduceOpsLayerTest::getTestCaseName);
+        ReduceOpsLayerTest::getTestCaseName
+);

-    const auto params = testing::Combine(
-        testing::ValuesIn(axes),
-        testing::Values(opTypes[1]),
-        testing::Values(true, false),
-        testing::ValuesIn(reductionTypes),
-        testing::ValuesIn(netPrecisions),
-        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-        testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-        testing::Values(InferenceEngine::Layout::ANY),
-        testing::ValuesIn(inputShapes),
-        testing::Values(CommonTestUtils::DEVICE_GPU));
-
-    INSTANTIATE_TEST_CASE_P(
-        smoke_Reduce,
+INSTANTIATE_TEST_CASE_P(
+        smoke_ReduceLogicalOneAxis,
        ReduceOpsLayerTest,
-        params,
-        ReduceOpsLayerTest::getTestCaseName);
+        paramsOneAxisLogical,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Reduce_Precisions,
+        ReduceOpsLayerTest,
+        params_Precisions,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Reduce_InputShapes,
+        ReduceOpsLayerTest,
+        params_InputShapes,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Reduce_Axes,
+        ReduceOpsLayerTest,
+        params_Axes,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Reduce_ReductionTypes,
+        ReduceOpsLayerTest,
+        params_ReductionTypes,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_ReduceLogical_ReductionTypes,
+        ReduceOpsLayerTest,
+        params_ReductionTypesLogical,
+        ReduceOpsLayerTest::getTestCaseName
+);
+
+INSTANTIATE_TEST_CASE_P(
+        smoke_Reduce,
+        ReduceOpsLayerWithSpecificInputTest,
+        testing::Combine(
+                testing::ValuesIn(decltype(axes) {{0}, {1}}),
+                testing::Values(opTypes[1]),
+                testing::Values(true),
+                testing::Values(ngraph::helpers::ReductionType::Sum),
+                testing::Values(InferenceEngine::Precision::FP32,
+                                InferenceEngine::Precision::I32),
+                testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+                testing::Values(InferenceEngine::Layout::ANY),
+                testing::Values(std::vector<size_t> {2, 10}),
+                testing::Values(CommonTestUtils::DEVICE_GPU)),
+        ReduceOpsLayerWithSpecificInputTest::getTestCaseName
+);

 }  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rnn_cell.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rnn_cell.cpp
@ -0,0 +1,34 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/rnn_cell.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {  // parameter grids for the GPU instantiation of RNNCellTest
+    std::vector<bool> should_decompose{false, true};
+    std::vector<size_t> batch{1, 5};
+    std::vector<size_t> hidden_size{1, 10};
+    std::vector<size_t> input_size{1, 30};
+    std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};  // one activation per case
+    std::vector<float> clip = {0.f, 0.7f};  // NOTE(review): 0.f presumably means "no clipping" - confirm
+    std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
+                                                             InferenceEngine::Precision::FP16};
+
+    INSTANTIATE_TEST_CASE_P(RNNCellCommon, RNNCellTest,  // NOTE(review): instance name lacks the smoke_ prefix used by sibling GPU suites - confirm intended
+            ::testing::Combine(  // full cross product of the grids above: 2*2*2*2*3*2*2 = 192 cases
+            ::testing::ValuesIn(should_decompose),
+            ::testing::ValuesIn(batch),
+            ::testing::ValuesIn(hidden_size),
+            ::testing::ValuesIn(input_size),
+            ::testing::ValuesIn(activations),
+            ::testing::ValuesIn(clip),
+            ::testing::ValuesIn(netPrecisions),
+            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+            RNNCellTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rnn_sequence.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/rnn_sequence.cpp
@ -0,0 +1,60 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <ngraph/op/util/attr_types.hpp>
+#include "single_layer_tests/rnn_sequence.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+
+namespace {  // parameter grids for the GPU instantiations of RNNSequenceTest
+std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
+                                                     ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
+                                                     ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
+                                                     ngraph::helpers::SequenceTestsMode::PURE_SEQ};
+// Output values increase rapidly without clip, so the zero-clip suite uses only seq_lengths = 2.
+std::vector<size_t> seq_lengths_zero_clip{2};
+std::vector<size_t> seq_lengths_clip_non_zero{20};  // longer sequence, used only together with clip_non_zeros
+std::vector<size_t> batch{1, 10};
+std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> input_size{10};
+std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
+std::vector<float> clip{0.f};  // paired with seq_lengths_zero_clip
+std::vector<float> clip_non_zeros{0.7f};  // paired with seq_lengths_clip_non_zero
+std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
+                                                                 ngraph::op::RecurrentSequenceDirection::REVERSE,
+                                                                 ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL,
+};
+std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
+
+INSTANTIATE_TEST_CASE_P(RNNSequenceCommonZeroClip, RNNSequenceTest,  // clip = 0.f with the short sequence length
+                        ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(seq_lengths_zero_clip),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip),
+                                ::testing::ValuesIn(direction),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        RNNSequenceTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(RNNSequenceCommonClip, RNNSequenceTest,  // clip = 0.7f with the long sequence length
+                        ::testing::Combine(
+                                ::testing::ValuesIn(mode),
+                                ::testing::ValuesIn(seq_lengths_clip_non_zero),
+                                ::testing::ValuesIn(batch),
+                                ::testing::ValuesIn(hidden_size),
+                                ::testing::ValuesIn(input_size),
+                                ::testing::ValuesIn(activations),
+                                ::testing::ValuesIn(clip_non_zeros),
+                                ::testing::ValuesIn(direction),
+                                ::testing::ValuesIn(netPrecisions),
+                                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+                        RNNSequenceTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/scatter_update.cpp
@ -0,0 +1,46 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <ngraph/opsets/opset3.hpp>
+
+#include "single_layer_tests/scatter_update.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace LayerTestsDefinitions;
+using namespace ngraph::opset3;
+
+namespace {  // parameter grids for the GPU instantiation of ScatterUpdateLayerTest
+const std::vector<InferenceEngine::Precision> inputPrecisions = {
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::I32,
+};
+
+const std::vector<InferenceEngine::Precision> idxPrecisions = {
+        InferenceEngine::Precision::I32,
+        InferenceEngine::Precision::I64,
+};
+
+// map<inputShape, map<indicesShape, axis>>
+std::map<std::vector<size_t>, std::map<std::vector<size_t>, std::vector<int>>> axesShapeInShape {
+    {{10, 16, 12, 15}, {{{2, 4}, {0, 1, 2, 3}}, {{8}, {-1, -2, -3, -4}}}},
+    {{10, 9, 10, 9, 10}, {{{8}, {-3, -1, 0, 2, 4}}, {{4, 2}, {-2, 2}}}},
+};
+// Indices must not be random: a fixed permutation of 0..7, matching the 8 elements of every indices shape above.
+const std::vector<std::vector<size_t>> idxValue = {
+        {0, 2, 4, 6, 1, 3, 5, 7}
+};
+
+const auto ScatterUpdateCase = ::testing::Combine(
+        ::testing::ValuesIn(ScatterUpdateLayerTest::combineShapes(axesShapeInShape)),  // NOTE(review): presumably flattens the nested map into per-case tuples - confirm
+        ::testing::ValuesIn(idxValue),
+        ::testing::ValuesIn(inputPrecisions),
+        ::testing::ValuesIn(idxPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU)
+);
+
+INSTANTIATE_TEST_CASE_P(smoke_ScatterUpdate, ScatterUpdateLayerTest, ScatterUpdateCase, ScatterUpdateLayerTest::getTestCaseName);
+
+}  // namespace
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp
@ -9,8 +9,6 @@

 std::vector<std::string> disabledTestPatterns() {
    return {
-            // Issues - 34059
-            ".*BehaviorTests\\.pluginDoesNotChangeOriginalNetwork.*",
            //TODO: Issue: 34748
            R"(.*(ComparisonLayerTest).*)",
            // TODO: Issue: 39014
@ -20,8 +18,6 @@ std::vector<std::string> disabledTestPatterns() {
            // Expected behavior
            R"(.*EltwiseLayerTest.*eltwiseOpType=Pow.*netPRC=I64.*)",
            R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)",
-            // TODO: Issue: 40958
-            R"(.*(ConstantResultSubgraphTest).*)",
            // TODO: Issue: 43794
            R"(.*(PreprocessTest).*(SetScalePreProcess).*)",
            R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcess).*)",
@ -35,8 +31,23 @@ std::vector<std::string> disabledTestPatterns() {
            R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
            R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
            // TODO: Issue: 43511
-            R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=(Prod|Sub).*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=(FP16|FP32).*)",
-            R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=Sum.*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=(FP16|FP32).*)",
-            R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=Sub.*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=I64.*)",
+            R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*)",
+            R"(.*EltwiseLayerTest.*IS=\(2\).*OpType=Mod.*opType=VECTOR.*)",
+            R"(.*EltwiseLayerTest.*OpType=FloorMod.*netPRC=I64.*)",
+
+            // These tests might fail because the accuracy loss slightly exceeds the threshold
+            R"(.*(GRUCellTest).*)",
+            R"(.*(RNNSequenceTest).*)",
+            R"(.*(GRUSequenceTest).*)",
+            // These test cases might fail due to FP16 overflow
+            R"(.*(LSTM).*activations=\(relu.*netPRC=FP16.*)",
+
+            // Need to update activation primitive to support any broadcastable constant to enable these cases.
+            R"(.*ActivationParamLayerTest.*)",
+            // Unknown issues
+            R"(.*(LSTMSequence).*mode=CONVERT_TO_TI_RAND_SEQ_LEN.*)",
+            R"(.*(smoke_DetectionOutput3In).*)",
+            R"(.*(smoke_DetectionOutput5In).*)",
+            R"(.*(ScatterUpdateLayerTest).*)",
    };
 }
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/parameter_result.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/subgraph_tests/parameter_result.cpp
@ -0,0 +1,16 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "subgraph_tests/parameter_result.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+using namespace SubgraphTestsDefinitions;
+
+namespace {
+    INSTANTIATE_TEST_CASE_P(smoke_Check, ParameterResultSubgraphTest,
+                            ::testing::Values(CommonTestUtils::DEVICE_GPU),
+                            ParameterResultSubgraphTest::getTestCaseName);
+}  // namespace
--- a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp
+++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp
@ -0,0 +1,15 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "shared_test_classes/subgraph/parameter_result.hpp"
+
+namespace SubgraphTestsDefinitions {
+
+TEST_P(ParameterResultSubgraphTest, CompareWithRefs) {
+    Run();
+}
+
+}  // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/prior_box_clustered.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/prior_box_clustered.hpp
@ -67,7 +67,6 @@ protected:
    float offset;
    bool clip;

-    std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
    void SetUp() override;
 };

--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/parameter_result.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/parameter_result.hpp
@ -0,0 +1,28 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace SubgraphTestsDefinitions {
+
+typedef std::tuple<
+            std::string                        // Device name
+> parameterResultParams;
+
+class ParameterResultSubgraphTest : public testing::WithParamInterface<parameterResultParams>,  // parameterized by device name only
+                                    virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<parameterResultParams> obj);  // yields "TargetDevice=<device>"
+protected:
+    void SetUp() override;  // builds a function wiring a single Parameter directly to a Result
+};
+}  // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/prior_box_clustered.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/prior_box_clustered.cpp
@ -57,84 +57,8 @@ std::string PriorBoxClusteredLayerTest::getTestCaseName(const testing::TestParam
    return result.str();
 }

-std::vector<std::vector<std::uint8_t>> PriorBoxClusteredLayerTest::CalculateRefs() {
-    size_t numPriors = widths.size();
-    const size_t layerWidth = inputShapes[3];
-    const size_t layerHeight = inputShapes[2];
-    size_t imgWidth = imageShapes[3];
-    size_t imgHeight = imageShapes[2];
-
-    if (variances.empty())
-        variances.push_back(0.1f);
-    size_t varSize = variances.size();
-
-    size_t topDataOffset = 4 * layerWidth * layerHeight * numPriors;
-    size_t outSize = 2 * topDataOffset;
-    auto outBuf = std::vector<float>(outSize);
-    float* topData_0 = outBuf.data();
-    float* topData_1 = outBuf.data() + topDataOffset;
-
-    if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) {
-        //GPU inits buffers with 0.0f
-        for (auto i = 0; i < outSize; i++)
-            topData_0[i] = 0.0f;
-    }
-
-    float stepW = step_width;
-    float stepH = step_height;
-    if (stepW == 0 && stepH == 0) {
-        stepW = static_cast<float>(imgWidth) / layerWidth;
-        stepH = static_cast<float>(imgHeight) / layerHeight;
-    }
-
-    for (size_t h = 0; h < layerHeight; ++h) {
-        for (size_t w = 0; w < layerWidth; ++w) {
-            float center_x = (w + offset) * stepW;
-            float center_y = (h + offset) * stepH;
-
-            for (size_t s = 0; s < numPriors; ++s) {
-                float box_width = widths[s];
-                float box_height = heights[s];
-
-                float xmin = (center_x - box_width / 2.0f) / imgWidth;
-                float ymin = (center_y - box_height / 2.0f) / imgHeight;
-                float xmax = (center_x + box_width / 2.0f) / imgWidth;
-                float ymax = (center_y + box_height / 2.0f) / imgHeight;
-
-                if (clip) {
-                    xmin = (std::min)((std::max)(xmin, 0.0f), 1.0f);
-                    ymin = (std::min)((std::max)(ymin, 0.0f), 1.0f);
-                    xmax = (std::min)((std::max)(xmax, 0.0f), 1.0f);
-                    ymax = (std::min)((std::max)(ymax, 0.0f), 1.0f);
-                }
-
-                topData_0[h * layerWidth * numPriors * 4 + w * numPriors * 4 + s * 4 + 0] = xmin;
-                topData_0[h * layerWidth * numPriors * 4 + w * numPriors * 4 + s * 4 + 1] = ymin;
-                topData_0[h * layerWidth * numPriors * 4 + w * numPriors * 4 + s * 4 + 2] = xmax;
-                topData_0[h * layerWidth * numPriors * 4 + w * numPriors * 4 + s * 4 + 3] = ymax;
-
-                for (int j = 0; j < varSize; j++)
-                    topData_1[h * layerWidth * numPriors * varSize + w * numPriors * varSize +
-                    s * varSize +
-                    j] = variances[j];
-            }
-        }
-    }
-
-    // Be aligned with test utils ref calulcation method, which returns std::vector<std::vector<uint8_t>>...
-    std::vector<std::vector<uint8_t>> ret(1);
-    for (auto& val : outBuf) {
-        uint8_t* u8_val = reinterpret_cast<uint8_t*>(&val);
-        ret[0].push_back(u8_val[0]);
-        ret[0].push_back(u8_val[1]);
-        ret[0].push_back(u8_val[2]);
-        ret[0].push_back(u8_val[3]);
-    }
-
-    return ret;
-}
-
 void PriorBoxClusteredLayerTest::SetUp() {
+    SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
    priorBoxClusteredSpecificParams specParams;
    std::tie(specParams, netPrecision,
        inPrc, outPrc, inLayout, outLayout,
@ -149,9 +73,7 @@ void PriorBoxClusteredLayerTest::SetUp() {
        variances) = specParams;

    auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-    auto paramsIn = ngraph::builder::makeParams(ngPrc, { inputShapes, imageShapes });
-    auto paramsOut = ngraph::helpers::convert2OutputVector(
-        ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(paramsIn));
+    auto params = ngraph::builder::makeParams(ngPrc, { inputShapes, imageShapes });

    ngraph::op::PriorBoxClusteredAttrs attributes;
    attributes.widths = widths;
@ -162,12 +84,14 @@ void PriorBoxClusteredLayerTest::SetUp() {
    attributes.offset = offset;
    attributes.variances = variances;

-    auto priorBoxClustered = std::make_shared<ngraph::op::PriorBoxClusteredIE>(
-        paramsOut[0],
-        paramsOut[1],
+    auto shape_of_1 = std::make_shared<ngraph::opset3::ShapeOf>(params[0]);
+    auto shape_of_2 = std::make_shared<ngraph::opset3::ShapeOf>(params[1]);
+    auto priorBoxClustered = std::make_shared<ngraph::op::PriorBoxClustered>(
+        shape_of_1,
+        shape_of_2,
        attributes);

    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(priorBoxClustered) };
-    function = std::make_shared<ngraph::Function>(results, paramsIn, "PB_Clustered");
+    function = std::make_shared<ngraph::Function>(results, params, "PB_Clustered");
 }
 }  // namespace LayerTestsDefinitions
--- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/parameter_result.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/parameter_result.cpp
@ -0,0 +1,28 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "shared_test_classes/subgraph/parameter_result.hpp"
+
+namespace SubgraphTestsDefinitions {
+
+std::string ParameterResultSubgraphTest::getTestCaseName(testing::TestParamInfo<parameterResultParams> obj) {
+    // The parameter tuple holds exactly one element: the target device name.
+    const std::string& deviceName = std::get<0>(obj.param);
+    std::ostringstream caseName;
+    caseName << "TargetDevice=" << deviceName;
+    return caseName.str();
+}
+
+void ParameterResultSubgraphTest::SetUp() {
+    // The only test parameter is the target device; the network is one f32
+    // Parameter of shape {1, 3, 10, 10} connected straight to a Result.
+    std::tie(targetDevice) = this->GetParam();
+
+    auto parameter = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 10, 10});
+    const ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(parameter)};
+    ngraph::ParameterVector params = {parameter};
+    function = std::make_shared<ngraph::Function>(results, params, "ParameterResult");
+}
+
+}  // namespace SubgraphTestsDefinitions
--- a/inference-engine/tests_deprecated/functional/cldnn/CMakeLists.txt
+++ b/inference-engine/tests_deprecated/functional/cldnn/CMakeLists.txt
@ -6,16 +6,7 @@
 set(TARGET_NAME ClDnnFunctionalTests)

 file(GLOB CLDNN_TEST_SOURCES
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/regression_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/io_blob_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/input_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/inference_engine_regression_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/lstm/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/common_single_layer_tests/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/ie_class/*.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/single_layer_tests/*.cpp)
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

 list(APPEND TEST_SRC ${CLDNN_TEST_SOURCES})

--- a/inference-engine/tests_deprecated/functional/cldnn/dummy.cpp
+++ b/inference-engine/tests_deprecated/functional/cldnn/dummy.cpp
@ -0,0 +1,3 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
--- a/Show More
+++ b/Show More