[CPU] CPU plugin API 2.0 migration for TensorDesc (#21002)

* [CPU] CPU API 2.0 migration for TensorDesc

* Fixed CustomOpCPUTest issue

* Clean up unused code:

1. normalize_preprocess
2. MemoryDescUtils::convertToBlockedMemoryDesc
3. Fix a typo

* Fix zero-dim tensors with non-zero strides issue

* Fix "Expected and actual shape are different: [] VS [1]" error

* Remove InferenceEngine::Layout

* Resolve rebase issues

* Address code reviewer's comments

* Keep ov::intel_cpu::node::Generic for legacy compatibility

* Fix scalar data issue

* Address reviewer's comments

* Restore Generic registration
Author: River Li, 2023-12-08 18:28:52 +08:00, committed by GitHub
parent fbec7be5e0
commit 04db11e921
20 changed files with 120 additions and 524 deletions
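
Most of the diff below follows one pattern: ad-hoc InferenceEngine::TensorDesc construction is replaced by MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor), and TensorDesc comparisons become MemoryDesc::isCompatible checks. The stride arithmetic this centralizes (fall back to row-major strides when a tensor reports none, otherwise turn byte strides into element strides with a divisibility check) can be sketched in plain standalone C++; the helper name below is hypothetical and this is only an illustration of the logic, not the plugin's actual code.

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

// Standalone sketch of the stride normalization performed inside
// generateCpuBlockedMemoryDesc: dense tensors get row-major strides derived
// from the shape, ROI tensors get their byte strides converted to element
// strides (each byte stride must be divisible by the element size).
std::vector<size_t> to_element_strides(const std::vector<size_t>& shape,
                                       const std::vector<size_t>& byte_strides,
                                       size_t element_size) {
    if (byte_strides.empty()) {
        std::vector<size_t> strides(shape.size(), 1);
        for (size_t i = shape.size(); i-- > 1;)
            strides[i - 1] = strides[i] * shape[i];
        return strides;
    }
    std::vector<size_t> strides(byte_strides.size());
    for (size_t i = 0; i < byte_strides.size(); ++i) {
        if (byte_strides[i] % element_size != 0)
            throw std::runtime_error("Byte stride " + std::to_string(byte_strides[i]) +
                                     " is not divisible by element size " +
                                     std::to_string(element_size));
        strides[i] = byte_strides[i] / element_size;
    }
    return strides;
}

For example, a 2x3 f32 ROI view with byte strides {48, 4} maps to element strides {12, 1}, while a dense {2, 3} shape with no reported strides yields {3, 1}.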


@ -68,7 +68,7 @@ dnnl::memory::data_type DnnlExtensionUtils::ElementTypeToDataType(const ov::elem
case ov::element::undefined:
return memory::data_type::undef;
default: {
OPENVINO_THROW("The plugin does not support ", elementType.to_string(), " for use with oneDNN");
OPENVINO_THROW("CPU plugin does not support ", elementType.to_string(), " for use with oneDNN.");
}
}
}


@ -47,7 +47,6 @@
#include "utils/ngraph_utils.hpp"
#include "utils/node_dumper.h"
#include "utils/verbose.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "openvino/runtime/memory_solver.hpp"
@ -894,63 +893,17 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& inp
if (!IsReady()) OPENVINO_THROW("Wrong state. Topology not ready.");
auto input_itr = inputNodesMap.find(name);
if (input_itr != inputNodesMap.end()) {
auto create_mem_desc = [&](const ov::SoPtr<ITensor>& tensor) -> CpuBlockedMemoryDesc {
auto element_type = tensor->get_element_type();
auto shape = tensor->get_shape();
if (shape.empty())
shape = {tensor->get_size()};
std::vector<size_t> blk_order(shape.size());
std::iota(blk_order.begin(), blk_order.end(), 0);
std::vector<size_t> dim_offset(shape.size(), 0);
std::vector<size_t> blk_strides;
auto byte_strides = element_type.bitwidth() >= 8 ? tensor->get_strides() : Strides{};
if (byte_strides.empty()) {
blk_strides = ov::row_major_strides(shape);
} else {
// ROI tensor need figure out correct blk_strides
blk_strides.resize(byte_strides.size());
std::transform(byte_strides.begin(),
byte_strides.end(),
blk_strides.begin(),
[&element_type](size_t byte_stride) {
OPENVINO_ASSERT(byte_stride % element_type.size() == 0,
"Limitation: Stride in bytes ",
byte_stride,
" should be divisible by size of element ",
element_type.size());
return byte_stride / element_type.size();
});
}
InferenceEngine::TensorDesc tensorDesc(
InferenceEngine::details::convertPrecision(tensor->get_element_type()),
shape,
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
return MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensorDesc);
};
auto node = input_itr->second;
auto childEdge = node->getChildEdgeAt(0);
const auto& outDims = node->getOutputShapeAtPort(0);
const void* ext_data_ptr = input->data();
void* inter_data_ptr = childEdge->getMemory().getData();
if (ext_data_ptr != inter_data_ptr) {
auto ext_tensor_desc = create_mem_desc(input);
auto ext_tensor_desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(input);
Memory ext_mem(getEngine(), ext_tensor_desc, ext_data_ptr, false);
childEdge->getMemory().load(ext_mem, false);
}
// todo: make sure 'name' exists in this map...
if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) {
if (input->get_element_type() == ov::element::f32) {
_normalizePreprocMap[name].NormalizeImage(outDims,
reinterpret_cast<float*>(inter_data_ptr),
TensorDesc::getLayoutByDims(input->get_shape()));
} else {
OPENVINO_THROW("Mean image of type ", input->get_element_type().get_type_name(), " is unsupported");
}
}
} else {
OPENVINO_THROW("Input blob for infer '", name, "' doesn't correspond to input in network");
}
@ -973,44 +926,32 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>&
OPENVINO_THROW("The CPU plugin graph doesn't contain output node with name: ", name.c_str());
}
InferenceEngine::TensorDesc expectedDesc(
InferenceEngine::details::convertPrecision(ext_blob->get_element_type()),
ext_blob->get_shape(),
InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size()));
DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()));
auto expected_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(ext_blob);
const auto actualDesc = intr_blob.getDescWithType<BlockedMemoryDesc>();
const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc());
DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()));
// TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it
// WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
bool isScalarOutput = false;
if (actualDesc.getLayout() == SCALAR) {
isScalarOutput = expectedDesc.getLayout() == SCALAR ||
(!expectedDesc.getDims().empty() &&
std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1);
} else if (expectedDesc.getLayout() == SCALAR) {
isScalarOutput = actualDesc.getLayout() == SCALAR ||
(!actualDesc.getDims().empty() &&
std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1);
if (ext_blob->get_shape().empty() && ext_blob->get_size() == 1) {
const auto& actualDims = expected_desc_ptr->getShape().getStaticDims();
isScalarOutput =
!actualDims.empty() &&
std::accumulate(actualDims.begin(), actualDims.end(), (size_t)1, std::multiplies<size_t>()) == 1;
}
auto outDims = intr_blob.getStaticDims();
if (ext_blob->get_shape() != outDims && !isScalarOutput) {
// WA: because input/output info initially contains non empty dims, order etc.
// and setDims (called inside setShape) can't correct modify blocked desc for desc with blocked layout
if (expectedDesc.getLayout() == InferenceEngine::Layout::BLOCKED) {
expectedDesc = TensorDesc(expectedDesc.getPrecision(), expectedDesc.getLayout());
}
DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()),
" dims ", PartialShape(output[name]->get_shape()), " -> ", PartialShape(outDims),
", intr ptr ", intr_blob.getData(), " , parentedge's memory object ", parentEdge->getMemoryPtr().get());
ext_blob->set_shape(outDims);
DEBUG_LOG(name, ", tensor data addr ", static_cast<void*>(output[name]->data()),
" dims ", PartialShape(output[name]->get_shape()), ", intr ptr ", intr_blob.getData());
expectedDesc =
InferenceEngine::TensorDesc(InferenceEngine::details::convertPrecision(ext_blob->get_element_type()),
ext_blob->get_shape(),
InferenceEngine::TensorDesc::getLayoutByRank(ext_blob->get_shape().size()));
expected_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(ext_blob);
}
// check for empty output blob
@ -1018,8 +959,8 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>&
continue;
}
auto srcPrec = actualDesc.getPrecision();
auto dstPrec = expectedDesc.getPrecision();
auto srcPrec = actualDesc->getPrecision();
auto dstPrec = expected_desc_ptr->getPrecision();
if (!getConfig().isLegacyApi && srcPrec == dstPrec && ext_blob->get_byte_size() != intr_blob.getSize())
OPENVINO_THROW("Output blob byte size is not equal network output byte size (",
ext_blob->get_byte_size(),
@ -1034,24 +975,13 @@ void Graph::PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>&
// That is the same memory. No need to copy
if (ext_blob_ptr == intr_blob_ptr) continue;
if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
// User can initialize output via SetOutput API using tensorDesc with ANY layout.
// For these cases we create planar memory descriptor.
auto outBlobDesc =
expectedDesc.getLayout() == InferenceEngine::Layout::ANY
? DnnlBlockedMemoryDesc(InferenceEngine::details::convertPrecision(expectedDesc.getPrecision()),
Shape(expectedDesc.getDims()))
: MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc);
Memory outBloMem(getEngine(), outBlobDesc, ext_blob_ptr, false);
if (actualDesc->isCompatible(*expected_desc_ptr) && !isScalarOutput) {
Memory outBloMem(getEngine(), expected_desc_ptr, ext_blob_ptr, false);
outBloMem.load(intr_blob, false);
} else {
size_t size_to_copy = intr_blob.getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
DEBUG_LOG("pull_output: convert ", srcPrec, " to ", dstPrec);
cpu_convert(intr_blob_ptr,
ext_blob_ptr,
InferenceEngine::details::convertPrecision(srcPrec),
InferenceEngine::details::convertPrecision(dstPrec),
size_to_copy);
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}
}
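
The scalar work-around in PullOutputData above reduces to a single predicate: the model output is a rank-0 tensor with one element while the graph produced an all-ones shape such as {1, 1, 1, 1}. A minimal standalone sketch of that check (the function name is hypothetical):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// True when an externally requested scalar (rank 0, one element) can be
// served from internal memory whose static dims multiply to exactly one
// element, e.g. {1, 1, 1, 1}.
bool scalar_served_by_all_ones(const std::vector<size_t>& external_shape,
                               size_t external_size,
                               const std::vector<size_t>& internal_dims) {
    const bool external_is_scalar = external_shape.empty() && external_size == 1;
    const bool internal_is_single_element =
        !internal_dims.empty() &&
        std::accumulate(internal_dims.begin(), internal_dims.end(), size_t{1},
                        std::multiplies<size_t>()) == 1;
    return external_is_scalar && internal_is_single_element;
}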


@ -11,7 +11,6 @@
#include "edge.h"
#include "graph_context.h"
#include "node.h"
#include "normalize_preprocess.h"
#include "openvino/runtime/make_tensor.hpp"
#include "openvino/runtime/profiling_info.hpp"
@ -60,10 +59,6 @@ public:
const GraphContext::CPtr ctx,
std::string name);
bool hasMeanImageFor(const std::string& name) {
return _normalizePreprocMap.find(name) != _normalizePreprocMap.end();
}
void PushInputData(const std::string& name, const ov::SoPtr<ITensor>& input);
void PullOutputData(std::unordered_map<std::string, ov::SoPtr<ITensor>>& output);
@ -212,7 +207,6 @@ protected:
outputNodesMap.clear();
graphNodes.clear();
graphEdges.clear();
_normalizePreprocMap.clear();
syncNodesInds.clear();
}
Status status { Status::NotReady };
@ -228,7 +222,6 @@ protected:
std::vector<NodePtr> graphNodes;
std::vector<EdgePtr> graphEdges;
std::map<std::string, NormalizePreprocess> _normalizePreprocMap;
std::string _name;
bool graphHasDynamicInput = false;


@ -332,37 +332,6 @@ void SyncInferRequest::throw_if_canceled() const {
}
}
static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ITensor>& tensor) {
auto element_type = tensor->get_element_type();
auto shape = tensor->get_shape();
std::vector<size_t> blk_order(shape.size());
std::iota(blk_order.begin(), blk_order.end(), 0);
std::vector<size_t> dim_offset(shape.size(), 0);
std::vector<size_t> blk_strides;
auto byte_strides = element_type.bitwidth() >= 8 ? tensor->get_strides() : Strides{};
if (byte_strides.empty()) {
blk_strides = ov::row_major_strides(shape);
} else {
blk_strides.resize(byte_strides.size());
std::transform(byte_strides.begin(),
byte_strides.end(),
blk_strides.begin(),
[&element_type](size_t byte_stride) {
OPENVINO_ASSERT(byte_stride % element_type.size() == 0,
"Limitation: Stride in bytes ",
byte_stride,
" should be divisible by size of element ",
element_type.size());
return byte_stride / element_type.size();
});
}
OPENVINO_SUPPRESS_DEPRECATED_START
return InferenceEngine::TensorDesc{InferenceEngine::details::convertPrecision(element_type),
shape,
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
OPENVINO_SUPPRESS_DEPRECATED_END
}
ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& in_port) const {
auto port = get_internal_port(in_port);
return ov::ISyncInferRequest::get_tensor(port);
@ -398,7 +367,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
tensor = ov::make_tensor(in_tensor->get_element_type(), in_port.get_shape(), in_tensor->data());
}
auto name = get_port_name(in_port, m_is_legacy_api);
auto tensor_desc = create_tensor_desc(tensor);
auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
bool is_input = ov::op::util::is_parameter(port.get_node());
if (is_input) {
const auto netInPrc = port.get_element_type();
@ -436,14 +405,11 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
// we must define desc for dynamic case
// otherwise we got incorrect check on shape compatibility inside isCompatible
// because lower and upper bound will be compared
OPENVINO_SUPPRESS_DEPRECATED_START
actualDesc = actualDesc->cloneWithNewDims(tensor_desc.getLayout() == InferenceEngine::Layout::SCALAR
? InferenceEngine::SizeVector{1}
: tensor_desc.getDims());
OPENVINO_SUPPRESS_DEPRECATED_END
actualDesc = actualDesc->cloneWithNewDims(
ov::is_scalar(tensor->get_shape()) ? VectorDims{1} : VectorDims{tensor->get_shape()});
}
if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensor_desc)) &&
m_graph->_normalizePreprocMap.find(name) == m_graph->_normalizePreprocMap.end()) {
if (actualDesc->isCompatible(*mem_desc_ptr)) {
m_external_ptr[name] = tensor;
} else if (m_external_ptr.find(name) != m_external_ptr.end()) {
m_external_ptr.erase(name);
@ -481,7 +447,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
}
const auto& desc = m_graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc();
if (!isDynamic && tensor_desc == MemoryDescUtils::convertToTensorDesc(desc)) {
if (!isDynamic && mem_desc_ptr->isCompatible(desc)) {
m_external_ptr[name] = tensor;
} else if (m_external_ptr.find(name) != m_external_ptr.end()) {
m_external_ptr.erase(name);
@ -538,12 +504,12 @@ void SyncInferRequest::init_tensor(const std::string& name) {
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
ov::ISyncInferRequest::set_tensor(port, tensor);
auto desc = create_tensor_desc(tensor);
if (!isDynamic &&
desc == MemoryDescUtils::convertToTensorDesc(
m_graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
m_graph->_normalizePreprocMap.find(name) == m_graph->_normalizePreprocMap.end()) {
m_external_ptr[name] = tensor;
if (!isDynamic) {
auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
if (mem_desc_ptr->isCompatible(
m_graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc())) {
m_external_ptr[name] = tensor;
}
}
}
}
@ -626,11 +592,11 @@ void SyncInferRequest::init_tensor(const std::string& name) {
}
}
m_outputs[name] = tensor;
auto desc = create_tensor_desc(tensor);
if (!port_shape.is_dynamic() && !m_external_ptr.count(name) &&
desc == MemoryDescUtils::convertToTensorDesc(
output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) {
m_external_ptr[name] = tensor;
if (!port_shape.is_dynamic() && !m_external_ptr.count(name)) {
auto desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
if (desc->isCompatible(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) {
m_external_ptr[name] = tensor;
}
}
// update tensors in case of multiple output ports with the same name
for (const auto& out : get_outputs()) {

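In the set_tensor changes above, the dynamic-shape path now normalizes a rank-0 tensor shape to {1} before cloning the descriptor (ov::is_scalar(...) ? VectorDims{1} : ...). A small sketch of that normalization using only the public OpenVINO core header; the surrounding descriptor machinery is plugin-internal and omitted here, and the function name is hypothetical:

#include <cstddef>
#include <vector>

#include "openvino/core/shape.hpp"

// Blocked memory descriptors need at least one dimension, so a scalar
// (rank-0) shape is normalized to {1}; any other shape passes through.
std::vector<size_t> dims_for_descriptor(const ov::Shape& shape) {
    return ov::is_scalar(shape) ? std::vector<size_t>{1}
                                : std::vector<size_t>(shape.begin(), shape.end());
}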

@ -5,15 +5,18 @@
#include <ie_ngraph_utils.hpp>
#include "cpu_memory_desc.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include <cpu_memory.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/general_utils.h"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"
#include "utils/cpu_utils.hpp"
#include <limits>
#include <vector>
#include <numeric>
#include "utils/general_utils.h"
#include <blob_factory.hpp>
#include <cpu_memory.h>
#include <dnnl_types.h>
#include <limits>
#include <numeric>
#include <vector>
using namespace dnnl;
using namespace InferenceEngine;
@ -46,27 +49,12 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Memo
}
}
CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
if (desc.getLayout() == InferenceEngine::Layout::ANY)
OPENVINO_THROW("Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc");
const auto& blkDesc = desc.getBlockingDesc();
const auto& dims = desc.getDims();
auto strides = blkDesc.getStrides();
// for empty tensor case InferenceEngine::TensorDesc fill strides with non zero values before first 0 dims
// i.e. dims[1, 0, 2, 3] -> strides [0, 6, 3, 1]
if (std::any_of(dims.begin(), dims.end(), [](size_t dim){ return dim == 0; })) {
std::fill(strides.begin(), strides.end(), 0);
BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
if (desc->getType() & MemoryDescType::Blocked) {
return std::dynamic_pointer_cast<BlockedMemoryDesc>(desc);
} else {
OPENVINO_THROW("Can not convert unsupported memory descriptor");
}
return CpuBlockedMemoryDesc(InferenceEngine::details::convertPrecision(desc.getPrecision()),
Shape(dims),
blkDesc.getBlockDims(),
blkDesc.getOrder(),
blkDesc.getOffsetPadding(),
blkDesc.getOffsetPaddingToData(),
strides);
}
CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::SoPtr<ov::ITensor>& tensor) {
@ -82,6 +70,8 @@ CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::
if (byte_strides.empty()) {
blk_strides = ov::row_major_strides(shape);
} else if (tensor->get_size() == 0) {
blk_strides.resize(shape.size());
} else {
// ROI tensor need figure out correct blk_strides
blk_strides.resize(byte_strides.size());
@ -108,6 +98,7 @@ CpuBlockedMemoryDescPtr MemoryDescUtils::generateCpuBlockedMemoryDesc(const ov::
blk_strides);
}
OPENVINO_SUPPRESS_DEPRECATED_START
DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) {
if (desc.getLayout() == InferenceEngine::Layout::ANY)
OPENVINO_THROW("Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc");
@ -131,15 +122,7 @@ DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const Infe
strides);
}
BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) {
if (desc->getType() & MemoryDescType::Blocked) {
return std::dynamic_pointer_cast<BlockedMemoryDesc>(desc);
} else {
OPENVINO_THROW("Can not convert unsupported memory descriptor");
}
}
InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem) {
InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory& mem) {
// TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor
auto& memDesc = mem.getDesc();
InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);
@ -148,7 +131,7 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem)
return make_blob_with_precision(desc, mem.getData());
}
InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory &mem) {
InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory& mem) {
auto& memDesc = mem.getDesc();
InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc);
@ -174,6 +157,7 @@ InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDes
OPENVINO_THROW("Cannot convert MemoryDesc to InferenceEngine::TensorDesc");
}
}
OPENVINO_SUPPRESS_DEPRECATED_END
std::string MemoryDescUtils::dim2str(Dim dim) {
return dim == Shape::UNDEFINED_DIM ? "?" : std::to_string(dim);

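The new tensor->get_size() == 0 branch above exists because ov::row_major_strides still yields non-zero strides for a shape containing a zero dimension, which is what the "zero dim with non-zero strides" fix in the commit message refers to; for an empty tensor the block strides are simply zero-filled instead of being derived from the reported byte strides. A quick standalone check of that stride behaviour using the public OpenVINO core header:

#include <cstddef>
#include <iostream>

#include "openvino/core/shape.hpp"

// A shape with a zero dimension still gets non-zero row-major strides for
// the leading axes, e.g. {1, 0, 2, 3} -> {0, 6, 3, 1}.
int main() {
    const ov::Shape shape{1, 0, 2, 3};
    const auto strides = ov::row_major_strides(shape);
    for (size_t s : strides)
        std::cout << s << ' ';
    std::cout << '\n';  // prints: 0 6 3 1
    return 0;
}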

@ -7,7 +7,8 @@
#include <onednn/dnnl.h>
#include "cpu_types.h"
#include "cpu_shape.h"
#include "openvino/runtime/itensor.hpp"
#include "openvino/runtime/so_ptr.hpp"
#include <ie_layouts.h>
#include <ie_blob.h>
#include <openvino/runtime/so_ptr.hpp>
@ -42,11 +43,11 @@ public:
static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const MemoryDesc& desc);
/**
* @brief Converts InferenceEngine::TensorDesc to CpuBlockedMemoryDesc
* @param desc InferenceEngine::TensorDesc to be converted
* @return converted CpuBlockedMemoryDesc
* @brief Converts MemoryDesc to BlockedMemoryDesc
* @param desc MemoryDesc to be converted
* @return converted BlockedMemoryDesc
*/
static CpuBlockedMemoryDesc convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc);
static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);
/**
* @brief Builds CpuBlockedMemoryDesc for given ov::ITensor
@ -55,6 +56,7 @@ public:
*/
static std::shared_ptr<CpuBlockedMemoryDesc> generateCpuBlockedMemoryDesc(const ov::SoPtr<ov::ITensor>& tensor);
OPENVINO_SUPPRESS_DEPRECATED_START
/**
* @brief Converts InferenceEngine::TensorDesc to DnnlBlockedMemoryDesc
* @param desc InferenceEngine::TensorDesc to be converted
@ -62,13 +64,6 @@ public:
*/
static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc);
/**
* @brief Converts MemoryDesc to BlockedMemoryDesc
* @param desc MemoryDesc to be converted
* @return converted BlockedMemoryDesc
*/
static std::shared_ptr<BlockedMemoryDesc> convertToBlockedMemoryDesc(const std::shared_ptr<MemoryDesc> &desc);
/**
* @brief Creates InferenceEngine::Blob from Memory with the memory reuse
* @param desc Memory from which will be created InferenceEngine::Blob
@ -89,6 +84,7 @@ public:
* @return converted InferenceEngine::TensorDesc
*/
static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc);
OPENVINO_SUPPRESS_DEPRECATED_END
static constexpr Dim DEFAULT_DUMMY_VAL = 64;


@ -23,7 +23,6 @@
#include "nodes/eltwise.h"
#include "nodes/matmul.h"
#include "nodes/fullyconnected.h"
#include "nodes/generic.h"
#include "nodes/if.h"
#include "nodes/input.h"
#include "nodes/lrn.h"
@ -855,13 +854,11 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) {
internalBlobs.size());
}
const auto &internalBlob = internalBlobs[indx];
const auto& internalBlob = internalBlobs[indx];
auto create = [&] () {
// TODO [DS]: internal blobs should be removed or rewritten using Memory object
auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc());
Memory memory{engine, newDesc, internalBlob->buffer()};
auto create = [&]() {
auto newDesc = internalBlob->getDescPtr();
Memory memory{engine, newDesc, internalBlob->getData()};
MemoryPtr _ptr = std::make_shared<Memory>(engine, intDesc);
node::Reorder::reorderData(memory, *_ptr, context->getParamsCache());
@ -872,12 +869,13 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) {
auto weightCache = context->getWeightsCache();
if (weightCache != nullptr && memory::format_kind::blocked == intDesc->getDnnlDesc().get_format_kind()) {
const auto& format = intDesc->serializeFormat();
const uint64_t data_hash = weightCache->GetHashFunc().hash(
internalBlob->buffer(), internalBlob->byteSize());
const uint64_t data_hash =
weightCache->GetHashFunc().hash(static_cast<const unsigned char*>(internalBlob->getData()),
internalBlob->getSize());
const std::string string_hash = name + "_" + std::to_string(indx)
+ "_" + format
+ "_" + std::to_string(internalBlob->byteSize())
+ "_" + std::to_string(internalBlob->getSize())
+ "_" + std::to_string(data_hash);
ptr = *weightCache->findOrCreate(string_hash, create);
@ -1254,24 +1252,22 @@ bool Node::isFusedWith(Type fusedNodeType) const {
return false;
}
InferenceEngine::Layout Node::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
dnnl::memory::format_tag Node::getWeightsFormatTagByDims(const SizeVector& dims) const {
switch (dims.size()) {
case 0:
return InferenceEngine::Layout::SCALAR;
case 1:
return InferenceEngine::Layout::C;
case 2:
return InferenceEngine::Layout::NC;
case 3:
return InferenceEngine::Layout::CHW;
case 4:
return InferenceEngine::Layout::OIHW;
case 5:
return isGrouped ? InferenceEngine::Layout::GOIHW : InferenceEngine::Layout::OIDHW;
case 6:
return isGrouped ? InferenceEngine::Layout::GOIDHW : InferenceEngine::Layout::BLOCKED;
default:
return InferenceEngine::Layout::BLOCKED;
case 1:
return dnnl::memory::format_tag::a;
case 2:
return dnnl::memory::format_tag::ab;
case 3:
return dnnl::memory::format_tag::abc;
case 4:
return dnnl::memory::format_tag::abcd;
case 5:
return dnnl::memory::format_tag::abcde;
case 6:
return dnnl::memory::format_tag::abcdef;
default:
OPENVINO_THROW("getWeightsFormatTagByDims doesn't support dims.size() = ", dims.size());
}
}
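
getWeightsFormatTagByDims above replaces the old InferenceEngine layout lookup with plain dense oneDNN format tags keyed purely on rank. A standalone sketch of the same mapping against the public oneDNN API (the real method reports the error through OPENVINO_THROW instead):

#include <cstddef>
#include <stdexcept>
#include <string>

#include <oneapi/dnnl/dnnl.hpp>

// Maps a weights tensor rank to the dense ("plain") oneDNN format tag,
// mirroring the switch in Node::getWeightsFormatTagByDims shown above.
dnnl::memory::format_tag plain_format_tag_for_rank(size_t rank) {
    switch (rank) {
    case 1: return dnnl::memory::format_tag::a;
    case 2: return dnnl::memory::format_tag::ab;
    case 3: return dnnl::memory::format_tag::abc;
    case 4: return dnnl::memory::format_tag::abcd;
    case 5: return dnnl::memory::format_tag::abcde;
    case 6: return dnnl::memory::format_tag::abcdef;
    default:
        throw std::runtime_error("Unsupported weights rank: " + std::to_string(rank));
    }
}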


@ -526,7 +526,7 @@ public:
return outputShapes[port];
}
const std::vector<InferenceEngine::Blob::Ptr>& getInternalBlobs() const {
const std::vector<MemoryPtr>& getInternalBlobs() const {
return internalBlobs;
}
@ -606,7 +606,7 @@ protected:
};
mutable InPlaceType inplace = InPlaceType::Unknown;
ConstantType constant = ConstantType::Unknown;
std::vector<InferenceEngine::Blob::Ptr> internalBlobs;
std::vector<MemoryPtr> internalBlobs;
std::vector<MemoryPtr> internalBlobMemory;
std::vector<NodeDesc> supportedPrimitiveDescriptors;
std::unordered_map<int, dnnl::memory> primArgs;
@ -633,7 +633,7 @@ protected:
virtual std::vector<dnnl::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const;
InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped);
dnnl::memory::format_tag getWeightsFormatTagByDims(const InferenceEngine::SizeVector& dims) const;
/**
* @brief Auxiliary function to get node input precisions


@ -105,10 +105,6 @@ void AdaptivePooling::initSupportedPrimitiveDescriptors() {
// we supports only fp32 currently
precision = ov::element::f32;
InferenceEngine::LayerConfig config;
config.inConfs.resize(2);
config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 1 : 2));
std::vector<LayoutType> dataFormats{ LayoutType::ncsp };
const auto &inDims = getInputShapeAtPort(0).getDims();
if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] != 1) {


@ -1193,35 +1193,6 @@ bool Convolution::isNspcAvailable() const {
return true;
}
InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) {
const auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(edgeNum)->getParent());
if (!constNode) {
OPENVINO_THROW("Cannot cast ", edgeNum, " input to Input node for ", getName(), ".");
}
auto blb = constNode->getMemoryPtr();
if (blb == nullptr)
OPENVINO_THROW("Cannot get const blob for node ", getName(), ".");
auto const elementsCount = blb->getDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(ov::element::f32), dims, getWeightsLayoutByDims(dims, isGrouped));
Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
internalBlob->allocate();
if (internalBlob->size() != elementsCount) {
OPENVINO_THROW("Created internal blob and const blob has different size for node: ", getName(), ".");
}
cpu_convert(blb->getData(),
internalBlob->buffer(),
DnnlExtensionUtils::DataTypeToElementType(blb->getDataType()),
InferenceEngine::details::convertPrecision(internalBlob->getTensorDesc().getPrecision()),
elementsCount);
return internalBlob;
}
void Convolution::prepareParams() {
auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr();
auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr();


@ -116,7 +116,6 @@ private:
void SetPostOpsAndZeroPoints(std::vector<dnnl::primitive_attr> &attrs);
void filterSupportedDescriptors();
bool isNspcAvailable() const;
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false);
void updatePadding();
MemoryDescPtr getSumMemDesc(const dnnl::primitive_desc &primitive_desc_it);


@ -233,7 +233,7 @@ Deconvolution::Deconvolution(const std::shared_ptr<ov::Node>& op,
attr = std::make_shared<dnnl::primitive_attr>();
}
InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::SizeVector dims) {
MemoryPtr Deconvolution::createWeiBlobAsIO(const VectorDims& dims) {
auto constNode = std::dynamic_pointer_cast<Input>(getParentEdgeAt(1)->getParent());
if (!constNode)
OPENVINO_THROW("Cannot cast const input node for node ", getName(), ".");
@ -244,7 +244,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
auto const blbSize = blb->getSize();
// WA: In int8 case, we are processing weights using internal blob.
InferenceEngine::SizeVector dimsForBlockedDesc{dims};
VectorDims dimsForBlockedDesc{dims};
std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]);
VectorDims orderForBlockedDesc;
@ -256,18 +256,15 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
for (size_t i = 2 + withGroups; i < dimsForBlockedDesc.size(); i++)
orderForBlockedDesc.push_back(i);
BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc);
InferenceEngine::TensorDesc tensorDesc(
InferenceEngine::details::convertPrecision(DnnlExtensionUtils::DataTypeToElementType(blb->getDataType())),
dims,
blkDesc);
Blob::Ptr internalBlob = InferenceEngine::make_shared_blob<int8_t>(tensorDesc);
internalBlob->allocate();
char *data = internalBlob->buffer();
if (data == nullptr)
OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
size_t intBuffSize = internalBlob->byteSize();
auto desc = CpuBlockedMemoryDesc(DnnlExtensionUtils::DataTypeToElementType(blb->getDataType()),
Shape(dims),
dimsForBlockedDesc,
orderForBlockedDesc);
MemoryPtr mem_ptr = std::make_shared<Memory>(getEngine(), desc);
if (!mem_ptr->isAllocated())
OPENVINO_THROW("NotAllocated: Internal tensor was not allocated for node ", getName(), ".");
char* data = static_cast<char*>(mem_ptr->getData());
size_t intBuffSize = mem_ptr->getSize();
size_t offset = blbSize;
if (intBuffSize < offset) {
@ -275,7 +272,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz
}
cpu_memcpy_s(data, intBuffSize, blb->getData(), blbSize);
return internalBlob;
return mem_ptr;
}
bool Deconvolution::canBeExecutedInInt8() const {
@ -846,8 +843,7 @@ void Deconvolution::createPrimitive() {
if (found) {
prepareMemory({DnnlExtensionUtils::makeDescriptor(prim_desc.weights_desc(0))});
} else {
prepareMemory({std::make_shared<DnnlBlockedMemoryDesc>(
MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlobs.front()->getTensorDesc()))});
prepareMemory({internalBlobs.front()->getDescWithType<DnnlMemoryDesc>()});
}
}
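
The rewritten createWeiBlobAsIO above keeps the same safety contract as before: the freshly allocated Memory must be at least as large as the constant weights blob before the raw copy happens. A standalone stand-in for that size check plus cpu_memcpy_s call (plain C++, hypothetical function name):

#include <cstddef>
#include <cstring>
#include <stdexcept>

// Copies src_size bytes into dst only if the destination buffer is large
// enough, mimicking the size check followed by cpu_memcpy_s in the node code.
void bounded_copy(void* dst, size_t dst_size, const void* src, size_t src_size) {
    if (dst_size < src_size)
        throw std::runtime_error("Destination buffer is smaller than the source blob");
    std::memcpy(dst, src, src_size);
}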


@ -118,7 +118,7 @@ private:
std::string errorPrefix;
InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims);
MemoryPtr createWeiBlobAsIO(const VectorDims& dims);
};
} // namespace node


@ -18,7 +18,7 @@ namespace node {
class Generic : public Node {
public:
Generic(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
Generic(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr context);
~Generic() = default;
void getSupportedDescriptors() override;


@ -746,24 +746,23 @@ void RNN::fillSequenceDesc() {
template <typename Prec>
void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) {
const auto& weightPrec = DnnlExtensionUtils::DataTypeToElementType(inDataTypes[wIdx]);
const auto& targetWeightPrec = DnnlExtensionUtils::DataTypeToElementType(weightsByinputDataType.at(inDataTypes[xIdx]));
const auto& weightPrec = DnnlExtensionUtils::DataTypeToElementType(inDataTypes[wIdx]);
const auto& targetWeightDataType = weightsByinputDataType.at(inDataTypes[xIdx]);
const auto& targetWeightPrec = DnnlExtensionUtils::DataTypeToElementType(targetWeightDataType);
// create weight blobs (data and state part)
const VectorDims dims_w = { L, D, DC, G, SC };
TensorDesc w_data_desc(InferenceEngine::details::convertPrecision(targetWeightPrec), dims_w, getWeightsLayoutByDims(dims_w, false));
const VectorDims dims_w = {L, D, DC, G, SC};
auto w_data_desc = DnnlBlockedMemoryDesc(Shape(dims_w), targetWeightDataType, getWeightsFormatTagByDims(dims_w));
MemoryPtr w_data_mem = std::make_shared<Memory>(getEngine(), w_data_desc);
auto w_ptr = static_cast<Prec*>(w_data_mem->getData());
Blob::Ptr w_data_mem = make_shared_blob<Prec>(w_data_desc);
w_data_mem->allocate();
auto w_ptr = static_cast<Prec*>(w_data_mem->buffer());
if (w_ptr == nullptr)
OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
const VectorDims dims_s = { L, D, SC, G, SC };
TensorDesc w_state_desc(InferenceEngine::details::convertPrecision(targetWeightPrec), dims_s, getWeightsLayoutByDims(dims_s, false));
Blob::Ptr w_state_mem = make_shared_blob<Prec>(w_state_desc);
w_state_mem->allocate();
auto r_ptr = static_cast<Prec*>(w_state_mem->buffer());
const VectorDims dims_s = {L, D, SC, G, SC};
auto w_state_desc = DnnlBlockedMemoryDesc(Shape(dims_s), targetWeightDataType, getWeightsFormatTagByDims(dims_s));
MemoryPtr w_state_mem = std::make_shared<Memory>(getEngine(), w_state_desc);
auto r_ptr = static_cast<Prec*>(w_state_mem->getData());
if (r_ptr == nullptr)
OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");
@ -803,7 +802,6 @@ void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx)
}
}
}
internalBlobs.push_back(w_data_mem);
internalBlobs.push_back(w_state_mem);
}
@ -817,10 +815,11 @@ void RNN::fillBiases(const int *gate_map) {
}
VectorDims dims_b = { L, D, Gb, SC };
TensorDesc w_bias_data_desc(InferenceEngine::details::convertPrecision(Prec), dims_b, getWeightsLayoutByDims(dims_b, false));
Blob::Ptr w_bias_data_mem = make_shared_blob<dataType>(w_bias_data_desc);
w_bias_data_mem->allocate();
auto b_ptr = static_cast<dataType*>(w_bias_data_mem->buffer());
auto _data_type = DnnlExtensionUtils::ElementTypeToDataType(Prec);
auto w_bias_data_desc = DnnlBlockedMemoryDesc(Shape(dims_b), _data_type, getWeightsFormatTagByDims(dims_b));
MemoryPtr w_bias_data_mem = std::make_shared<Memory>(getEngine(), w_bias_data_desc);
auto b_ptr = static_cast<dataType*>(w_bias_data_mem->getData());
if (b_ptr == nullptr)
OPENVINO_THROW("NotAllocated: Internal blob was not allocated for node ", getName(), ".");


@ -1,130 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "normalize_preprocess.h"
#include "ie_parallel.hpp"
#include "nodes/common/cpu_memcpy.h"
#include "utils/general_utils.h"
#include "ie_ngraph_utils.hpp"
using namespace InferenceEngine;
namespace ov {
namespace intel_cpu {
NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) {
}
void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo) {
PreProcessInfo &pp = inputInfo->getPreProcess();
size_t inChannels = pp.getNumberOfChannels();
if (inChannels == 0) {
meanBuffer = nullptr;
return;
}
if (!dimsEqualStrong(inChannels, inputShape.getDims()[1])) {
OPENVINO_THROW("channels mismatch between mean and input");
}
switch (pp.getMeanVariant()) {
case MEAN_VALUE: {
// mean and standard deviation image common value per channel (1x1xC)
meanValues.resize(inChannels);
stdScales.resize(inChannels);
for (unsigned channel = 0; channel < inChannels; channel++) {
if (pp[channel]->stdScale == 0) {
OPENVINO_THROW("Preprocessing error: stdScale cannot be equal zero");
}
meanValues[channel] = pp[channel]->meanValue;
stdScales[channel] = pp[channel]->stdScale;
}
}
break;
case MEAN_IMAGE: {
// since oneDNN expects all channels in the same buffer - we copy it here as it comes from different channels...
auto meanWidth = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 1];
auto meanHeight = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 2];
TensorDesc desc(InferenceEngine::details::convertPrecision(ov::element::f32), {inChannels, meanHeight, meanWidth}, InferenceEngine::Layout::CHW);
meanBuffer = make_shared_blob<float>(desc);
meanBuffer->allocate();
for (unsigned channel = 0; channel < inChannels; channel++) {
Blob::Ptr meanBlob = pp[channel]->meanData;
if (!meanBlob || InferenceEngine::details::convertPrecision(meanBlob->getTensorDesc().getPrecision()) != ov::element::f32)
OPENVINO_THROW("mean image not provided or not in Float 32");
if (meanBlob->size() != meanHeight*meanWidth) {
OPENVINO_THROW("mean image size does not match expected network input, expecting ",
meanWidth,
" x ",
meanHeight);
}
// todo: cast to TBlob and make sure it is floats
cpu_memcpy_s(meanBuffer->data() + channel*meanBlob->size(), meanBuffer->byteSize() - channel*meanBlob->byteSize(),
meanBlob->buffer(), meanBlob->byteSize());
}
}
break;
case NONE: {
// there is no mean image. So disable mean image step
meanBuffer = nullptr;
}
break;
default: {
OPENVINO_THROW("Unsupported mean variant: ", pp.getMeanVariant());
}
}
}
void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout) {
OPENVINO_ASSERT(input != nullptr);
const auto inputDims = inputShape.getStaticDims();
if (inputDims.size() != 4) {
OPENVINO_THROW("Expecting input as 4 dimension blob with format NxCxHxW.");
}
if (layout != NCHW && layout != NHWC) {
OPENVINO_THROW("Expecting input layout NCHW or NHWC.");
}
int MB = inputDims[0];
int srcSize = inputShape.getElementsCount() / MB;
if (meanBuffer && meanBuffer->size()) {
const float * meanBufferValues = meanBuffer->readOnly();
parallel_for2d(MB, srcSize, [&](int mb, int i) {
input[srcSize * mb + i] -= meanBufferValues[i];
});
} else if (!meanValues.empty() && !stdScales.empty()) {
int C = inputDims[1];
srcSize /= inputDims[1];
if (layout == NCHW) {
parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
input[mb * C * srcSize + c * srcSize + i] -= meanValues[c];
input[mb * C * srcSize + c * srcSize + i] /= stdScales[c];
});
} else if (layout == NHWC) {
parallel_for2d(MB, srcSize, [&](int mb, int i) {
for (int c = 0; c < C; c++) {
input[mb * srcSize * C + i * C + c] -= meanValues[c];
input[mb * srcSize * C + i * C + c] /= stdScales[c];
}
});
}
} else {
OPENVINO_THROW("Preprocessing error: meanValues and stdScales arrays are inconsistent.");
}
}
} // namespace intel_cpu
} // namespace ov


@ -1,93 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ie_input_info.hpp"
#include "cpu_shape.h"
#include "ie_parallel.hpp"
#include <vector>
#include <limits>
namespace ov {
namespace intel_cpu {
class NormalizePreprocess {
public:
NormalizePreprocess();
public:
void Load(const Shape& inputShape, InferenceEngine::InputInfo::Ptr inputInfo);
void NormalizeImage(const Shape &inputShape, float *input, InferenceEngine::Layout layout);
template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
void NormalizeImage(const Shape &inputShape, T *input, InferenceEngine::Layout layout) {
OPENVINO_ASSERT(input != nullptr);
const auto inputDims = inputShape.getStaticDims();
if (inputDims.size() != 4) {
OPENVINO_THROW("Expecting input as 4 dimension blob with format NxCxHxW.");
}
if (layout != InferenceEngine::NCHW && layout != InferenceEngine::NHWC) {
OPENVINO_THROW("Expecting input layout NCHW or NHWC.");
}
int MB = inputDims[0];
int srcSize = inputShape.getElementsCount() / MB;
if (meanBuffer && meanBuffer->size()) {
const float * meanBufferValues = meanBuffer->readOnly();
InferenceEngine::parallel_for2d(MB, srcSize, [&](int mb, int i) {
int buf = input[srcSize * mb + i];
buf -= meanBufferValues[i];
if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
input[srcSize * mb + i] = buf;
});
} else if (!meanValues.empty() && !stdScales.empty()) {
int C = inputDims[1];
srcSize /= inputDims[1];
for (int c = 0; c < C; c++) {
if (stdScales[c] != 1)
OPENVINO_THROW("Preprocessing error: fractional normalization is not supported for integer data. ");
}
if (layout == InferenceEngine::NCHW) {
InferenceEngine::parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
int buf = input[srcSize * mb * C + c * srcSize + i];
buf -= meanValues[c];
if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
input[srcSize * mb * C + c * srcSize + i] = buf;
});
} else if (layout == InferenceEngine::NHWC) {
InferenceEngine::parallel_for2d(MB, srcSize, [&](int mb, int i) {
for (int c = 0; c < C; c++) {
int buf = input[mb * srcSize * C + i * C + c];
buf -= meanValues[c];
if (buf < (std::numeric_limits<T>::min)()) buf = (std::numeric_limits<T>::min)();
if (buf > (std::numeric_limits<T>::max)()) buf = (std::numeric_limits<T>::max)();
input[mb * srcSize * C + i * C + c] = buf;
}
});
}
} else {
OPENVINO_THROW("Preprocessing error: meanValues and stdScales arrays are inconsistent.");
}
}
private:
std::vector<float> meanValues;
std::vector<float> stdScales;
InferenceEngine::TBlob<float>::Ptr meanBuffer;
};
} // namespace intel_cpu
} // namespace ov


@ -136,7 +136,7 @@ std::mutex Engine::SchedulerGuard::mutex;
std::weak_ptr<Engine::SchedulerGuard> Engine::SchedulerGuard::ptr;
Engine::SchedulerGuard::SchedulerGuard() {
#if IE_THREAD == IE_THREAD_SEQ
#if OV_THREAD == OV_THREAD_SEQ
// To save state for ACL cores in single-thread mode
arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST);
#else


@ -86,11 +86,6 @@ inline bool isPerTensorOrPerChannelBroadcastable(const VectorDims &firstInputDim
return true;
}
inline bool isEmptyTensorDesc(const InferenceEngine::TensorDesc &td) {
const auto dims = td.getDims();
return std::any_of(dims.begin(), dims.end(), [](size_t dim) { return dim == 0; } );
}
/**
* @brief Return precision to which given precision must be converted to be supported in plug-in
* @param precision


@ -107,12 +107,10 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config
std::string file_name = NameFromType(node->getType()) + "_" + nodeName + "_blb" + std::to_string(i) + ".ieb";
auto dump_file = config.blobDumpDir + "/#" + std::to_string(node->getExecIndex()) + "_" + file_name;
TensorDesc desc = blb->getTensorDesc();
if (InferenceEngine::details::convertPrecision(desc.getPrecision()) == ov::element::u1)
if (blb->getDesc().getPrecision() == ov::element::u1)
continue;
MemoryPtr memory = std::make_shared<Memory>(node->getEngine(), MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer());
BlobDumper dumper(memory);
BlobDumper dumper(blb);
dump(dumper, dump_file, config);
}
}