[CPU] Cache for runtime data (#9192)
Caching added for Eltwise and MatMul nodes
This commit is contained in:
parent
cb9fe0910d
commit
2870dc7d3f
@ -45,6 +45,13 @@ DECLARE_CONFIG_KEY(LP_TRANSFORMS_MODE);
|
||||
*/
|
||||
DECLARE_CONFIG_KEY(CPU_THREADS_PER_STREAM);
|
||||
|
||||
/**
|
||||
* @brief Defines how many records can be stored in the CPU runtime parameters cache per CPU runtime parameter type per
|
||||
* stream
|
||||
* @ingroup ie_dev_api_plugin_api
|
||||
*/
|
||||
DECLARE_CONFIG_KEY(CPU_RUNTIME_CACHE_CAPACITY);
|
||||
|
||||
/**
|
||||
* @brief This key should be used to force disable export while loading network even if global cache dir is defined
|
||||
* Used by HETERO plugin to disable automatic caching of subnetworks (set value to YES)
|
||||
|
69
src/plugins/intel_cpu/src/cache/cache_entry.h
vendored
Normal file
69
src/plugins/intel_cpu/src/cache/cache_entry.h
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include "lru_cache.h"
|
||||
|
||||
namespace MKLDNNPlugin {

// Forward declaration; the full definition is provided by "lru_cache.h" included above.
template<typename Key, typename Value>
class LruCache;

/**
 * @brief Non-template base that allows heterogeneous cache entries to be kept
 * behind one pointer type and reports the outcome of a lookup operation.
 */
class CacheEntryBase {
public:
    enum class LookUpStatus : int8_t {
        Hit,
        Miss
    };

public:
    virtual ~CacheEntryBase() = default;
};

/**
 * @brief Templated record of the multi cache.
 * @tparam KeyType key type; it must expose a hash() const method returning something convertible to size_t and support equality comparison.
 * @tparam ValType value type; it must satisfy the requirements of an std::unordered_map mapped type.
 * @tparam ImplType internal storage type; it must offer put(KeyType, ValType), ValType get(const KeyType&) and
 * size_t getCapacity(), and be constructible from a size_t capacity.
 *
 * @note A default constructed ValType is interpreted as "no value stored".
 */
template<typename KeyType,
         typename ValType,
         typename ImplType = LruCache<KeyType, ValType>>
class CacheEntry : public CacheEntryBase {
public:
    using ResultType = std::pair<ValType, LookUpStatus>;

public:
    explicit CacheEntry(size_t capacity) : _impl(capacity) {}

    /**
     * @brief Looks the key up in the underlying storage; on a miss the builder functor
     * produces the value, which is then remembered in the storage.
     * @param key search key
     * @param builder callable that constructs a ValType from a KeyType lvalue reference
     * @return pair of the requested ValType object and the hit/miss status
     */
    ResultType getOrCreate(const KeyType& key, std::function<ValType(const KeyType&)> builder) {
        if (0 == _impl.getCapacity()) {
            // Zero capacity: nothing can ever be cached, so just build and return.
            return {builder(key), CacheEntryBase::LookUpStatus::Miss};
        }
        ValType cached = _impl.get(key);
        if (!(cached == ValType())) {
            return {cached, LookUpStatus::Hit};
        }
        // A default constructed value means "not found" -> build and remember it.
        ValType created = builder(key);
        _impl.put(key, created);
        return {created, LookUpStatus::Miss};
    }

public:
    ImplType _impl;
};
}// namespace MKLDNNPlugin
|
106
src/plugins/intel_cpu/src/cache/lru_cache.h
vendored
Normal file
106
src/plugins/intel_cpu/src/cache/lru_cache.h
vendored
Normal file
@ -0,0 +1,106 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
/**
|
||||
* @brief This is yet another implementation of a preemptive cache with LRU eviction policy.
|
||||
* @tparam Key is a key type that must define hash() const method with return type convertible to size_t and define comparison operator.
|
||||
* @tparam Value is a type that must meet all the requirements to the std::unordered_map mapped type
|
||||
*
|
||||
* @attention This cache implementation IS NOT THREAD SAFE!
|
||||
*/
|
||||
|
||||
namespace MKLDNNPlugin {

/**
 * @brief Preemptive cache with least-recently-used eviction policy.
 * @tparam Key key type; it must expose a hash() const method returning something convertible to size_t and support equality comparison.
 * @tparam Value value type; it must satisfy the requirements of an std::unordered_map mapped type.
 *
 * @attention This cache implementation IS NOT THREAD SAFE!
 */
template<typename Key, typename Value>
class LruCache {
public:
    using value_type = std::pair<Key, Value>;

public:
    explicit LruCache(size_t capacity) : _capacity(capacity) {}

    /**
     * @brief Stores the value under the key; the record becomes the most recently used one.
     * @param key
     * @param val
     */
    void put(Key key, Value val) {
        if (0 == _capacity) {
            return;  // cache is disabled, store nothing
        }
        auto found = _index.find(key);
        if (found != _index.end()) {
            // Existing record: promote it and overwrite the value in place.
            promote(found->second);
            found->second->second = std::move(val);
            return;
        }
        if (_index.size() == _capacity) {
            evict(1);  // make room by dropping the least recently used record
        }
        _order.emplace_front(key, std::move(val));
        _index.emplace(std::move(key), _order.begin());
    }

    /**
     * @brief Looks up a value; a found record becomes the most recently used one.
     * @param key
     * @return the stored value, or a default constructed Value instance when the key is absent
     */
    Value get(const Key &key) {
        auto found = _index.find(key);
        if (found == _index.end()) {
            return Value();
        }
        promote(found->second);
        return _order.front().second;
    }

    /**
     * @brief Drops up to n least recently used records.
     * @param n number of records to evict; may exceed the current size
     */
    void evict(size_t n) {
        while (n != 0 && !_order.empty()) {
            _index.erase(_order.back().first);
            _order.pop_back();
            --n;
        }
    }

    /**
     * @brief Returns the maximum number of records the cache may hold.
     * @return the capacity value
     */
    size_t getCapacity() const noexcept {
        return _capacity;
    }

private:
    // Adapts Key::hash() to the standard hasher interface.
    struct KeyHasher {
        std::size_t operator()(const Key &k) const {
            return k.hash();
        }
    };

    using lru_list_type = std::list<value_type>;

    // Moves the record to the front of the usage order (most recently used).
    void promote(typename lru_list_type::iterator itr) {
        _order.splice(_order.begin(), _order, itr);
    }

    lru_list_type _order;                                                      // usage order, front = most recent
    std::unordered_map<Key, typename lru_list_type::iterator, KeyHasher> _index;  // key -> position in _order
    size_t _capacity;
};

} // namespace MKLDNNPlugin
|
9
src/plugins/intel_cpu/src/cache/multi_cache.cpp
vendored
Normal file
9
src/plugins/intel_cpu/src/cache/multi_cache.cpp
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "multi_cache.h"

using namespace MKLDNNPlugin;

// Definition of the process-wide counter that MultiCache::getTypeId() uses to
// hand out a unique numeric id per entry type; atomic so ids stay unique even
// when caches are created from several threads.
std::atomic_size_t MultiCache::_typeIdCounter{0};
|
84
src/plugins/intel_cpu/src/cache/multi_cache.h
vendored
Normal file
84
src/plugins/intel_cpu/src/cache/multi_cache.h
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <atomic>
|
||||
#include "cache_entry.h"
|
||||
|
||||
namespace MKLDNNPlugin {

/**
 * @brief Class that represent a preemptive cache for different key/value pair types.
 *
 * Internally it keeps one type-erased CacheEntry per distinct Key/Value type
 * combination; entries are addressed by a per-type integer id (see getTypeId).
 *
 * @attention This implementation IS NOT THREAD SAFE!
 */

class MultiCache {
public:
    template<typename KeyType, typename ValueType>
    using EntryTypeT = CacheEntry<KeyType, ValueType>;
    using EntryBasePtr = std::shared_ptr<CacheEntryBase>;
    template<typename KeyType, typename ValueType>
    using EntryPtr = std::shared_ptr<EntryTypeT<KeyType, ValueType>>;

public:
    /**
     * @param capacity here means maximum records limit FOR EACH entry specified by a pair of Key/Value types.
     * @note zero capacity means empty cache so no records are stored and no entries are created
     */
    explicit MultiCache(size_t capacity) : _capacity(capacity) {}

    /**
     * @brief Searches a value of ValueType in the cache using the provided key or creates a new ValueType instance (if nothing was found)
     * using the key and the builder functor and adds the new record to the cache
     * @param key is the search key
     * @param builder is a callable object that creates the ValType object from the KeyType lval reference.
     * Also the builder type is used for the ValueType deduction
     * @return result of the operation which is a pair of the requested object of ValType and the status of whether the cache hit or miss occurred
     */

    // NOTE(review): std::result_of is deprecated in C++17 and removed in C++20;
    // switch to std::invoke_result once the minimum supported standard allows it.
    template<typename KeyType, typename BuilderType, typename ValueType = typename std::result_of<BuilderType&(const KeyType&)>::type>
    typename CacheEntry<KeyType, ValueType>::ResultType
    getOrCreate(const KeyType& key, BuilderType builder) {
        auto entry = getEntry<KeyType, ValueType>();
        return entry->getOrCreate(key, std::move(builder));
    }

private:
    // Returns a process-unique integer id for type T (one id per distinct T).
    template<typename T>
    size_t getTypeId();
    // Returns the entry that stores records of the given Key/Value pair type,
    // creating it on first use.
    template<typename KeyType, typename ValueType>
    EntryPtr<KeyType, ValueType> getEntry();

private:
    // Shared id generator; atomic because ids must stay unique even when
    // caches are created concurrently (each cache itself is not thread safe).
    static std::atomic_size_t _typeIdCounter;
    size_t _capacity;  // per-entry record limit forwarded to each CacheEntry
    std::unordered_map<size_t, EntryBasePtr> _storage;  // type id -> type-erased entry
};

template<typename T>
size_t MultiCache::getTypeId() {
    // Function-local static: the id is assigned on the first call for a
    // particular T and then reused, giving every entry type a stable index.
    static size_t id = _typeIdCounter.fetch_add(1);
    return id;
}

template<typename KeyType, typename ValueType>
MultiCache::EntryPtr<KeyType, ValueType> MultiCache::getEntry() {
    using EntryType = EntryTypeT<KeyType, ValueType>;
    size_t id = getTypeId<EntryType>();
    auto itr = _storage.find(id);
    if (itr == _storage.end()) {
        // Lazily create the entry with the per-entry capacity limit.
        auto result = _storage.insert({id, std::make_shared<EntryType>(_capacity)});
        itr = result.first;
    }
    // The id uniquely identifies EntryType, so this downcast is safe.
    return std::static_pointer_cast<EntryType>(itr->second);
}

using MultiCachePtr = std::shared_ptr<MultiCache>;
using MultiCacheCPtr = std::shared_ptr<const MultiCache>;

} // namespace MKLDNNPlugin
|
@ -117,6 +117,17 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
|
||||
}
|
||||
} else if (key == PluginConfigParams::KEY_CACHE_DIR) {
|
||||
cache_dir = val;
|
||||
} else if (PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY == key) {
|
||||
int val_i = -1;
|
||||
try {
|
||||
val_i = std::stoi(val);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY
|
||||
<< ". Expected only integer numbers";
|
||||
}
|
||||
// any negative value will be treated
|
||||
// as zero that means disabling the cache
|
||||
rtCacheCapacity = std::max(val_i, 0);
|
||||
} else {
|
||||
IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ struct Config {
|
||||
bool enableDynamicBatch = false;
|
||||
std::string dumpToDot = "";
|
||||
int batchLimit = 0;
|
||||
size_t rtCacheCapacity = 100ul;
|
||||
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;
|
||||
InferenceEngine::PerfHintsConfig perfHintsConfig;
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
|
@ -10,8 +10,8 @@ namespace MKLDNNPlugin {
|
||||
|
||||
class jit_emu_vcvtneps2bf16 : public jit_emitter {
|
||||
public:
|
||||
jit_emu_vcvtneps2bf16(mkldnn::impl::cpu::x64::jit_generator* host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::BF16) : jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_emu_vcvtneps2bf16(mkldnn::impl::cpu::x64::jit_generator* host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::BF16) : jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
|
@ -18,8 +18,8 @@ namespace MKLDNNPlugin {
|
||||
/// ADD ///
|
||||
jit_add_emitter::jit_add_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_add_emitter::jit_add_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_add_emitter::jit_add_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_add_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -55,8 +55,8 @@ void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std
|
||||
/// MUL_ADD ///
|
||||
jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_mul_add_emitter::jit_mul_add_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_mul_add_emitter::get_inputs_num() const { return 3; }
|
||||
|
||||
@ -117,8 +117,8 @@ size_t jit_mul_add_emitter::aux_vecs_count() const {
|
||||
/// SUB ///
|
||||
jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_subtract_emitter::jit_subtract_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_subtract_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -155,8 +155,8 @@ void jit_subtract_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
|
||||
/// MULTIPLY ///
|
||||
jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_multiply_emitter::jit_multiply_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_multiply_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -193,8 +193,8 @@ void jit_multiply_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
|
||||
/// DIVIDE ///
|
||||
jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_divide_emitter::jit_divide_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_divide_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -260,8 +260,8 @@ size_t jit_divide_emitter::aux_vecs_count() const {
|
||||
/// FLOOR_MOD ///
|
||||
jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_floor_mod_emitter::jit_floor_mod_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_floor_mod_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -312,8 +312,8 @@ size_t jit_floor_mod_emitter::aux_vecs_count() const {
|
||||
/// MOD ///
|
||||
jit_mod_emitter::jit_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_mod_emitter::jit_mod_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_mod_emitter::jit_mod_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_mod_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -364,8 +364,8 @@ size_t jit_mod_emitter::aux_vecs_count() const {
|
||||
/// MAXIMUM ///
|
||||
jit_maximum_emitter::jit_maximum_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_maximum_emitter::jit_maximum_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_maximum_emitter::jit_maximum_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_maximum_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -414,8 +414,8 @@ std::set<InferenceEngine::Precision> jit_maximum_emitter::get_supported_precisio
|
||||
/// MINIMUM ///
|
||||
jit_minimum_emitter::jit_minimum_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_minimum_emitter::jit_minimum_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_minimum_emitter::jit_minimum_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_minimum_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -465,8 +465,8 @@ std::set<InferenceEngine::Precision> jit_minimum_emitter::get_supported_precisio
|
||||
jit_squared_difference_emitter::jit_squared_difference_emitter(
|
||||
jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_squared_difference_emitter::jit_squared_difference_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_squared_difference_emitter::jit_squared_difference_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_squared_difference_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -506,8 +506,8 @@ void jit_squared_difference_emitter::emit_isa(const std::vector<size_t> &in_vec_
|
||||
/// POWER_DYNAMIC ///
|
||||
jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_power_dynamic_emitter::jit_power_dynamic_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_power_dynamic_emitter::get_inputs_num() const { return 2; }
|
||||
|
||||
@ -617,8 +617,8 @@ jit_equal_emitter::jit_equal_emitter(jit_generator *host, cpu_isa_t host_isa, co
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_equal_emitter::jit_equal_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_equal_emitter::jit_equal_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -678,8 +678,8 @@ jit_not_equal_emitter::jit_not_equal_emitter(jit_generator *host, cpu_isa_t host
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_not_equal_emitter::jit_not_equal_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_not_equal_emitter::jit_not_equal_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -739,8 +739,8 @@ jit_greater_emitter::jit_greater_emitter(jit_generator *host, cpu_isa_t host_isa
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_greater_emitter::jit_greater_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_greater_emitter::jit_greater_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -800,8 +800,8 @@ jit_greater_equal_emitter::jit_greater_equal_emitter(jit_generator *host, cpu_is
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_greater_equal_emitter::jit_greater_equal_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_greater_equal_emitter::jit_greater_equal_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -861,8 +861,8 @@ jit_less_emitter::jit_less_emitter(jit_generator *host, cpu_isa_t host_isa, cons
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_less_emitter::jit_less_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_less_emitter::jit_less_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -922,8 +922,8 @@ jit_less_equal_emitter::jit_less_equal_emitter(jit_generator *host, cpu_isa_t ho
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_less_equal_emitter::jit_less_equal_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_less_equal_emitter::jit_less_equal_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -984,8 +984,8 @@ jit_logical_and_emitter::jit_logical_and_emitter(jit_generator *host, cpu_isa_t
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_logical_and_emitter::jit_logical_and_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_logical_and_emitter::jit_logical_and_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -1066,8 +1066,8 @@ jit_logical_or_emitter::jit_logical_or_emitter(jit_generator *host, cpu_isa_t ho
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_logical_or_emitter::jit_logical_or_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_logical_or_emitter::jit_logical_or_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -1147,8 +1147,8 @@ jit_logical_xor_emitter::jit_logical_xor_emitter(jit_generator *host, cpu_isa_t
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_logical_xor_emitter::jit_logical_xor_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_logical_xor_emitter::jit_logical_xor_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -1228,8 +1228,8 @@ jit_logical_not_emitter::jit_logical_not_emitter(jit_generator *host, cpu_isa_t
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_logical_not_emitter::jit_logical_not_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_logical_not_emitter::jit_logical_not_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -1298,16 +1298,10 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
const MKLDNNEltwiseNode *powerNode = dynamic_cast<const MKLDNNEltwiseNode *>(node);
|
||||
if (powerNode == nullptr) {
|
||||
IE_THROW() << "Can't cast to MKLDNNEltwiseNode";
|
||||
}
|
||||
power = powerNode->getAlpha();
|
||||
scale = powerNode->getBeta();
|
||||
shift = powerNode->getGamma();
|
||||
|
||||
jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
float inpPower, float inpScale, float inpShift,
|
||||
Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc), power(inpPower), scale(inpScale), shift(inpShift) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
@ -1483,8 +1477,8 @@ jit_prelu_emitter::jit_prelu_emitter(jit_generator *host, cpu_isa_t host_isa, co
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
jit_prelu_emitter::jit_prelu_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_prelu_emitter::jit_prelu_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
size_t jit_prelu_emitter::get_inputs_num() const { return 2; }
|
||||
@ -1541,8 +1535,8 @@ size_t jit_prelu_emitter::aux_vecs_count() const {
|
||||
/// SQRT ///
|
||||
jit_sqrt_emitter::jit_sqrt_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_sqrt_emitter::jit_sqrt_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {}
|
||||
jit_sqrt_emitter::jit_sqrt_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {}
|
||||
|
||||
size_t jit_sqrt_emitter::get_inputs_num() const { return 1; }
|
||||
|
||||
@ -1599,8 +1593,8 @@ void jit_negative_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, cons
|
||||
}
|
||||
|
||||
/// ERF ///
|
||||
jit_erf_emitter::jit_erf_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_erf_emitter::jit_erf_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc) {
|
||||
prepare_table();
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ namespace MKLDNNPlugin {
|
||||
|
||||
class jit_add_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -30,7 +30,7 @@ private:
|
||||
|
||||
class jit_mul_add_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_mul_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_mul_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_mul_add_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -51,7 +51,7 @@ private:
|
||||
|
||||
class jit_subtract_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_subtract_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_subtract_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_subtract_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -70,7 +70,7 @@ private:
|
||||
|
||||
class jit_multiply_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_multiply_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_multiply_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_multiply_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -89,7 +89,7 @@ private:
|
||||
|
||||
class jit_divide_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_divide_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_divide_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_divide_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -110,7 +110,7 @@ private:
|
||||
|
||||
class jit_floor_mod_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_floor_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_floor_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_floor_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -130,7 +130,7 @@ private:
|
||||
|
||||
class jit_mod_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_mod_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -150,7 +150,7 @@ private:
|
||||
|
||||
class jit_maximum_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_maximum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_maximum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_maximum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -170,7 +170,7 @@ private:
|
||||
|
||||
class jit_minimum_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_minimum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_minimum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_minimum_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -191,7 +191,6 @@ private:
|
||||
class jit_squared_difference_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_squared_difference_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
const MKLDNNNode* node,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_squared_difference_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
const std::shared_ptr<ngraph::Node>& n,
|
||||
@ -211,7 +210,7 @@ private:
|
||||
|
||||
class jit_power_dynamic_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_power_dynamic_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_power_dynamic_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_power_dynamic_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -230,7 +229,7 @@ private:
|
||||
|
||||
class jit_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -252,7 +251,7 @@ private:
|
||||
|
||||
class jit_not_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_not_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_not_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_not_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -274,7 +273,7 @@ private:
|
||||
|
||||
class jit_greater_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_greater_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_greater_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_greater_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -296,7 +295,7 @@ private:
|
||||
|
||||
class jit_greater_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_greater_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_greater_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_greater_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -318,7 +317,7 @@ private:
|
||||
|
||||
class jit_less_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_less_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_less_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_less_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -340,7 +339,7 @@ private:
|
||||
|
||||
class jit_less_equal_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_less_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_less_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
jit_less_equal_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
@ -363,7 +362,7 @@ private:
|
||||
|
||||
class jit_logical_and_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_and_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_logical_and_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_and_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -385,7 +384,7 @@ private:
|
||||
|
||||
class jit_logical_or_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_or_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_logical_or_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_or_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -407,7 +406,7 @@ private:
|
||||
|
||||
class jit_logical_xor_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_xor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_logical_xor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_xor_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -428,7 +427,7 @@ private:
|
||||
|
||||
class jit_logical_not_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_logical_not_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_logical_not_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_logical_not_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -449,8 +448,9 @@ private:
|
||||
|
||||
class jit_power_static_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_power_static_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_power_static_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
float inpPower, float inpScale, float inpShift,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_power_static_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
@ -474,7 +474,7 @@ private:
|
||||
|
||||
class jit_prelu_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_prelu_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_prelu_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_prelu_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -494,7 +494,7 @@ private:
|
||||
|
||||
class jit_sqrt_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_sqrt_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_sqrt_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_sqrt_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -528,7 +528,7 @@ private:
|
||||
|
||||
class jit_erf_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_erf_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_erf_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
jit_erf_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
|
@ -28,7 +28,7 @@ struct emitter_context {
|
||||
|
||||
class jit_emitter : public ngraph::snippets::Emitter {
|
||||
public:
|
||||
jit_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_emitter(dnnl::impl::cpu::x64::jit_generator* host, dnnl::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_vec)
|
||||
: Emitter(nullptr), h(host), host_isa_(host_isa), exec_prc_(exec_prc), in_out_type_(in_out_type), l_table (new Xbyak::Label()) {
|
||||
k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well
|
||||
|
@ -18,9 +18,9 @@ using namespace Xbyak::util;
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
/// LOAD ///
|
||||
jit_load_emitter::jit_load_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_load_emitter::jit_load_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
Precision exec_prc, emitter_in_out_map in_out_type)
|
||||
: jit_emitter(host, host_isa, node, exec_prc, in_out_type), name(node ? node->getName() : "unknown") {
|
||||
: jit_emitter(host, host_isa, exec_prc, in_out_type), name("unknown") {
|
||||
prepare_table();
|
||||
v_len_elt = get_vec_length() / exec_prc.size();
|
||||
}
|
||||
@ -486,12 +486,12 @@ void jit_load_emitter::register_table_entries() {
|
||||
}
|
||||
|
||||
/// STORE ///
|
||||
jit_store_emitter::jit_store_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_store_emitter::jit_store_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
Precision exec_prc, emitter_in_out_map in_out_type)
|
||||
: jit_emitter(host, host_isa, node, exec_prc, in_out_type), name(node ? node->getName() : "unknown") {
|
||||
: jit_emitter(host, host_isa, exec_prc, in_out_type), name("unknown") {
|
||||
v_len_elt = get_vec_length() / exec_prc.size();
|
||||
if (!mayiuse(cpu::x64::avx512_core_bf16) && mayiuse(cpu::x64::avx512_core)) {
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(host, host_isa, nullptr));
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(host, host_isa));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ struct store_emitter_context : public emitter_context {
|
||||
|
||||
class jit_load_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_load_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_load_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::gpr_to_vec);
|
||||
/**
|
||||
* load_num values with src_prc precision are loaded from ptr[Reg64(in_idxs[0]) + offset_byte] address to Vmm[out_idxs[0]] as dst_prc.
|
||||
@ -100,7 +100,7 @@ private:
|
||||
|
||||
class jit_store_emitter : public jit_emitter {
|
||||
public:
|
||||
jit_store_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_store_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32, emitter_in_out_map in_out_type = emitter_in_out_map::vec_to_gpr);
|
||||
|
||||
/**
|
||||
|
@ -22,15 +22,10 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
set_injector();
|
||||
}
|
||||
|
||||
jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, node, exec_prc) {
|
||||
auto eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode*>(node);
|
||||
if (!eltwiseNode) {
|
||||
IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNEltwiseNode";
|
||||
}
|
||||
kind = static_cast<mkldnn_alg_kind_t>(eltwiseNode->getMKLDNNAlgorithm());
|
||||
alpha = eltwiseNode->getAlpha();
|
||||
beta = eltwiseNode->getBeta();
|
||||
jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
mkldnn_alg_kind_t algKind, float alpha, float beta,
|
||||
InferenceEngine::Precision exec_prc)
|
||||
: jit_emitter(host, host_isa, exec_prc), kind(algKind), alpha(alpha), beta(beta) {
|
||||
|
||||
set_injector();
|
||||
}
|
||||
@ -83,8 +78,10 @@ void jit_mkldnn_emitter::emit_data() const {
|
||||
}
|
||||
}
|
||||
|
||||
jit_mkldnn_aux_emitter::jit_mkldnn_aux_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc)
|
||||
: jit_mkldnn_emitter(host, host_isa, node, exec_prc) {
|
||||
jit_mkldnn_aux_emitter::jit_mkldnn_aux_emitter(jit_generator *host, cpu_isa_t host_isa,
|
||||
mkldnn_alg_kind_t algKind, float inpAlpha, float inpBeta,
|
||||
InferenceEngine::Precision exec_prc)
|
||||
: jit_mkldnn_emitter(host, host_isa, algKind, inpAlpha, inpBeta, exec_prc) {
|
||||
}
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
@ -25,7 +25,8 @@ public:
|
||||
const emitter_context *emit_context = nullptr) const override {};
|
||||
|
||||
protected:
|
||||
jit_mkldnn_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_mkldnn_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
mkldnn_alg_kind_t algKind, float inpAlpha, float inpBeta,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
jit_mkldnn_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
@ -45,7 +46,8 @@ private:
|
||||
|
||||
class jit_mkldnn_aux_emitter : public jit_mkldnn_emitter {
|
||||
public:
|
||||
jit_mkldnn_aux_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa, const MKLDNNNode* node,
|
||||
jit_mkldnn_aux_emitter(mkldnn::impl::cpu::x64::jit_generator *host, mkldnn::impl::cpu::x64::cpu_isa_t host_isa,
|
||||
mkldnn_alg_kind_t algKind, float inpAlpha, float inpBeta,
|
||||
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
|
||||
|
||||
private:
|
||||
|
@ -67,9 +67,11 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg
|
||||
|
||||
if (IsReady())
|
||||
ForgetGraphData();
|
||||
// disable caching if graph was created only once
|
||||
// disable weights caching if graph was created only once
|
||||
weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;
|
||||
|
||||
rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
|
||||
|
||||
Replicate(net, extMgr);
|
||||
InitGraph();
|
||||
|
||||
@ -113,6 +115,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ngraph::Function> &subgr
|
||||
if (isQuantized()) {
|
||||
node->setQuantizedGraphFlag(true);
|
||||
}
|
||||
node->setRuntimeCache(rtParamsCache);
|
||||
|
||||
graphNodes.push_back(node);
|
||||
|
||||
@ -209,6 +212,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
|
||||
if (isQuantized()) {
|
||||
node->setQuantizedGraphFlag(true);
|
||||
}
|
||||
node->setRuntimeCache(rtParamsCache);
|
||||
graphNodes.push_back(node);
|
||||
|
||||
if (op->get_type_info() == ngraph::op::v0::Parameter::get_type_info_static()) {
|
||||
@ -1191,6 +1195,7 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
|
||||
if (isQuantized()) {
|
||||
node->setQuantizedGraphFlag(true);
|
||||
}
|
||||
node->setRuntimeCache(rtParamsCache);
|
||||
|
||||
if (initNode) {
|
||||
node->getSupportedDescriptors();
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "normalize_preprocess.h"
|
||||
#include "mkldnn_node.h"
|
||||
#include "mkldnn_edge.h"
|
||||
#include "cache/multi_cache.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -247,6 +248,8 @@ private:
|
||||
std::vector<MKLDNNNodePtr> constantGraphNodes;
|
||||
std::vector<MKLDNNNodePtr> executableGraphNodes;
|
||||
|
||||
MultiCachePtr rtParamsCache;
|
||||
|
||||
void EnforceBF16();
|
||||
};
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "cpu_types.h"
|
||||
#include "cpu_shape.h"
|
||||
#include "memory_desc/cpu_memory_desc.h"
|
||||
#include "cache/multi_cache.h"
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
@ -582,6 +583,19 @@ public:
|
||||
*/
|
||||
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
|
||||
|
||||
/**
|
||||
* @brief Appends new item into ops list with the information on how the node should be executed as post operation.
|
||||
* Seed node should call this routine and pass its post operations list as parameter.
|
||||
* @param ops List of fused post operations
|
||||
*/
|
||||
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims);
|
||||
|
||||
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
|
||||
|
||||
void setRuntimeCache(MultiCachePtr cache) {
|
||||
rtParamsCache = cache;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
|
||||
|
||||
@ -597,15 +611,7 @@ protected:
|
||||
virtual MemoryDescPtr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
|
||||
virtual MemoryDescPtr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
|
||||
|
||||
/**
|
||||
* @brief Appends new item into ops list with the information on how the node should be executed as post operation.
|
||||
* Seed node should call this routine and pass its post operations list as parameter.
|
||||
* @param ops List of fused post operations
|
||||
*/
|
||||
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims);
|
||||
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);
|
||||
|
||||
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }
|
||||
virtual AttrPtr initPrimitiveAttr() { return nullptr; }
|
||||
|
||||
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
|
||||
GetPrimitiveMemoryFormatFunc;
|
||||
@ -755,6 +761,10 @@ protected:
|
||||
IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType());
|
||||
}
|
||||
|
||||
MultiCachePtr getRuntimeCache() const {
|
||||
return rtParamsCache;
|
||||
}
|
||||
|
||||
std::vector<VectorDims> lastInputDims = {};
|
||||
|
||||
std::shared_ptr<ngraph::Node> opToShapeInfer;
|
||||
@ -780,6 +790,8 @@ private:
|
||||
PerfCount perfCounter;
|
||||
PerfCounters profiling;
|
||||
|
||||
MultiCachePtr rtParamsCache;
|
||||
|
||||
bool isEdgesEmpty(const std::vector<MKLDNNEdgeWeakPtr>& edges) const;
|
||||
|
||||
void createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& op);
|
||||
|
@ -64,7 +64,7 @@ struct jit_uni_softmax_kernel_f32 : public jit_uni_softmax_kernel, public jit_ge
|
||||
exp_injector.reset(new jit_uni_eltwise_injector_f32<isa>(this, mkldnn::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f));
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr));
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -38,6 +38,8 @@ struct jit_eltwise_params {
|
||||
struct jit_eltwise_call_args_ptrs {
|
||||
const void *src_ptr[MAX_ELTWISE_INPUTS];
|
||||
void *dst_ptr;
|
||||
//ptr to array of post op inputs pointers (flat list)
|
||||
const void** post_op_data;
|
||||
};
|
||||
|
||||
struct jit_eltwise_call_args_indexes {
|
||||
@ -54,16 +56,37 @@ struct jit_uni_eltwise_kernel {
|
||||
ker_(const_args, indexes);
|
||||
}
|
||||
|
||||
explicit jit_uni_eltwise_kernel(const jit_eltwise_params& jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
|
||||
explicit jit_uni_eltwise_kernel(const jit_eltwise_params& jep) : ker_(nullptr), jep_(jep) {}
|
||||
virtual ~jit_uni_eltwise_kernel() {}
|
||||
|
||||
virtual void create_ker() = 0;
|
||||
|
||||
jit_eltwise_params jep_;
|
||||
MKLDNNEltwiseNode& eltwiseNode;
|
||||
};
|
||||
|
||||
class MKLDNNEltwiseNode : public MKLDNNNode {
|
||||
public:
|
||||
struct EltwiseData {
|
||||
Algorithm algo;
|
||||
mkldnn::algorithm mkldnnAlgorithm;
|
||||
float alpha;
|
||||
float beta;
|
||||
float gamma;
|
||||
|
||||
bool operator==(const EltwiseData& rhs) const noexcept;
|
||||
};
|
||||
|
||||
class IEltwiseExecutor {
|
||||
public:
|
||||
IEltwiseExecutor() = default;
|
||||
virtual void exec(const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) = 0;
|
||||
virtual size_t getBatchDimIdx() const = 0;
|
||||
virtual const VectorDims& getOutDims() const = 0;
|
||||
virtual ~IEltwiseExecutor() = default;
|
||||
};
|
||||
|
||||
using executorPtr = std::shared_ptr<IEltwiseExecutor>;
|
||||
|
||||
public:
|
||||
MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
|
||||
|
||||
@ -108,41 +131,11 @@ public:
|
||||
|
||||
|
||||
private:
|
||||
struct EltwiseExecutor {
|
||||
EltwiseExecutor(size_t batch) : batchDimIdx(batch) {}
|
||||
virtual void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) = 0;
|
||||
virtual const jit_eltwise_params& getJep() const = 0;
|
||||
virtual ~EltwiseExecutor() = default;
|
||||
|
||||
size_t batchDimIdx = 0;
|
||||
};
|
||||
using executorPtr = std::shared_ptr<EltwiseExecutor>;
|
||||
executorPtr execPtr = nullptr;
|
||||
|
||||
struct EltwiseJitExecutor : public EltwiseExecutor {
|
||||
EltwiseJitExecutor(const jit_eltwise_params &_jep, MKLDNNEltwiseNode& node, const size_t schedWA, const size_t batch);
|
||||
void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override;
|
||||
const jit_eltwise_params& getJep() const override;
|
||||
|
||||
std::unique_ptr<jit_uni_eltwise_kernel> pKernel;
|
||||
size_t schedulerWorkAmount = 0;
|
||||
};
|
||||
|
||||
struct EltwiseRefExecutor : public EltwiseExecutor {
|
||||
EltwiseRefExecutor(const jit_eltwise_params &_jep, const size_t fullWA, const size_t batch) : jep(_jep), fullWorkAmount(fullWA),
|
||||
EltwiseExecutor(batch) {}
|
||||
void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override;
|
||||
const jit_eltwise_params& getJep() const override { return jep; }
|
||||
|
||||
jit_eltwise_params jep;
|
||||
size_t fullWorkAmount = 0;
|
||||
};
|
||||
|
||||
BroadcastingPolicy broadcastingPolicy;
|
||||
|
||||
mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;
|
||||
|
||||
static const int optimalTensorRank = 6;
|
||||
bool canUseOptimizedImpl = false;
|
||||
bool isDynBatchEnabled = false;
|
||||
bool specialConvolutionAddFusing = false;
|
||||
@ -163,22 +156,13 @@ private:
|
||||
std::vector<float> shiftsBuffer = {};
|
||||
|
||||
std::vector<MKLDNNMemoryPtr> memPtrs = {};
|
||||
std::vector<const void*> fqDataPtrs;
|
||||
|
||||
using Initializer = std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>;
|
||||
static const std::map<const ngraph::DiscreteTypeInfo, Initializer> initializers;
|
||||
|
||||
static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op);
|
||||
|
||||
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
|
||||
const VectorDims &dims_out) const;
|
||||
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
|
||||
const VectorDims &dims_out, const size_t schedulerWorkAmount) const;
|
||||
void executeReference(const jit_eltwise_params &jep, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out,
|
||||
const size_t fullWorkAmount) const;
|
||||
|
||||
void offset_out_calc(VectorDims& offset, VectorDims& dims);
|
||||
void offset_in_calc(VectorDims& offset, VectorDims& dims_in, VectorDims& dims_out);
|
||||
|
||||
size_t getOpInputsNum() const;
|
||||
};
|
||||
|
||||
|
@ -76,7 +76,7 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi
|
||||
}
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr));
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <mkldnn_types.h>
|
||||
#include "common/cpu_memcpy.h"
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "memory_desc/dnnl_blocked_memory_desc.h"
|
||||
@ -21,12 +20,63 @@
|
||||
#include "utils/general_utils.h"
|
||||
#include "memory_desc/cpu_memory_desc_utils.h"
|
||||
#include "mkldnn_extension_utils.h"
|
||||
#include "utils/cpu_utils.hpp"
|
||||
#include <common/primitive_hashing_utils.hpp>
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
namespace {
|
||||
struct MatMulKey {
|
||||
DnnlMemoryDescCPtr inp0;
|
||||
DnnlMemoryDescCPtr inp1;
|
||||
DnnlMemoryDescCPtr bias;
|
||||
DnnlMemoryDescCPtr out;
|
||||
mkldnn::primitive_attr attr;
|
||||
impl_desc_type implType;
|
||||
|
||||
size_t hash() const;
|
||||
bool operator==(const MatMulKey& rhs) const;
|
||||
};
|
||||
|
||||
size_t MatMulKey::hash() const {
|
||||
using namespace dnnl::impl;
|
||||
using namespace dnnl::impl::primitive_hashing;
|
||||
|
||||
size_t seed = 0;
|
||||
|
||||
for (const auto& ptr : {inp0, inp1, bias, out}) {
|
||||
if (ptr) {
|
||||
seed = hash_combine(seed, get_md_hash(ptr->getDnnlDesc().data));
|
||||
}
|
||||
}
|
||||
|
||||
seed = hash_combine(seed, get_attr_hash(*attr.get()));
|
||||
seed = hash_combine(seed, implType);
|
||||
return seed;
|
||||
}
|
||||
|
||||
bool MatMulKey::operator==(const MatMulKey &rhs) const {
|
||||
bool retVal = true;
|
||||
if (inp0 != rhs.inp0) {
|
||||
retVal = retVal && inp0 && rhs.inp0 && inp0->getDnnlDesc() == rhs.inp0->getDnnlDesc();
|
||||
}
|
||||
if (inp1 != rhs.inp1) {
|
||||
retVal = retVal && inp1 && rhs.inp1 && inp1->getDnnlDesc() == rhs.inp1->getDnnlDesc();
|
||||
}
|
||||
if (bias != rhs.bias) {
|
||||
retVal = retVal && bias && rhs.bias && bias->getDnnlDesc() == rhs.bias->getDnnlDesc();
|
||||
}
|
||||
if (out != rhs.out) {
|
||||
retVal = retVal && out && rhs.out && out->getDnnlDesc() == rhs.out->getDnnlDesc();
|
||||
}
|
||||
retVal = retVal && *attr.get() == *rhs.attr.get() &&
|
||||
implType == rhs.implType;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
|
||||
try {
|
||||
const auto matMul = std::dynamic_pointer_cast<const ngraph::opset1::MatMul>(op);
|
||||
@ -377,35 +427,58 @@ void MKLDNNMatMulNode::prepareParams() {
|
||||
|
||||
auto dstDnnlDesc = dstMemPtr->GetDescWithType<DnnlMemoryDesc>();
|
||||
|
||||
std::shared_ptr<mkldnn::matmul::desc> matmul_desc;
|
||||
|
||||
DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr;
|
||||
if (withBiases) {
|
||||
matmul_desc.reset(new mkldnn::matmul::desc{src0TransposedDesc->getDnnlDesc(),
|
||||
src1TransposedDesc->getDnnlDesc(),
|
||||
getBiasDescFrom(dstDnnlDesc),
|
||||
dstDnnlDesc->getDnnlDesc()});
|
||||
} else {
|
||||
matmul_desc.reset(new mkldnn::matmul::desc(src0TransposedDesc->getDnnlDesc(),
|
||||
src1TransposedDesc->getDnnlDesc(),
|
||||
dstDnnlDesc->getDnnlDesc()));
|
||||
auto& biasMemory = getParentEdgeAt(2)->getMemoryPtr();
|
||||
if (!biasMemory || !biasMemory->GetPrimitivePtr())
|
||||
IE_THROW() << errorPrefix << " did not allocate bias memory";
|
||||
dnnlBiasMemDesc = biasMemory->GetDescWithType<DnnlMemoryDesc>();
|
||||
}
|
||||
|
||||
MKLDNNDescriptor desc(matmul_desc);
|
||||
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), *attr);
|
||||
matmul::primitive_desc prim_desc;
|
||||
MatMulKey key = {src0TransposedDesc, src1TransposedDesc, dnnlBiasMemDesc,
|
||||
dstDnnlDesc, *attr, selected_pd->getImplementationType()};
|
||||
|
||||
while (static_cast<bool>(itpd)) {
|
||||
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
|
||||
auto engine = getEngine();
|
||||
|
||||
if (impl_type == selected_pd->getImplementationType()) {
|
||||
prim_desc = itpd.get();
|
||||
break;
|
||||
auto builder = [&engine](const MatMulKey& key) -> std::shared_ptr<mkldnn::primitive> {
|
||||
std::shared_ptr<mkldnn::matmul::desc> matmul_desc;
|
||||
|
||||
if (key.bias) {
|
||||
matmul_desc.reset(new mkldnn::matmul::desc{key.inp0->getDnnlDesc(),
|
||||
key.inp1->getDnnlDesc(),
|
||||
key.bias->getDnnlDesc(),
|
||||
key.out->getDnnlDesc()});
|
||||
} else {
|
||||
matmul_desc.reset(new mkldnn::matmul::desc(key.inp0->getDnnlDesc(),
|
||||
key.inp1->getDnnlDesc(),
|
||||
key.out->getDnnlDesc()));
|
||||
}
|
||||
if (!itpd.next_impl())
|
||||
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
||||
|
||||
MKLDNNDescriptor desc(matmul_desc);
|
||||
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine, key.attr);
|
||||
matmul::primitive_desc prim_desc;
|
||||
|
||||
while (static_cast<bool>(itpd)) {
|
||||
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
|
||||
|
||||
if (impl_type == key.implType) {
|
||||
prim_desc = itpd.get();
|
||||
break;
|
||||
}
|
||||
if (!itpd.next_impl())
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_shared<matmul>(prim_desc);
|
||||
};
|
||||
|
||||
auto cache = getRuntimeCache();
|
||||
auto result = cache->getOrCreate(key, builder);
|
||||
|
||||
if (!result.first) {
|
||||
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
|
||||
}
|
||||
|
||||
prim.reset(new matmul(prim_desc));
|
||||
prim = result.first;
|
||||
|
||||
primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive();
|
||||
primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive();
|
||||
|
@ -59,7 +59,6 @@ private:
|
||||
|
||||
std::array<DnnlBlockedMemoryDescPtr, 2> inDataDesc;
|
||||
DnnlBlockedMemoryDescPtr outDataDesc;
|
||||
AttrPtr pAttr;
|
||||
};
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
@ -59,7 +59,7 @@ struct jit_uni_mvn_mean_variance_kernel_f32 : public jit_uni_mvn_mean_variance_k
|
||||
}
|
||||
|
||||
void generate() override {
|
||||
load_emitter.reset(new jit_load_emitter(this, isa, nullptr));
|
||||
load_emitter.reset(new jit_load_emitter(this, isa));
|
||||
|
||||
this->preamble();
|
||||
mov(reg_src, ptr[reg_params + GET_OFF(src)]);
|
||||
@ -384,8 +384,8 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator
|
||||
}
|
||||
}
|
||||
|
||||
load_emitter.reset(new jit_load_emitter(this, isa, nullptr));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa, nullptr));
|
||||
load_emitter.reset(new jit_load_emitter(this, isa));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
@ -41,8 +41,8 @@ struct jit_uni_nms_kernel_f32 : public jit_uni_nms_kernel, public jit_generator
|
||||
}
|
||||
|
||||
void generate() override {
|
||||
load_emitter.reset(new jit_load_emitter(this, isa, nullptr));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa, nullptr));
|
||||
load_emitter.reset(new jit_load_emitter(this, isa));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa));
|
||||
exp_injector.reset(new jit_uni_eltwise_injector_f32<isa>(this, mkldnn::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.0f));
|
||||
|
||||
this->preamble();
|
||||
|
@ -189,7 +189,7 @@ struct jit_uni_normalize_kernel_f32 : public jit_uni_normalize_kernel, public ji
|
||||
}
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr));
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
@ -1356,10 +1356,16 @@ private:
|
||||
|
||||
auto quant = post_op.quantization;
|
||||
|
||||
float crop_low = quant.crop_low_data->shifts_[quant.crop_low_data->count_ == 1 ? 0 : index_c];
|
||||
float crop_high = quant.crop_high_data->shifts_[quant.crop_high_data->count_ == 1 ? 0 : index_c];
|
||||
float input_scale = quant.input_scale_data->scales_[quant.input_scale_data->count_ == 1 ? 0 : index_c];
|
||||
float input_shift = quant.input_shift_data->shifts_[quant.input_shift_data->count_ == 1 ? 0 : index_c];
|
||||
using quantization_fields = post_ops_t::entry_t::quantization_t::quantization_fields;
|
||||
auto dataVal = [&](const quantization_fields& field) {
|
||||
const int channelIdx = quant.per_channel[field] ? index_c : 0;
|
||||
return quant.data[field][channelIdx];
|
||||
};
|
||||
|
||||
float crop_low = dataVal(quant.crop_low);
|
||||
float crop_high = dataVal(quant.crop_high);
|
||||
float input_scale = dataVal(quant.inp_scale);
|
||||
float input_shift = dataVal(quant.inp_shift);
|
||||
|
||||
dst_value = nstl::min(crop_high, nstl::max(crop_low, dst_value));
|
||||
dst_value = dst_value * input_scale + input_shift;
|
||||
@ -1369,8 +1375,8 @@ private:
|
||||
}
|
||||
|
||||
if (do_dequantization) {
|
||||
float output_scale = quant.output_scale_data->scales_[quant.output_scale_data->count_ == 1 ? 0 : index_c];
|
||||
float output_shift = quant.output_shift_data->shifts_[quant.output_shift_data->count_ == 1 ? 0 : index_c];
|
||||
float output_scale = dataVal(quant.output_scale);
|
||||
float output_shift = dataVal(quant.output_shift);
|
||||
dst_value = dst_value * output_scale + output_shift;
|
||||
}
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene
|
||||
}
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16 = std::make_shared<jit_emu_vcvtneps2bf16>(this, isa, nullptr);
|
||||
emu_vcvtneps2bf16 = std::make_shared<jit_emu_vcvtneps2bf16>(this, isa);
|
||||
|
||||
this->preamble();
|
||||
|
||||
@ -1075,7 +1075,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi
|
||||
}
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16 = std::make_shared<jit_emu_vcvtneps2bf16>(this, isa, nullptr);
|
||||
emu_vcvtneps2bf16 = std::make_shared<jit_emu_vcvtneps2bf16>(this, isa);
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
@ -39,7 +39,7 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_
|
||||
exp_injector.reset(new jit_uni_eltwise_injector_f32<isa>(this, mkldnn::impl::alg_kind::eltwise_exp, 0.f, 0.f, 1.f));
|
||||
|
||||
if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core))
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa, nullptr));
|
||||
emu_vcvtneps2bf16.reset(new jit_emu_vcvtneps2bf16(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
@ -44,8 +44,8 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi
|
||||
};
|
||||
|
||||
void generate() override {
|
||||
load_emitter.reset(new jit_load_emitter(this, isa, nullptr));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa, nullptr));
|
||||
load_emitter.reset(new jit_load_emitter(this, isa));
|
||||
store_emitter.reset(new jit_store_emitter(this, isa));
|
||||
|
||||
this->preamble();
|
||||
|
||||
|
2
src/plugins/intel_cpu/thirdparty/mkl-dnn
vendored
2
src/plugins/intel_cpu/thirdparty/mkl-dnn
vendored
@ -1 +1 @@
|
||||
Subproject commit 7cd4218a3d45c2c7e7b321dc825c3635a723b60e
|
||||
Subproject commit f06708e9cf6c3973efee9d2a1a4df086050e1fcd
|
File diff suppressed because it is too large
Load Diff
@ -604,8 +604,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic
|
||||
const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing = {
|
||||
{
|
||||
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
|
||||
{{-1, -1}, {{16, 12}, {33, 7}}}, // input 0
|
||||
{{-1, 33}, {{12, 33}, {7, 33}}} // input 1
|
||||
{{-1, -1}, {{16, 12}, {33, 7}, {16, 12}}}, // input 0
|
||||
{{-1, 33}, {{12, 33}, {7, 33}, {12, 33}}} // input 1
|
||||
},
|
||||
{false, false}
|
||||
},
|
||||
@ -625,8 +625,8 @@ const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing = {
|
||||
},
|
||||
{
|
||||
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
|
||||
{{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0
|
||||
{{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}}} // input 1
|
||||
{{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}, {10, 10, 10}}}, // input 0
|
||||
{{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}, {10, 10, 5}}} // input 1
|
||||
},
|
||||
{false, false}
|
||||
},
|
||||
|
@ -1,305 +1,468 @@
|
||||
//// Copyright (C) 2018-2021 Intel Corporation
|
||||
//// SPDX-License-Identifier: Apache-2.0
|
||||
////
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
//#include <tuple>
|
||||
//#include <string>
|
||||
//#include <vector>
|
||||
//#include <memory>
|
||||
//#include <debug.h>
|
||||
//#include <shared_test_classes/base/layer_test_utils.hpp>
|
||||
//#include <ngraph_functions/builders.hpp>
|
||||
//#include <ie_precision.hpp>
|
||||
//#include "common_test_utils/common_utils.hpp"
|
||||
//#include "functional_test_utils/precision_utils.hpp"
|
||||
//#include "functional_test_utils/skip_tests_config.hpp"
|
||||
//#include "test_utils/cpu_test_utils.hpp"
|
||||
//#include "ie_system_conf.h"
|
||||
//
|
||||
//using namespace CPUTestUtils;
|
||||
//using InferenceEngine::Precision;
|
||||
//using ngraph::helpers::EltwiseTypes;
|
||||
//using FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc;
|
||||
//
|
||||
//namespace CPUSubgraphTestsDefinitions {
|
||||
//
|
||||
//typedef std::tuple<
|
||||
// std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>, // Input shapes
|
||||
// ngraph::helpers::InputLayerType, // Secondary input type
|
||||
// std::vector<InferenceEngine::Precision>, // Input precisions
|
||||
// std::vector<EltwiseTypes>, // Eltwise operations
|
||||
// bool, // With quantization
|
||||
// std::string // Device name
|
||||
//> EltwiseChainTuple;
|
||||
//
|
||||
//class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>,
|
||||
// virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
//public:
|
||||
// static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj) {
|
||||
// std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> inputShapes;
|
||||
// ngraph::helpers::InputLayerType secondaryInputType;
|
||||
// std::vector<InferenceEngine::Precision> inputPrecisions;
|
||||
// std::vector<EltwiseTypes> eltwiseOpTypes;
|
||||
// bool withQuantization;
|
||||
// std::string targetName;
|
||||
// std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, targetName) = obj.param;
|
||||
// std::ostringstream results;
|
||||
//
|
||||
// results << "IS=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
|
||||
// results << "TS=";
|
||||
// for (const auto& shape : inputShapes.second) {
|
||||
// results << "(";
|
||||
// for (const auto& item : shape) {
|
||||
// results << CommonTestUtils::vec2str(item) << "_";
|
||||
// }
|
||||
// results << ")_";
|
||||
// }
|
||||
// for (int i = 0; i < inputPrecisions.size(); i++) {
|
||||
// results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i].name() << "_";
|
||||
// }
|
||||
// for (int i = 0; i < eltwiseOpTypes.size(); i++) {
|
||||
// results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_";
|
||||
// }
|
||||
// results << "secondaryInputType=" << secondaryInputType << "_";
|
||||
// results << "WithQuant=" << withQuantization << "_";
|
||||
// results << "targetDevice=" << targetName;
|
||||
//
|
||||
// return results.str();
|
||||
// }
|
||||
//
|
||||
// InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override {
|
||||
// return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 10, 1);
|
||||
// }
|
||||
//
|
||||
//protected:
|
||||
// void SetUp() override {
|
||||
// threshold = 0.1f;
|
||||
//
|
||||
// std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>> inputShapes;
|
||||
// ngraph::helpers::InputLayerType secondaryInputType;
|
||||
// std::vector<InferenceEngine::Precision> inputPrecisions;
|
||||
// std::vector<EltwiseTypes> eltwiseOpTypes;
|
||||
// bool withQuantization;
|
||||
// std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, targetDevice) = this->GetParam();
|
||||
//
|
||||
// targetStaticShapes = inputShapes.second;
|
||||
// inputDynamicShapes = inputShapes.first;
|
||||
//
|
||||
// ngraph::ParameterVector ngraphParam;
|
||||
// std::vector<std::shared_ptr<ngraph::Node>> ngraphInputs;
|
||||
// if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
|
||||
// for (size_t i = 0; i < targetStaticShapes[0].size(); i++) {
|
||||
// ngraphParam.push_back(std::make_shared<ngraph::opset1::Parameter>(convertIE2nGraphPrc(inputPrecisions[i]), targetStaticShapes[0][i]));
|
||||
// ngraphInputs.push_back(ngraphParam.back());
|
||||
// }
|
||||
// } else {
|
||||
// ngraphParam = ngraph::builder::makeParams(convertIE2nGraphPrc(inputPrecisions[0]), {targetStaticShapes[0][0]});
|
||||
// for (int i = 1; i < inputPrecisions.size(); i++) {
|
||||
// std::vector<float> ngraphInput1Data(ngraph::shape_size(targetStaticShapes[0][i]));
|
||||
// ngraphInputs.push_back(ngraph::builder::makeConstant(convertIE2nGraphPrc(inputPrecisions[i]), targetStaticShapes[0][i],
|
||||
// ngraphInput1Data, true));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if (withQuantization) {
|
||||
// std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
|
||||
// eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
|
||||
// for (int i = 1; i < eltwiseOpTypes.size() - 1; i++) {
|
||||
// eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
|
||||
// }
|
||||
//
|
||||
// std::vector<size_t> constShape(targetStaticShapes[0][0].size(), 1);
|
||||
// constShape[1] = targetStaticShapes[0][0][1];
|
||||
// auto fq = ngraph::builder::makeFakeQuantize(eltwiseOps[eltwiseOps.size() - 1],
|
||||
// ::ngraph::element::Type(::ngraph::element::Type_t::f32),
|
||||
// 256, constShape);
|
||||
//
|
||||
// eltwiseOps.push_back(ngraph::builder::makeEltwise(fq, ngraphInputs[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1]));
|
||||
//
|
||||
// ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
|
||||
// function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain_fq");
|
||||
// } else {
|
||||
// std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
|
||||
// eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
|
||||
// for (int i = 1; i < eltwiseOpTypes.size(); i++) {
|
||||
// eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
|
||||
// }
|
||||
//
|
||||
// ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
|
||||
// function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain");
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//TEST_P(EltwiseChainTest, CompareWithRefs) {
|
||||
// SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
//
|
||||
// Run();
|
||||
//}
|
||||
//
|
||||
//namespace {
|
||||
//
|
||||
//std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inputShapes = {
|
||||
// { {}, {{{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}}}},
|
||||
// { {}, {{{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}}}},
|
||||
// { {}, {{{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}}}},
|
||||
// { {}, {{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}}}},
|
||||
// { {}, {{{1, 2, 3}, {3}, {3}, {3}}}},
|
||||
// { {}, {{{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}}}},
|
||||
// { {}, {{{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}}}},
|
||||
// { {}, {{{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}}}},
|
||||
// { {}, {{{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}}}}
|
||||
//};
|
||||
//
|
||||
//std::vector<std::vector<InferenceEngine::Precision>> inputPrecisions = {
|
||||
// { Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 },
|
||||
// { Precision::I32, Precision::I32, Precision::I32, Precision::I32 }
|
||||
//};
|
||||
//
|
||||
//std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
|
||||
// { EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
|
||||
// { EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD },
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(inputShapes),
|
||||
// ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
|
||||
// ::testing::ValuesIn(inputPrecisions),
|
||||
// ::testing::ValuesIn(eltwiseOps),
|
||||
// ::testing::Values(false),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
// EltwiseChainTest::getTestCaseName);
|
||||
//
|
||||
//std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inputShapesFQ = {
|
||||
// { {}, {{{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}}}},
|
||||
// { {}, {{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}}}},
|
||||
// { {}, {{{2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}}}},
|
||||
// { {}, {{{2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}}}},
|
||||
// { {}, {{{2, 5, 7, 5}, {2, 5, 1, 5}, {2, 5, 7, 5}, {2, 5, 7, 5}}}},
|
||||
// { {}, {{{2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}}}},
|
||||
// { {}, {{{2, 256, 7, 5}, {2, 256, 7, 5}, {2, 256, 1, 5}, {2, 256, 7, 5}}}},
|
||||
// { {}, {{{1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}}}},
|
||||
// { {}, {{{1, 12, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}}}},
|
||||
// { {}, {{{1, 12, 1, 1, 6}, {1, 12, 5, 5, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 5, 1}}}},
|
||||
// { {}, {{{1, 12, 1, 1, 1}, {1, 12, 5, 1, 7}, {3, 12, 1, 5, 7}, {3, 12, 5, 1, 7}}}},
|
||||
// { {}, {{{1, 7, 1, 1, 12}, {1, 7, 5, 1, 12}, {3, 7, 1, 5, 12}, {3, 7, 5, 1, 12}}}},
|
||||
// { {}, {{{1, 7, 1, 1, 12, 3, 7}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 3, 7}, {3, 7, 5, 1, 12, 3, 7}}}},
|
||||
// { {}, {{{1, 7, 1, 1, 12, 3, 1}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 1, 7}, {3, 7, 5, 1, 12, 3, 1}}}}
|
||||
//};
|
||||
//
|
||||
//std::vector<std::vector<InferenceEngine::Precision>> inputPrecisionsFQ {
|
||||
// { Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 }
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChainWithFQ, EltwiseChainTest,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(inputShapesFQ),
|
||||
// ::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
|
||||
// ::testing::ValuesIn(inputPrecisionsFQ),
|
||||
// ::testing::ValuesIn(eltwiseOps),
|
||||
// ::testing::Values(true),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
// EltwiseChainTest::getTestCaseName);
|
||||
//
|
||||
//// =============================================== dynamic ==============================================
|
||||
//std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>> inputShapes_dyn = {
|
||||
// {
|
||||
// // dynamic
|
||||
// {
|
||||
// {-1, -1, -1},
|
||||
// {-1},
|
||||
// {-1},
|
||||
// {-1}
|
||||
// },
|
||||
// // target
|
||||
// {
|
||||
// {{1, 2, 3}, {3}, {3}, {3}},
|
||||
// {{5, 2, 7}, {7}, {1}, {1}},
|
||||
// {{3, 1, 10}, {1}, {1}, {1}},
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// // dynamic
|
||||
// {
|
||||
// {-1, -1, -1, -1},
|
||||
// {-1, -1},
|
||||
// {-1, -1, -1},
|
||||
// {-1}
|
||||
// },
|
||||
// // target
|
||||
// {
|
||||
// {{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}},
|
||||
// {{5, 16, 1, 5}, {1, 5}, {1, 5, 1}, {1}},
|
||||
// {{2, 1, 1, 5}, {5, 1}, {16, 5, 5}, {5}},
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// // dynamic
|
||||
// {
|
||||
// {-1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1}
|
||||
// },
|
||||
// // target
|
||||
// {
|
||||
// {{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
|
||||
// {{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
|
||||
// {{2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}},
|
||||
// {{2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}},
|
||||
// {{2, 5, 7, 5}, {2, 5, 1, 5}, {2, 5, 7, 5}, {2, 5, 7, 5}},
|
||||
// {{2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}},
|
||||
// {{2, 256, 7, 5}, {2, 256, 7, 5}, {2, 256, 1, 5}, {2, 256, 7, 5}},
|
||||
// {{1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// // dynamic
|
||||
// {
|
||||
// {-1, -1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1, -1},
|
||||
// {-1, -1, -1, -1, -1}
|
||||
// },
|
||||
// // target
|
||||
// {
|
||||
// {{1, 12, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}},
|
||||
// {{1, 12, 1, 1, 6}, {1, 12, 5, 5, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 5, 1}},
|
||||
// {{1, 12, 1, 1, 1}, {1, 12, 5, 1, 7}, {3, 12, 1, 5, 7}, {3, 12, 5, 1, 7}},
|
||||
// {{1, 7, 1, 1, 12}, {1, 7, 5, 1, 12}, {3, 7, 1, 5, 12}, {3, 7, 5, 1, 12}}
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// // dynamic
|
||||
// {
|
||||
// {-1, -1, -1, -1, -1,
|
||||
// -1, -1},
|
||||
// {-1, -1, -1, -1, -1,
|
||||
// -1, -1},
|
||||
// {-1, -1, -1, -1, -1,
|
||||
// -1, -1},
|
||||
// {-1, -1, -1, -1, -1,
|
||||
// -1, -1}
|
||||
// },
|
||||
// // target
|
||||
// {
|
||||
// {{1, 7, 1, 1, 12, 3, 7}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 3, 7}, {3, 7, 5, 1, 12, 3, 7}},
|
||||
// {{1, 7, 1, 1, 12, 3, 1}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 1, 7}, {3, 7, 5, 1, 12, 3, 1}},
|
||||
// {{5, 7, 1, 2, 12, 1, 8}, {1, 7, 5, 1, 12, 3, 8}, {5, 1, 1, 2, 12, 1, 8}, {1, 7, 5, 1, 12, 3, 1}}
|
||||
// }
|
||||
// }
|
||||
//};
|
||||
//
|
||||
//INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_dyn, EltwiseChainTest,
|
||||
// ::testing::Combine(
|
||||
// ::testing::ValuesIn(inputShapes_dyn),
|
||||
// ::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
|
||||
// ::testing::ValuesIn(inputPrecisions),
|
||||
// ::testing::ValuesIn(eltwiseOps),
|
||||
// ::testing::Values(false),
|
||||
// ::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
// EltwiseChainTest::getTestCaseName);
|
||||
//
|
||||
//} // namespace
|
||||
//} // namespace CPUSubgraphTestsDefinitions
|
||||
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <debug.h>
|
||||
#include <shared_test_classes/base/ov_subgraph.hpp>
|
||||
#include <ngraph_functions/builders.hpp>
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/ov_tensor_utils.hpp"
|
||||
#include "functional_test_utils/skip_tests_config.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
|
||||
using namespace CPUTestUtils;
|
||||
using ngraph::helpers::EltwiseTypes;
|
||||
using namespace ov::test;
|
||||
|
||||
namespace CPUSubgraphTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
std::vector<InputShape>, // Input shapes
|
||||
ngraph::helpers::InputLayerType, // Secondary input type
|
||||
std::vector<ElementType>, // Input precisions
|
||||
std::vector<EltwiseTypes>, // Eltwise operations
|
||||
bool, // With quantization
|
||||
std::string // Device name
|
||||
> EltwiseChainTuple;
|
||||
|
||||
class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>,
|
||||
virtual public SubgraphBaseTest {
|
||||
public:
|
||||
static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj) {
|
||||
std::vector<InputShape> inputShapes;
|
||||
ngraph::helpers::InputLayerType secondaryInputType;
|
||||
std::vector<ElementType> inputPrecisions;
|
||||
std::vector<EltwiseTypes> eltwiseOpTypes;
|
||||
bool withQuantization;
|
||||
std::string targetName;
|
||||
std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, targetName) = obj.param;
|
||||
std::ostringstream results;
|
||||
|
||||
results << "IS=(";
|
||||
for (const auto& shape : inputShapes) {
|
||||
results << CommonTestUtils::partialShape2str({shape.first}) << "_";
|
||||
}
|
||||
results << ")_TS=(";
|
||||
for (const auto& shape : inputShapes) {
|
||||
for (const auto& item : shape.second) {
|
||||
results << CommonTestUtils::vec2str(item) << "_";
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < inputPrecisions.size(); i++) {
|
||||
results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i] << "_";
|
||||
}
|
||||
for (int i = 0; i < eltwiseOpTypes.size(); i++) {
|
||||
results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_";
|
||||
}
|
||||
results << "secondaryInputType=" << secondaryInputType << "_";
|
||||
results << "WithQuant=" << withQuantization << "_";
|
||||
results << "targetDevice=" << targetName;
|
||||
|
||||
return results.str();
|
||||
}
|
||||
|
||||
void generate_inputs(const std::vector<ngraph::Shape>& targetInputStaticShapes) override {
|
||||
inputs.clear();
|
||||
const auto& funcInputs = function->inputs();
|
||||
for (int i = 0; i < funcInputs.size(); ++i) {
|
||||
const auto& funcInput = funcInputs[i];
|
||||
ov::runtime::Tensor tensor;
|
||||
tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 10, 1, 1);
|
||||
inputs.insert({funcInput.get_node_shared_ptr(), tensor});
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
abs_threshold = 0.1f;
|
||||
|
||||
std::vector<InputShape> inputShapes;
|
||||
ngraph::helpers::InputLayerType secondaryInputType;
|
||||
std::vector<ElementType> inputPrecisions;
|
||||
std::vector<EltwiseTypes> eltwiseOpTypes;
|
||||
bool withQuantization;
|
||||
std::tie(inputShapes, secondaryInputType, inputPrecisions, eltwiseOpTypes, withQuantization, targetDevice) = this->GetParam();
|
||||
|
||||
init_input_shapes(inputShapes);
|
||||
|
||||
ngraph::ParameterVector ngraphParam;
|
||||
std::vector<std::shared_ptr<ngraph::Node>> ngraphInputs;
|
||||
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
|
||||
for (size_t i = 0; i < inputDynamicShapes.size(); i++) {
|
||||
ngraphParam.push_back(std::make_shared<ngraph::opset1::Parameter>(inputPrecisions[i], inputDynamicShapes[i]));
|
||||
ngraphInputs.push_back(ngraphParam.back());
|
||||
}
|
||||
} else {
|
||||
ngraphParam = ngraph::builder::makeDynamicParams(inputPrecisions[0], {inputDynamicShapes.front()});
|
||||
for (int i = 1; i < inputPrecisions.size(); i++) {
|
||||
std::vector<float> ngraphInput1Data(ngraph::shape_size(targetStaticShapes[0][i]));
|
||||
ngraphInputs.push_back(ngraph::builder::makeConstant(inputPrecisions[i], targetStaticShapes[0][i],
|
||||
ngraphInput1Data, true));
|
||||
}
|
||||
}
|
||||
|
||||
if (withQuantization) {
|
||||
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
|
||||
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
|
||||
for (int i = 1; i < eltwiseOpTypes.size() - 1; i++) {
|
||||
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
|
||||
}
|
||||
|
||||
std::vector<size_t> constShape(targetStaticShapes[0][0].size(), 1);
|
||||
constShape[1] = targetStaticShapes[0][0][1];
|
||||
auto fq = ngraph::builder::makeFakeQuantize(eltwiseOps[eltwiseOps.size() - 1],
|
||||
::ngraph::element::Type(::ngraph::element::Type_t::f32),
|
||||
256, constShape);
|
||||
|
||||
eltwiseOps.push_back(ngraph::builder::makeEltwise(fq, ngraphInputs[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1]));
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
|
||||
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain_fq");
|
||||
} else {
|
||||
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
|
||||
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
|
||||
for (int i = 1; i < eltwiseOpTypes.size(); i++) {
|
||||
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
|
||||
}
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
|
||||
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(EltwiseChainTest, CompareWithRefs) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
run();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
std::vector<std::vector<ngraph::Shape>> inputShapes = {
|
||||
{{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}},
|
||||
{{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}},
|
||||
{{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}},
|
||||
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
|
||||
{{1, 2, 3}, {3}, {3}, {3}},
|
||||
{{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}},
|
||||
{{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}},
|
||||
{{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}},
|
||||
{{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}}
|
||||
};
|
||||
|
||||
std::vector<std::vector<ElementType>> inputPrecisions = {
|
||||
{ ElementType::f32, ElementType::f32, ElementType::f32, ElementType::f32 },
|
||||
{ ElementType::i32, ElementType::i32, ElementType::i32, ElementType::i32 }
|
||||
};
|
||||
|
||||
std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
|
||||
{ EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
|
||||
{ EltwiseTypes::DIVIDE, EltwiseTypes::SQUARED_DIFF, EltwiseTypes::ADD },
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain, EltwiseChainTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(static_shapes_to_test_representation(inputShapes)),
|
||||
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(eltwiseOps),
|
||||
::testing::Values(false),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
EltwiseChainTest::getTestCaseName);
|
||||
|
||||
std::vector<std::vector<ngraph::Shape>> inputShapesFQ = {
|
||||
{{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
|
||||
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
|
||||
{{2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}},
|
||||
{{2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}},
|
||||
{{2, 5, 7, 5}, {2, 5, 1, 5}, {2, 5, 7, 5}, {2, 5, 7, 5}},
|
||||
{{2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}},
|
||||
{{2, 256, 7, 5}, {2, 256, 7, 5}, {2, 256, 1, 5}, {2, 256, 7, 5}},
|
||||
{{1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}},
|
||||
{{1, 12, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}},
|
||||
{{1, 12, 1, 1, 6}, {1, 12, 5, 5, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 5, 1}},
|
||||
{{1, 12, 1, 1, 1}, {1, 12, 5, 1, 7}, {3, 12, 1, 5, 7}, {3, 12, 5, 1, 7}},
|
||||
{{1, 7, 1, 1, 12}, {1, 7, 5, 1, 12}, {3, 7, 1, 5, 12}, {3, 7, 5, 1, 12}},
|
||||
{{1, 7, 1, 1, 12, 3, 7}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 3, 7}, {3, 7, 5, 1, 12, 3, 7}},
|
||||
{{1, 7, 1, 1, 12, 3, 1}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 1, 7}, {3, 7, 5, 1, 12, 3, 1}}
|
||||
};
|
||||
|
||||
std::vector<std::vector<ElementType>> inputPrecisionsFQ {
|
||||
{ ElementType::f32, ElementType::f32, ElementType::f32, ElementType::f32 }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChainWithFQ, EltwiseChainTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(static_shapes_to_test_representation(inputShapesFQ)),
|
||||
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
|
||||
::testing::ValuesIn(inputPrecisionsFQ),
|
||||
::testing::ValuesIn(eltwiseOps),
|
||||
::testing::Values(true),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
EltwiseChainTest::getTestCaseName);
|
||||
|
||||
// =============================================== dynamic ==============================================
|
||||
std::vector<std::vector<InputShape>> inputShapes_dyn = {
|
||||
{
|
||||
// inp1
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 2, 3},
|
||||
{5, 2, 7},
|
||||
{3, 1, 10},
|
||||
}
|
||||
},
|
||||
// inp2
|
||||
{
|
||||
// dynamic
|
||||
{-1},
|
||||
// target
|
||||
{
|
||||
{3}, {7}, {1},
|
||||
}
|
||||
},
|
||||
// inp3
|
||||
{
|
||||
// dynamic
|
||||
{-1},
|
||||
// target
|
||||
{
|
||||
{3}, {1}, {1}
|
||||
}
|
||||
},
|
||||
// inp4
|
||||
{
|
||||
// dynamic
|
||||
{-1},
|
||||
// target
|
||||
{
|
||||
{3}, {1}, {1}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
// inp1
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 12, 5, 5},
|
||||
{5, 16, 1, 5},
|
||||
{2, 1, 1, 5},
|
||||
}
|
||||
},
|
||||
// inp2
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1},
|
||||
// target
|
||||
{
|
||||
{5, 5}, {1, 5}, {5, 1},
|
||||
}
|
||||
},
|
||||
// inp3
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{12, 5, 5},
|
||||
{1, 5, 1},
|
||||
{16, 5, 5},
|
||||
}
|
||||
},
|
||||
// inp4
|
||||
{
|
||||
// dynamic
|
||||
{-1},
|
||||
// target
|
||||
{
|
||||
{1}, {1}, {5}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
// inp1
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 2, 2, 3},
|
||||
{2, 33, 5, 5},
|
||||
{2, 33, 5, 17},
|
||||
{2, 33, 5, 256},
|
||||
{2, 5, 7, 5},
|
||||
{2, 17, 7, 5},
|
||||
{2, 256, 7, 5},
|
||||
{1, 36, 34, 34},
|
||||
}
|
||||
},
|
||||
// inp2
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 2, 2, 3},
|
||||
{2, 33, 5, 5},
|
||||
{2, 33, 5, 17},
|
||||
{2, 33, 5, 256},
|
||||
{2, 5, 1, 5},
|
||||
{2, 17, 7, 5},
|
||||
{2, 256, 7, 5},
|
||||
{1, 36, 34, 34},
|
||||
}
|
||||
},
|
||||
// inp3
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 2, 2, 3},
|
||||
{2, 33, 1, 5},
|
||||
{2, 33, 5, 17},
|
||||
{2, 33, 5, 256},
|
||||
{2, 5, 7, 5},
|
||||
{2, 17, 7, 5},
|
||||
{2, 256, 1, 5},
|
||||
{1, 36, 34, 34}
|
||||
}
|
||||
},
|
||||
// inp4
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 2, 2, 3},
|
||||
{2, 33, 5, 5},
|
||||
{2, 33, 5, 17},
|
||||
{2, 33, 5, 256},
|
||||
{2, 5, 7, 5},
|
||||
{2, 17, 7, 5},
|
||||
{2, 256, 7, 5},
|
||||
{1, 36, 34, 34}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
// inp1
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 12, 1, 1, 6},
|
||||
{1, 12, 1, 1, 6},
|
||||
{1, 12, 1, 1, 1},
|
||||
{1, 7, 1, 1, 12},
|
||||
}
|
||||
},
|
||||
// inp2
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 12, 5, 1, 6},
|
||||
{1, 12, 5, 5, 6},
|
||||
{1, 12, 5, 1, 7},
|
||||
{1, 7, 5, 1, 12},
|
||||
}
|
||||
},
|
||||
// inp3
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{3, 12, 1, 5, 1},
|
||||
{3, 12, 1, 5, 1},
|
||||
{3, 12, 1, 5, 7},
|
||||
{3, 7, 1, 5, 12}
|
||||
}
|
||||
},
|
||||
// inp4
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{3, 12, 5, 1, 1},
|
||||
{3, 12, 5, 5, 1},
|
||||
{3, 12, 5, 1, 7},
|
||||
{3, 7, 5, 1, 12}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
// inp1
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 7, 1, 1, 12, 3, 7},
|
||||
{1, 7, 1, 1, 12, 3, 1},
|
||||
{5, 7, 1, 2, 12, 1, 8},
|
||||
}
|
||||
},
|
||||
// inp2
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{1, 7, 5, 1, 12, 3, 7},
|
||||
{1, 7, 5, 1, 12, 3, 7},
|
||||
{1, 7, 5, 1, 12, 3, 8},
|
||||
}
|
||||
},
|
||||
// inp3
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{3, 7, 1, 5, 12, 3, 7},
|
||||
{3, 7, 1, 5, 12, 1, 7},
|
||||
{5, 1, 1, 2, 12, 1, 8},
|
||||
}
|
||||
},
|
||||
// inp4
|
||||
{
|
||||
// dynamic
|
||||
{-1, -1, -1, -1, -1, -1, -1},
|
||||
// target
|
||||
{
|
||||
{3, 7, 5, 1, 12, 3, 7},
|
||||
{3, 7, 5, 1, 12, 3, 1},
|
||||
{1, 7, 5, 1, 12, 3, 1}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseChain_dyn, EltwiseChainTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes_dyn),
|
||||
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::ValuesIn(eltwiseOps),
|
||||
::testing::Values(false),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
EltwiseChainTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
} // namespace CPUSubgraphTestsDefinitions
|
||||
|
@ -227,10 +227,14 @@ std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
|
||||
std::string selectedType;
|
||||
std::tie(inFmts, outFmts, priority, selectedType) = params;
|
||||
if (!inFmts.empty()) {
|
||||
result << "_inFmts=" << fmts2str(inFmts, "");
|
||||
auto str = fmts2str(inFmts, "");
|
||||
std::replace(str.begin(), str.end(), ',', '.');
|
||||
result << "_inFmts=" << str;
|
||||
}
|
||||
if (!outFmts.empty()) {
|
||||
result << "_outFmts=" << fmts2str(outFmts, "");
|
||||
auto str = fmts2str(outFmts, "");
|
||||
std::replace(str.begin(), str.end(), ',', '.');
|
||||
result << "_outFmts=" << str;
|
||||
}
|
||||
if (!selectedType.empty()) {
|
||||
result << "_primitive=" << selectedType;
|
||||
|
@ -237,6 +237,29 @@ const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_sha
|
||||
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu);
|
||||
}, "Relu"}}), {"FakeQuantize", "Relu"}};
|
||||
|
||||
const auto fusingFQPerChannelSigmoidFQPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
|
||||
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
|
||||
auto localPrc = inpNode->get_element_type();
|
||||
auto shape = inpNode->get_output_partial_shape(0);
|
||||
if (shape.size() == 1)
|
||||
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
|
||||
ngraph::Shape newShape(shape.size(), 1);
|
||||
newShape[1] = shape[1].get_length();
|
||||
return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape);
|
||||
}, "FakeQuantize(PerChannel)"},
|
||||
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
|
||||
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid);
|
||||
}, "Sigmoid"},
|
||||
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
|
||||
auto localPrc = inpNode->get_element_type();
|
||||
auto shape = inpNode->get_output_partial_shape(0);
|
||||
if (shape.size() == 1)
|
||||
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
|
||||
ngraph::Shape newShape(shape.size(), 1);
|
||||
newShape[1] = shape[1].get_length();
|
||||
return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape);
|
||||
}, "FakeQuantize(PerChannel)"}}), {"FakeQuantize", "Sigmoid", "FakeQuantize"}};
|
||||
|
||||
const auto fusingFakeQuantizePerTensorRelu = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
|
||||
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
|
||||
auto localPrc = inpNode->get_element_type();
|
||||
|
381
src/tests/unit/cpu/rt_cache.cpp
Normal file
381
src/tests/unit/cpu/rt_cache.cpp
Normal file
@ -0,0 +1,381 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <gmock/gmock.h>
|
||||
|
||||
#include "cache/lru_cache.h"
|
||||
#include "cache/multi_cache.h"
|
||||
|
||||
using namespace MKLDNNPlugin;
|
||||
|
||||
namespace {
|
||||
struct IntKey {
|
||||
size_t hash() const {
|
||||
return std::hash<int>().operator()(data);
|
||||
}
|
||||
bool operator==(const IntKey& rhs) const noexcept {
|
||||
return this->data == rhs.data;
|
||||
}
|
||||
|
||||
int data;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
TEST(LruCacheTests, Evict) {
|
||||
constexpr size_t capacity = 10;
|
||||
LruCache<IntKey, int> cache(capacity);
|
||||
for (size_t i = 0; i < 2 * capacity; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({10}, 10));
|
||||
}
|
||||
ASSERT_NO_THROW(cache.evict(5));
|
||||
ASSERT_NO_THROW(cache.evict(10));
|
||||
int result = cache.get({10});
|
||||
ASSERT_EQ(result, int());
|
||||
ASSERT_NO_THROW(cache.evict(0));
|
||||
}
|
||||
|
||||
TEST(LruCacheTests, Put) {
|
||||
constexpr size_t capacity = 10;
|
||||
LruCache<IntKey, int> cache(capacity);
|
||||
for (size_t i = 0; i < 2 * capacity; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({10}, 10));
|
||||
}
|
||||
|
||||
ASSERT_EQ(cache.get({10}), 10);
|
||||
}
|
||||
|
||||
TEST(LruCacheTests, Get) {
|
||||
constexpr size_t capacity = 10;
|
||||
LruCache<IntKey, int> cache(capacity);
|
||||
for (int i = 1; i < 2 * capacity; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({i}, i));
|
||||
}
|
||||
|
||||
for (int i = 1; i < capacity; ++i) {
|
||||
ASSERT_EQ(cache.get({i}), int());
|
||||
}
|
||||
|
||||
for (int i = capacity; i < 2 * capacity; ++i) {
|
||||
ASSERT_EQ(cache.get({i}), i);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(LruCacheTests, LruPolicy) {
|
||||
constexpr size_t capacity = 10;
|
||||
LruCache<IntKey, int> cache(capacity);
|
||||
for (int i = 1; i < capacity; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({i}, i));
|
||||
}
|
||||
|
||||
for (int i = 4; i < capacity; ++i) {
|
||||
ASSERT_EQ(cache.get({i}), i);
|
||||
}
|
||||
|
||||
for (int i = 21; i < 25; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({i}, i));
|
||||
}
|
||||
|
||||
for (int i = 1; i < 4; ++i) {
|
||||
ASSERT_EQ(cache.get({i}), int());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(LruCacheTests, Empty) {
|
||||
constexpr size_t capacity = 0;
|
||||
constexpr size_t attempts = 10;
|
||||
LruCache<IntKey, int> cache(capacity);
|
||||
for (int i = 1; i < attempts; ++i) {
|
||||
ASSERT_NO_THROW(cache.put({i}, i));
|
||||
}
|
||||
|
||||
for (int i = 1; i < attempts; ++i) {
|
||||
ASSERT_EQ(cache.get({i}), int());
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
template<typename T, typename K>
|
||||
class mockBuilder {
|
||||
public:
|
||||
MOCK_METHOD(T, build, (const K&));
|
||||
};
|
||||
}// namespace
|
||||
|
||||
TEST(CacheEntryTests, GetOrCreate) {
|
||||
using testing::_;
|
||||
using ValueType = std::shared_ptr<int>;
|
||||
|
||||
constexpr size_t capacity = 10;
|
||||
|
||||
mockBuilder<ValueType::element_type, IntKey> builderMock;
|
||||
EXPECT_CALL(builderMock, build(_))
|
||||
.Times(3 * capacity)
|
||||
.WillRepeatedly([](const IntKey& key){return key.data;});
|
||||
|
||||
auto builder = [&](const IntKey& key) { return std::make_shared<int>(builderMock.build(key)); };
|
||||
|
||||
CacheEntry<IntKey, ValueType> entry(capacity);
|
||||
|
||||
//creating so we miss everytime
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//always hit
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Hit);
|
||||
}
|
||||
|
||||
//new values displace old ones
|
||||
for (int i = capacity; i < 2 * capacity; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//can not hit the old ones
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CacheEntryTests, Empty) {
|
||||
using testing::_;
|
||||
using ValueType = std::shared_ptr<int>;
|
||||
|
||||
constexpr size_t capacity = 0;
|
||||
constexpr size_t attempts = 10;
|
||||
|
||||
mockBuilder<ValueType::element_type, IntKey> builderMock;
|
||||
EXPECT_CALL(builderMock, build(_))
|
||||
.Times(2 * attempts)
|
||||
.WillRepeatedly([](const IntKey& key){return key.data;});
|
||||
|
||||
auto builder = [&](const IntKey& key) { return std::make_shared<int>(builderMock.build(key)); };
|
||||
|
||||
CacheEntry<IntKey, ValueType> entry(capacity);
|
||||
|
||||
//creating so we miss everytime
|
||||
for (int i = 0; i < attempts; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//since the capacity is 0 we will always miss
|
||||
for (int i = 0; i < attempts; ++i) {
|
||||
auto result = entry.getOrCreate({i}, builder);
|
||||
ASSERT_NE(result.first, ValueType());
|
||||
ASSERT_EQ(*result.first, i);
|
||||
ASSERT_EQ(result.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct StringKey {
|
||||
size_t hash() const {
|
||||
return std::hash<std::string>().operator()(data);
|
||||
}
|
||||
bool operator==(const StringKey& rhs) const noexcept {
|
||||
return this->data == rhs.data;
|
||||
}
|
||||
|
||||
std::string data;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
TEST(MultiCacheTests, GetOrCreate) {
|
||||
using testing::_;
|
||||
using IntValueType = std::shared_ptr<int>;
|
||||
using StrValueType = std::shared_ptr<std::string>;
|
||||
|
||||
constexpr size_t capacity = 10;
|
||||
|
||||
mockBuilder<IntValueType::element_type, IntKey> intBuilderMock;
|
||||
EXPECT_CALL(intBuilderMock, build(_))
|
||||
.Times(3 * capacity)
|
||||
.WillRepeatedly([](const IntKey& key){return key.data;});
|
||||
|
||||
mockBuilder<StrValueType::element_type, StringKey> strBuilderMock;
|
||||
EXPECT_CALL(strBuilderMock, build(_))
|
||||
.Times(3 * capacity)
|
||||
.WillRepeatedly([](const StringKey& key){return key.data;});
|
||||
|
||||
auto intBuilder = [&](const IntKey& key) { return std::make_shared<int>(intBuilderMock.build(key)); };
|
||||
auto strBuilder = [&](const StringKey& key) { return std::make_shared<std::string>(strBuilderMock.build(key)); };
|
||||
|
||||
MultiCache cache(capacity);
|
||||
|
||||
//creating so we miss everytime
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//always hit
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Hit);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Hit);
|
||||
}
|
||||
|
||||
//new values displace old ones
|
||||
for (int i = capacity; i < 2 * capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//can not hit the old ones
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(MultiCacheTests, Empty) {
|
||||
using testing::_;
|
||||
using IntValueType = std::shared_ptr<int>;
|
||||
using StrValueType = std::shared_ptr<std::string>;
|
||||
|
||||
constexpr size_t capacity = 0;
|
||||
constexpr size_t attempts = 10;
|
||||
|
||||
mockBuilder<IntValueType::element_type, IntKey> intBuilderMock;
|
||||
EXPECT_CALL(intBuilderMock, build(_))
|
||||
.Times(2 * attempts)
|
||||
.WillRepeatedly([](const IntKey& key){return key.data;});
|
||||
|
||||
mockBuilder<StrValueType::element_type, StringKey> strBuilderMock;
|
||||
EXPECT_CALL(strBuilderMock, build(_))
|
||||
.Times(2 * attempts)
|
||||
.WillRepeatedly([](const StringKey& key){return key.data;});
|
||||
|
||||
auto intBuilder = [&](const IntKey& key) { return std::make_shared<int>(intBuilderMock.build(key)); };
|
||||
auto strBuilder = [&](const StringKey& key) { return std::make_shared<std::string>(strBuilderMock.build(key)); };
|
||||
|
||||
MultiCache cache(capacity);
|
||||
|
||||
//creating so we miss everytime
|
||||
for (int i = 0; i < attempts; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//since the capacity is 0 we will always miss
|
||||
for (int i = 0; i < attempts; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
class ScopedThread {
|
||||
public:
|
||||
explicit ScopedThread(std::thread t) : _t(std::move(t)) {
|
||||
if (!_t.joinable()) {
|
||||
std::logic_error("Thread is not joinable!");
|
||||
}
|
||||
}
|
||||
~ScopedThread() {
|
||||
_t.join();
|
||||
}
|
||||
ScopedThread(ScopedThread&& rhs) noexcept = default;
|
||||
private:
|
||||
std::thread _t;
|
||||
};
|
||||
}// namespace
|
||||
|
||||
|
||||
TEST(MultiCacheTests, SmokeTypeIdSync) {
|
||||
using IntValueType = std::shared_ptr<int>;
|
||||
using StrValueType = std::shared_ptr<std::string>;
|
||||
|
||||
constexpr size_t capacity = 10;
|
||||
constexpr size_t numThreads = 30;
|
||||
|
||||
auto intBuilder = [&](const IntKey& key) { return std::make_shared<int>(key.data); };
|
||||
auto strBuilder = [&](const StringKey& key) { return std::make_shared<std::string>(key.data); };
|
||||
|
||||
std::vector<MultiCache> vecCache(numThreads, MultiCache(capacity));
|
||||
|
||||
auto testRoutine = [&](MultiCache& cache) {
|
||||
//creating so we miss everytime
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Miss);
|
||||
}
|
||||
|
||||
//always hit
|
||||
for (int i = 0; i < capacity; ++i) {
|
||||
auto intResult = cache.getOrCreate(IntKey{i}, intBuilder);
|
||||
ASSERT_NE(intResult.first, IntValueType());
|
||||
ASSERT_EQ(*intResult.first, i);
|
||||
ASSERT_EQ(intResult.second, CacheEntryBase::LookUpStatus::Hit);
|
||||
auto strResult = cache.getOrCreate(StringKey{std::to_string(i)}, strBuilder);
|
||||
ASSERT_NE(strResult.first, StrValueType());
|
||||
ASSERT_EQ(*strResult.first, std::to_string(i));
|
||||
ASSERT_EQ(strResult.second, CacheEntryBase::LookUpStatus::Hit);
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<ScopedThread> vecThreads;
|
||||
vecThreads.reserve(numThreads);
|
||||
for (size_t i = 0; i < numThreads; ++i) {
|
||||
vecThreads.emplace_back(std::thread(testRoutine, std::ref(vecCache[i])));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user