[CPU] MVN, FQ, Convert dynamic nodes (#7817)

Maxim Andronov 2021-10-28 10:52:14 +03:00 committed by GitHub
parent 6908023a42
commit 6416b73855
28 changed files with 1085 additions and 536 deletions

View File

@ -486,7 +486,7 @@ void MKLDNNGraph::InitEdges() {
std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(),
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
convertName, this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);

View File

@ -1609,7 +1609,30 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph
std::vector<float> scalesBuffer;
std::vector<float> shiftsBuffer;
parent->fillScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get(), scalesBuffer, shiftsBuffer, 1);
auto parentEltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(parent);
if (!parentEltwise) {
IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node";
}
std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get());
const auto &outputShape = child->getOutputShapeAtPort(0);
VectorDims outputDims = outputShape.getDims();
const size_t channelPos = outputDims.size() > 1 ? 1 : 0;
if (outputShape.isDynamic()) {
if (outputDims[channelPos] == Shape::UNDEFINED_DIM) {
if (scalesBuffer.size() > 1) {
outputDims[channelPos] = scalesBuffer.size();
} else if (shiftsBuffer.size() > 1) {
outputDims[channelPos] = shiftsBuffer.size();
} else {
return false;
}
}
}
scalesBuffer = makeAlignedBuffer(outputDims[channelPos], scalesBuffer, 1);
shiftsBuffer = makeAlignedBuffer(outputDims[channelPos], shiftsBuffer, 1);
for (int i = 0; i < scalesBuffer.size(); i++)
if (scalesBuffer[i] == 0.f)

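Note: this hunk (and the Eltwise / FakeQuantize changes below) relies on the makeAlignedBuffer helper from utils/cpu_utils.hpp, whose body is not part of this diff. A minimal sketch of the assumed behavior, reconstructed from the alignment logic the removed fillScalesAndShifts used to perform (broadcast a per-tensor value across the channel dimension and zero-pad up to the requested alignment); it reuses the existing rnd_up utility:

// Sketch only: assumed behavior of makeAlignedBuffer (utils/cpu_utils.hpp), not the actual implementation.
// targetSize - number of channels the post-op buffer must cover
// buffer     - per-tensor (size 1) or per-channel values
// align      - pad the result to a multiple of this value; -1 means no padding beyond targetSize
inline std::vector<float> makeAlignedBuffer(size_t targetSize, const std::vector<float>& buffer, int align = -1) {
    const size_t alignedSize = (align == -1) ? targetSize : rnd_up(targetSize, static_cast<size_t>(align));
    std::vector<float> alignedBuffer(alignedSize, 0.0f);
    for (size_t i = 0; i < targetSize; i++)
        alignedBuffer[i] = (buffer.size() == 1) ? buffer[0] : buffer[i];
    return alignedBuffer;
}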
View File

@ -503,8 +503,9 @@ void MKLDNNNode::execute(mkldnn::stream strm) {
}
void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
if (needShapeInfer())
if (needShapeInfer()) {
redefineOutputMemory(shapeInfer());
}
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
@ -1045,7 +1046,7 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}
@ -1192,7 +1193,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
if (i == fusingPort)
continue;
auto& weightShape = getInputShapeAtPort(i).getDims();
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape))
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
return false;
}
return true;
@ -1213,6 +1214,66 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|| isConvertablePowerStatic();
}
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::vector<float> scales, shifts;
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
auto constBlob = constInputNode->getMemoryPtr();
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
buffer.resize(elementsCount);
cpu_convert(constBlob->GetPtr(),
&buffer[0],
MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
Precision::FP32,
elementsCount);
};
const auto constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
} else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (!power) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
}
scales.push_back(power->getBeta());
shifts.push_back(power->getGamma());
} else {
IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
}
switch (getAlgorithm()) {
case EltwiseAdd: {
scales.resize(shifts.size(), 1.0f);
break;
}
case EltwiseSubtract: {
scales.resize(shifts.size(), 1.0f);
std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
break;
}
case EltwiseMultiply: {
shifts.resize(scales.size(), 0.0f);
break;
}
case EltwiseDivide: {
shifts.resize(scales.size(), 0.0f);
std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
break;
}
default: break;
}
return {scales, shifts};
}
bool MKLDNNNode::inputShapesDefined() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
@ -1307,86 +1368,6 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
return false;
}
void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, int align) {
scales.clear();
shifts.clear();
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
auto constBlob = constInputNode->getMemoryPtr();
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
buffer.resize(elementsCount);
cpu_convert(constBlob->GetPtr(),
&buffer[0],
MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
Precision::FP32,
elementsCount);
};
const size_t constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
} else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (!power) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
}
scales.push_back(power->getBeta());
shifts.push_back(power->getGamma());
} else {
IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
}
const size_t bufferSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
if (align == -1) {
align = bufferSize;
}
const size_t bufferSizeAligned = rnd_up(bufferSize, static_cast<size_t>(align));
size_t initSize = scales.size();
if (initSize > 0) {
scales.resize(bufferSizeAligned, 0);
if (initSize == 1) {
std::fill(scales.begin() + 1, scales.begin() + bufferSize, scales[0]);
}
}
initSize = shifts.size();
if (initSize > 0) {
shifts.resize(bufferSizeAligned, 0);
if (initSize == 1) {
std::fill(shifts.begin() + 1, shifts.begin() + bufferSize, shifts[0]);
}
}
switch (getAlgorithm()) {
case EltwiseAdd: {
scales.resize(bufferSizeAligned, 1.0f);
break;
}
case EltwiseSubtract: {
scales.resize(bufferSizeAligned, 1.0f);
std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
break;
}
case EltwiseMultiply: {
shifts.resize(bufferSizeAligned, 0.0f);
break;
}
case EltwiseDivide: {
shifts.resize(bufferSizeAligned, 0.0f);
std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
break;
}
default: break;
}
}
void MKLDNNNode::createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& op) {
ngraph::OutputVector inputsForShapeInfer;
for (size_t i = 0; i < inputShapes.size(); i++) {

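For clarity, an illustrative use of the new getScalesAndShifts (the node names and constant values below are hypothetical): for an EltwiseSubtract node whose constant input holds {0.5f, 0.25f}, the switch above default-fills the scales and negates the shifts so that Subtract maps onto scale-shift semantics:

// Illustrative only (hypothetical node and values), derived from the switch statement above.
std::vector<float> scales, shifts;
std::tie(scales, shifts) = subtractNode->getScalesAndShifts(parentNode.get());
// scales == {1.0f, 1.0f}     - default-filled to match shifts.size()
// shifts == {-0.5f, -0.25f}  - negated, so the fused op computes x * 1.0f + (-0.5f) etc.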
View File

@ -556,10 +556,18 @@ public:
return outputShapes[port];
}
/**
* @brief Returns scales and shifts if the node can be executed as ScaleShift, otherwise raises an exception
* If the node has only a scale or only a shift value, the missing buffer is filled with default values,
* e.g. EltwiseAdd: fill shifts from the constant input, fill scales with the default value 1.0f
* @param parentNode
* node from which the data comes
* @return pair of scales and shifts
*/
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
protected:
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
// TODO [mandrono]: place outside of the node API
void fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, const int align = -1);
void setType(Type type) {
this->type = type;
@ -578,7 +586,7 @@ protected:
* The seed node should call this routine and pass its post operations list as a parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const { return nullptr; }
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>

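As a reference for the new appendPostOps signature, a hedged sketch of how a seed node is expected to call it after this change; it simply mirrors the convolution / deconvolution call sites later in this commit (ops and attr are the seed node's usual oneDNN post-op containers):

// Sketch mirroring the call sites introduced by this commit, not a new API:
// the seed node now passes its output dims and the required buffer alignment explicitly.
mkldnn::post_ops ops;
for (auto &fusedNode : fusedWith) {
    if (auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedNode.get())) {
        constexpr int align = 16;  // per-channel buffers are padded for the JIT injectors
        eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
    }
}
attr.set_post_ops(ops);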
View File

@ -19,6 +19,7 @@
#include "cpu/x64/cpu_isa_traits.hpp"
#include "utils/general_utils.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/cpu_utils.hpp"
// WA for xbyak.h
#ifdef _WIN32
@ -1127,16 +1128,19 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (eltwiseNode->isSpecialConvolutionAddFusing())
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0);
else
eltwiseNode->appendPostOps(ops);
} else {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
}
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
fakeQuantizeNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
continue;
}

View File

@ -355,7 +355,7 @@ void MKLDNNConcatNode::createPrimitive() {
IE_THROW() << "Source memory from " << parent->getName() << " didn't allocate for node "
<< getName() << ".";
}
// DnnlBlockedMemoryDesc
auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
auto& dims = getInputShapeAtPort(i).getStaticDims();
for (size_t j = 0; j < dims.size(); j++) {

View File

@ -20,6 +20,7 @@
#include "common/cpu_convert.h"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -338,7 +339,8 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
@ -351,7 +353,9 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
constexpr int align = -1;
// no need to fill post op dims for FQ, it only makes sense for binary FQ
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());

View File

@ -15,11 +15,6 @@ using namespace InferenceEngine;
bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
if (!convert) {
errorMessage = "Only opset1 Convert operation is supported";
@ -41,14 +36,20 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, co
}
}
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
return std::vector<VectorDims>{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
inputShapes.emplace_back(dims);
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
outputShapes.emplace_back(dims);
outputShapes.push_back(shape);
addOriginalOutputPrecision(outPrc);
isDynamic = shape.isDynamic();
errorPrefix = "Convert node with name '" + getName() + "'";
}

View File

@ -14,13 +14,14 @@ namespace MKLDNNPlugin {
class MKLDNNConvertNode : public MKLDNNNode {
public:
MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool created() const override;
bool canBeInPlace() const override {
return false;
@ -38,6 +39,9 @@ public:
const MemoryDesc& getInput() const { return *input; }
const MemoryDesc& getOutput() const { return *output; }
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override { return false; }
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedDesc(const MemoryDesc &desc);

View File

@ -18,6 +18,7 @@
#include <nodes/common/cpu_memcpy.h>
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -273,7 +274,9 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());

View File

@ -23,6 +23,7 @@
#include "emitters/jit_bf16_emitters.hpp"
#include <mkldnn_selective_build.h>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
#include "ngraph/ngraph.hpp"
#include <ngraph/opsets/opset1.hpp>
@ -1007,9 +1008,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const {
// TODO [DS]: used only in FuseConvolutionSumAndConvolutionSumActivation
// fix when this transformation is reimplemented for dynamic shapes
bool MKLDNNEltwiseNode::isWithBroadcast() {
auto oDims = outputShapes[0].getStaticDims();
auto oDims = getOutputShapeAtPort(0).getStaticDims();
for (size_t i = 0; i < inputShapes.size(); i++) {
auto iDims = inputShapes[i].getStaticDims();
auto iDims = getInputShapeAtPort(i).getStaticDims();
if (iDims != oDims)
return true;
}
@ -1039,7 +1040,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
return;
// if dim rank is greater than the maximum possible, we should use the reference execution
canUseOptimizedImpl = mayiuse(x64::sse41) && inputShapes[0].getRank() <= MAX_ELTWISE_DIM_RANK;
canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
size_t expectedInputsNum = getOpInputsNum();
for (auto& postOp : fusedWith) {
@ -1246,10 +1247,6 @@ std::vector<VectorDims> MKLDNNEltwiseNode::shapeInfer() const {
}
void MKLDNNEltwiseNode::prepareParams() {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
if (memPtrs.empty()) {
for (auto i = 0; i < inputNum; i++)
memPtrs.push_back(getParentEdgeAt(i)->getMemoryPtr());
@ -1520,7 +1517,7 @@ void MKLDNNEltwiseNode::offset_in_calc(VectorDims& offset, VectorDims& dims_in,
}
}
void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void MKLDNNEltwiseNode::executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const {
parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4],
[&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
@ -1535,7 +1532,7 @@ void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise
});
}
void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out, const size_t schedulerWorkAmount) const {
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0;
@ -1690,19 +1687,14 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd &&
getInputShapeAtPort(0) == getInputShapeAtPort(1);
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
if ((parentNode->getType() == FullyConnected) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
fillScalesAndShifts(parentNode.get(), scales, shifts);
} else {
fillScalesAndShifts(parentNode.get(), scales, shifts, 16);
}
scalesSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
}
MKLDNNNode::fuseInto(parentNode);
}
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
switch (getMKLDNNAlgorithm()) {
case mkldnn::algorithm::eltwise_relu:
@ -1730,18 +1722,21 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
default: IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
auto outShape = outputShapes[0].getStaticDims();
auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
std::vector<size_t> binaryDims(postOpDims.size(), 1);
binaryDims[chIdx] = postOpDims[chIdx];
std::vector<size_t> binaryShape(outShape.size(), 1);
binaryShape[chIdx] = outShape[chIdx];
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (initBinaryMemory) {
@ -1752,19 +1747,19 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwiseMultiply:
case EltwiseDivide:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
break;
case EltwiseMulAdd:
case EltwisePowerStatic:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
@ -1777,14 +1772,14 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scales.empty() || shifts.empty())
if (scalesBuffer.empty() || shiftsBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]);
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scales.empty())
if (scalesBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr);
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
@ -1810,7 +1805,7 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
return true;
};
if (!mayiuse(x64::sse41) || inputShapes[0].getRank() > MAX_ELTWISE_DIM_RANK)
if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
return false;
if (!isSuitableNode(this)) {

View File

@ -75,7 +75,7 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;
@ -116,7 +116,7 @@ private:
void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override;
const jit_eltwise_params& getJep() const override;
std::shared_ptr<jit_uni_eltwise_kernel> pKernel;
std::unique_ptr<jit_uni_eltwise_kernel> pKernel;
size_t schedulerWorkAmount = 0;
};
@ -149,15 +149,16 @@ private:
std::vector<float> scales = {};
std::vector<float> shifts = {};
size_t scalesSize = 0;
std::vector<float> scalesBuffer = {};
std::vector<float> shiftsBuffer = {};
std::vector<MKLDNNMemoryPtr> memPtrs = {};
static std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>> initializers;
void executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const;
void executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out, const size_t schedulerWorkAmount) const;
void executeReference(const jit_eltwise_params &jep, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out,
const size_t fullWorkAmount) const;

View File

@ -22,6 +22,7 @@
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/ngraph_utils.hpp"
#include "common/cpu_memcpy.h"
// Quantization range validation is switched off by default in order to avoid regressions on the user side
// #define VALIDATE_QUANTIZATION_RANGES
@ -825,23 +826,19 @@ private:
bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(op);
if (!fq) {
errorMessage = "Only opset1 FakeQuantize operation is supported";
return false;
}
if (fq->get_input_shape(0).size() < 2 || fq->get_input_shape(0).size() > 5) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(fq->get_input_shape(0).size());
const auto dataRank = fq->get_input_partial_shape(0).rank().get_length();
if (dataRank < 2 || dataRank > 5) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(dataRank);
return false;
}
for (size_t i = 1; i < fq->get_input_size(); i++) {
if (fq->get_input_shape(i).size() > 5) {
errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_shape(i).size());
if (fq->get_input_partial_shape(i).rank().get_length() > 5) {
errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_partial_shape(i).rank().get_length());
return false;
}
}
@ -853,7 +850,7 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ng
}
for (size_t i = 1; i < fq->get_input_size(); i++) {
size_t count_not_unit_axis = 0;
auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), fq->get_input_shape(0).size());
auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), dataRank);
if (ngraph::shape_size(shape) != 1) {
size_t not_unit_axis = 0;
@ -892,12 +889,12 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
if (levels <= 1)
IE_THROW() << errorPrefix << "supports 'levels' attribute greater than or equal to 2";
if (fq->get_input_size() != 5)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << fq->get_input_size();
if (fq->get_output_size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size();
if (inputShapes.size() != 5)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << inputShapes.size();
if (outputShapes.size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << outputShapes.size();
auto initAxisIdx = [&](const ngraph::Shape& inputDims) {
auto initAxisIdx = [&](const VectorDims& inputDims) {
size_t axisIdx = 0;
for (int i = 1; i < inputDims.size(); i++) {
if (inputDims[i] > 1) {
@ -908,11 +905,11 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
return axisIdx;
};
const size_t dataNDims = fq->get_input_shape(0).size();
axis = dataNDims == 1 ? 0 : 1;
const size_t dataRank = getInputShapeAtPort(0).getRank();
axis = dataRank == 1 ? 0 : 1;
int axisSize = -1;
const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataNDims);
const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataRank);
auto inputLowAxis = initAxisIdx(ilShape);
isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1);
if (!isInputLowBroadcasted) {
@ -920,7 +917,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = ilShape[inputLowAxis];
}
const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataNDims);
const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataRank);
auto inputHighAxis = initAxisIdx(ihShape);
isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1);
if (!isInputHighBroadcasted) {
@ -928,7 +925,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = ihShape[inputHighAxis];
}
const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataNDims);
const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataRank);
auto outputLowAxis = initAxisIdx(olShape);
isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1);
if (!isOutputLowBroadcasted) {
@ -936,7 +933,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = olShape[outputLowAxis];
}
const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataNDims);
const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataRank);
auto outputHighAxis = initAxisIdx(ohShape);
isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1);
if (!isOutputHighBroadcasted) {
@ -949,11 +946,9 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
auto outputLowAxisSize = ngraph::is_scalar(olShape) ? 1 : olShape[outputLowAxis];
auto outputHighAxisSize = ngraph::is_scalar(ohShape) ? 1 : ohShape[outputHighAxis];
int axisRealSize = static_cast<int>(fq->get_input_shape(0)[axis]);
size_t axisPaddedSize = static_cast<size_t>(rnd_up(fq->get_input_shape(0)[axis], 16));
if (axisSize != -1 && axisSize != axisRealSize)
if (axisSize != -1 && !dimsEqualWeak(axisSize, getInputShapeAtPort(0).getDims()[axis])) {
IE_THROW() << errorPrefix << "has different quantization axis size on 'data' and 'range' inputs";
}
const auto inputLowNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(fq->get_input_node_shared_ptr(1));
auto inputLowData = inputLowNode->cast_vector<float>();
@ -995,12 +990,24 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
if (binarization) {
algorithm = FQBinarization;
binarizationThresholds.resize(axisPaddedSize);
binarizationOutputMask.resize(axisPaddedSize);
if (isInputLowBroadcasted) {
binarizationThresholds.push_back(inputLowData[0]);
} else {
IE_ASSERT(axisSize != -1);
binarizationThresholds.resize(rnd_up(axisSize, 16));
for (int i = 0; i < axisSize; i++) {
binarizationThresholds[i] = inputLowData[i];
}
}
for (int i = 0; i < axisRealSize; i++) {
binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i];
binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000;
if (isOutputHighBroadcasted) {
binarizationOutputMask.push_back(outputHighData[0] == 1.f ? 0xffffffff : 0x00000000);
} else {
IE_ASSERT(axisSize != -1);
binarizationOutputMask.resize(rnd_up(axisSize, 16));
for (int i = 0; i < axisSize; i++) {
binarizationOutputMask[i] = outputHighData[i] == 1.f ? 0xffffffff : 0x00000000;
}
}
} else {
auto allElementsAreEqual = [&](const std::vector<float> &data, size_t size) {
@ -1117,13 +1124,14 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
std::vector<LayoutType> MKLDNNFakeQuantizeNode::getDataFormats() const {
// Special case for first FQ in the network
if (getInputShapeAtPort(0).getStaticDims()[getAxis()] == 3) {
const auto &dims = getInputShapeAtPort(0).getDims();
if (dims[getAxis()] == 3) {
return { LayoutType::ncsp };
} else {
if (isBinarization()) {
return { LayoutType::nspc };
} else {
if (one_of(getInputShapeAtPort(0).getRank(), 4, 5)) {
if (one_of(dims.size(), 4, 5)) {
if (getAxis() == 1) {
auto blkFormat = mayiuse(cpu::x64::avx512_common) ? LayoutType::nCsp16c : LayoutType::nCsp8c;
return { blkFormat, LayoutType::nspc, LayoutType::ncsp };
@ -1235,81 +1243,139 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNFakeQuantizeNode::createPrimitive() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto inDims = config.inConfs[0].desc->getShape().getStaticDims();
jqp.c = inDims.size() > 1 ? inDims[1] : 1;
jqp.src_prc = config.inConfs[0].desc->getPrecision();
jqp.wei_prc = Precision::FP32;
jqp.dst_prc = config.outConfs[0].desc->getPrecision();
auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.s_str = srcDesc->getStrides();
auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.d_str = dstDesc->getStrides();
jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
jqp.op_type = getAlgorithm();
bool MKLDNNFakeQuantizeNode::needPrepareParams() const {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (mayiuse(cpu::x64::avx512_common)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(jqp));
} else if (mayiuse(cpu::x64::avx2)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(jqp));
} else if (mayiuse(cpu::x64::sse41)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(jqp));
if (internalBlobMemory.empty() || (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref && inputShapesModified())) {
return true;
}
const auto axisSize = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[getAxis()];
const auto newPaddedSize = rnd_up(axisSize, 16);
const auto currPaddedSize = rnd_up(currentAxisSize, 16);
return newPaddedSize != currPaddedSize || (isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) &&
axisSize != currentAxisSize);
}
void MKLDNNFakeQuantizeNode::prepareParams() {
const size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
const size_t newPaddedSize = rnd_up(axisSize, 16);
IE_ASSERT(newPaddedSize != 0);
if (internalBlobMemory.empty() || newPaddedSize != rnd_up(currentAxisSize, 16) ||
(isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) && axisSize != currentAxisSize)) {
DnnlBlockedMemoryDesc weightsDataDesc(Shape(VectorDims{newPaddedSize}), memory::data_type::f32, memory::format_tag::x);
if (isBinarization()) {
constexpr size_t numBinFqIntBlob = 2;
bool needUpdThr = false, needUpdMask = false;
if (isInputLowBroadcasted && axisSize != currentAxisSize) {
binarizationThresholds.resize(newPaddedSize);
std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + axisSize, binarizationThresholds[0]);
std::fill(binarizationThresholds.begin() + axisSize, binarizationThresholds.end(), 0);
needUpdThr = true;
}
if (isOutputHighBroadcasted && axisSize != currentAxisSize) {
binarizationOutputMask.resize(newPaddedSize);
std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + axisSize, binarizationOutputMask[0]);
std::fill(binarizationOutputMask.begin() + axisSize, binarizationOutputMask.end(), 0);
needUpdMask = true;
}
if (internalBlobMemory.empty() || needUpdThr) {
auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
if (internalBlobMemory.empty()) {
internalBlobMemory.push_back(binarizationThresholdsDataMem);
} else {
internalBlobMemory[0] = binarizationThresholdsDataMem;
}
}
if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) {
auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) {
internalBlobMemory.push_back(binarizationMaskDataMem);
} else {
internalBlobMemory[1] = binarizationMaskDataMem;
}
}
} else if (levels != 2) {
constexpr size_t numFqIntBlob = 6;
auto pushInternalBlob = [&](std::vector<float>& data, size_t idx) {
auto memory = std::make_shared<MKLDNNMemory>(getEngine());
bool needOverwrite = getInputShapeAtPort(0).getDims()[getAxis()] == Shape::UNDEFINED_DIM && data.size() == 1;
if (needOverwrite) {
memory->Create(weightsDataDesc);
float *ptr = reinterpret_cast<float *>(memory->GetPtr());
std::fill(ptr, ptr + newPaddedSize, data[0]);
} else {
if (data.size() == 1) {
data.resize(newPaddedSize, data[0]);
} else {
data.resize(newPaddedSize);
}
memory->Create(weightsDataDesc, &data[0]);
}
if (internalBlobMemory.size() != numFqIntBlob) {
internalBlobMemory.push_back(memory);
} else if (needOverwrite) {
internalBlobMemory[idx] = memory;
}
};
pushInternalBlob(cropLow, 0);
pushInternalBlob(cropHigh, 1);
pushInternalBlob(inputScale, 2);
pushInternalBlob(inputShift, 3);
pushInternalBlob(outputScale, 4);
pushInternalBlob(outputShift, 5);
} else {
IE_THROW() << "Can't fill internal blob for FakeQuantize node with name: " << getName();
}
}
if (quantize_kernel)
quantize_kernel->create_ker();
currentAxisSize = axisSize;
size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
size_t axisPaddedSize = rnd_up(axisSize, 16);
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
const auto& config = getSelectedPrimitiveDescriptor()->getConfig();
DnnlBlockedMemoryDesc weightsDataDesc(Shape(InferenceEngine::SizeVector{axisPaddedSize}), memory::data_type::f32, memory::format_tag::x);
const auto& inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims();
if (isBinarization()) {
auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
internalBlobMemory.push_back(binarizationThresholdsDataMem);
jit_quantize_params jqp = {};
jqp.c = inDims.size() > 1 ? inDims[1] : 1;
auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
internalBlobMemory.push_back(binarizationMaskDataMem);
} else if (levels != 2) {
auto pushInternalBlob = [&](std::vector<float>& data) {
if (data.size() == 1)
data.resize(axisPaddedSize, data[0]);
else
data.resize(axisPaddedSize);
auto memory = std::make_shared<MKLDNNMemory>(getEngine());
memory->Create(weightsDataDesc, &data[0]);
internalBlobMemory.push_back(memory);
};
jqp.src_prc = config.inConfs[0].desc->getPrecision();
jqp.wei_prc = Precision::FP32;
jqp.dst_prc = config.outConfs[0].desc->getPrecision();
pushInternalBlob(cropLow);
pushInternalBlob(cropHigh);
pushInternalBlob(inputScale);
pushInternalBlob(inputShift);
pushInternalBlob(outputScale);
pushInternalBlob(outputShift);
auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.s_str = srcDesc->getStrides();
auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.d_str = dstDesc->getStrides();
jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
jqp.op_type = getAlgorithm();
execPtr = std::make_shared<FakeQuantizeJitExecutor>(jqp);
}
}
void MKLDNNFakeQuantizeNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
@ -1322,8 +1388,8 @@ void MKLDNNFakeQuantizeNode::executeReference() {
auto srcDims = srcMemory->getStaticDims();
auto dstDims = dstMemory->getStaticDims();
auto s_str = jqp.s_str;
auto d_str = jqp.d_str;
auto s_str = srcMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto d_str = dstMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();
const int N = srcDims[0];
const int C = srcDims.size() > 1 ? srcDims[1] : 1;
@ -1331,7 +1397,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1;
const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1;
if (jqp.op_type == FQBinarization) {
if (isBinarization()) {
size_t tmp = s_str[s_str.size() - 1];
for (int i = s_str.size() - 1; i > 1; i--) {
s_str[i] = s_str[i - 1];
@ -1430,7 +1496,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
}
}
void MKLDNNFakeQuantizeNode::executeBinarization() {
void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@ -1442,6 +1508,7 @@ void MKLDNNFakeQuantizeNode::executeBinarization() {
auto src_dims = srcMemory->getStaticDims();
const auto &jqp = pKernel->jqp_;
std::vector<size_t> s_str = jqp.s_str;
size_t tmp = s_str[s_str.size() - 1];
for (int i = s_str.size() - 1; i > 1; i--) {
@ -1465,11 +1532,11 @@ void MKLDNNFakeQuantizeNode::executeBinarization() {
arg.output_mask = &output_mask[0];
arg.work_amount = (size_t)C;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
}
void MKLDNNFakeQuantizeNode::executeQuantization() {
void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@ -1490,6 +1557,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5))
? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8;
const auto &jqp = pKernel->jqp_;
auto src_type_size = jqp.src_prc.size();
auto dst_type_size = jqp.dst_prc.size();
@ -1536,7 +1604,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
arg.block_size = (size_t) blk_size;
arg.work_amount = (size_t)H;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
} else {
parallel_nd(N, CB, D, H, [&](int n, int cb, int d, int h) {
@ -1564,7 +1632,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c);
arg.work_amount = (size_t) W;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
}
}
@ -1575,29 +1643,40 @@ void MKLDNNFakeQuantizeNode::execute(mkldnn::stream strm) {
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (jqp.op_type == FQBinarization)
executeBinarization();
else
executeQuantization();
execPtr->exec(*this);
} else {
executeReference();
}
}
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
// MKLDNN quantization_injectors assume that the quantization data memory is always aligned to 16,
// the length of an AVX512 vector register, which is also enough for the AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-reads and performance penalties due to denormals.
const size_t bufferAlignment = 16;
if (getAlgorithm() == FQBinarization) {
const auto realAxisSize = postOpDims[postOpDims.size() > 1 ? 1 : 0];
const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
if (!isPostOpDataInitialized) {
size_t paddedSize = rnd_up(binarizationThresholds.size(), bufferAlignment);
binarizationThresholds.resize(paddedSize, 0);
binarizationOutputMask.resize(paddedSize, 0);
binarizationThresholds.resize(axisPaddedSize, 0);
binarizationOutputMask.resize(axisPaddedSize, 0);
if (isInputLowBroadcasted) {
std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
}
if (isOutputHighBroadcasted) {
std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
std::fill(binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask.end(), 0);
}
}
ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);
if (!isInputLowBroadcasted && !isOutputHighBroadcasted) {
isPostOpDataInitialized = true;
}
} else {
if (!isPostOpDataInitialized) {
if (cropLow.size() > 1)
@ -1626,10 +1705,10 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) {
auto outShape = outputShapes[0].getStaticDims();
auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
const auto rank = getOutputShapeAtPort(0).getRank();
auto chIdx = rank > 1 ? 1 : 0;
std::vector<size_t> binaryShape(outShape.size(), 1);
std::vector<size_t> binaryShape(rank, 1);
binaryShape[chIdx] = dataSize;
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
@ -1654,10 +1733,45 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin
} else {
ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData);
}
}
if (!isPostOpDataInitialized)
isPostOpDataInitialized = true;
}
}
MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) {
bool isBinarization = _jqp.op_type == FQBinarization;
if (mayiuse(cpu::x64::avx512_common)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(_jqp));
} else if (mayiuse(cpu::x64::avx2)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(_jqp));
} else if (mayiuse(cpu::x64::sse41)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(_jqp));
} else {
IE_THROW() << "Can't create jit fake quantize kernel";
}
if (pKernel) {
pKernel->create_ker();
}
}
void MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::exec(const MKLDNNFakeQuantizeNode& node) {
if (!pKernel)
IE_THROW() << "Can't execute, kernel for fake quantize node is not compiled";
if (pKernel->jqp_.op_type == FQBinarization) {
node.executeBinarization(pKernel);
} else {
node.executeQuantization(pKernel);
}
}
bool MKLDNNFakeQuantizeNode::created() const {

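A small worked example of the re-prepare condition added above (the axis sizes are illustrative): internal blobs are rebuilt only when the 16-padded axis size changes, or, for binarization with broadcasted input low / output high constants, when the raw axis size changes:

// Illustrative values only, following needPrepareParams() above.
const size_t lastAxisSize = 24;                   // axis size used when the blobs were last built
const size_t newAxisSize = 30;                    // axis size observed on the current inference
const bool paddedSizeChanged =
        rnd_up(newAxisSize, 16) != rnd_up(lastAxisSize, 16);  // 32 != 32 -> false, blobs are reused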
View File

@ -73,11 +73,15 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
size_t getAxis() const { return axis; }
bool isBinarization() const { return getAlgorithm() == Algorithm::FQBinarization; }
bool needPrepareParams() const override;
void prepareParams() override;
const float* getBinarizationTresholdsPtr() const { return &binarizationThresholds[0]; }
const float* getBinarizationOutputMaskPtr() const { return reinterpret_cast<const float*>(&binarizationOutputMask[0]); }
size_t getBinarizationTresholdsSize() const { return binarizationThresholds.size(); }
@ -117,7 +121,8 @@ public:
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }
void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = -1, bool initAsBinary = false,
bool initBinaryMemory = false) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
@ -129,11 +134,24 @@ public:
MKLDNNMemoryPtr outputShiftMemory;
private:
struct FakeQuantizeExecutor {
virtual void exec(const MKLDNNFakeQuantizeNode& node) = 0;
virtual ~FakeQuantizeExecutor() = default;
};
using executorPtr = std::shared_ptr<FakeQuantizeExecutor>;
executorPtr execPtr = nullptr;
struct FakeQuantizeJitExecutor : public FakeQuantizeExecutor {
FakeQuantizeJitExecutor(const jit_quantize_params &_jqp);
void exec(const MKLDNNFakeQuantizeNode& node) override;
std::unique_ptr<jit_uni_quantize_kernel> pKernel;
};
void init() override;
std::vector<LayoutType> getDataFormats() const;
void executeReference();
void executeBinarization();
void executeQuantization();
void executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
void executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
size_t levels = 0;
@ -170,15 +188,12 @@ private:
bool isOutputLowBroadcasted = false;
bool isOutputHighBroadcasted = false;
size_t currentAxisSize = 0;
size_t axis = 0;
InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32;
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
jit_quantize_params jqp = {};
std::shared_ptr<jit_uni_quantize_kernel> quantize_kernel = nullptr;
std::string errorPrefix;
};

View File

@ -14,6 +14,7 @@
#include "utils/general_utils.h"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -189,7 +190,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// no need to fill post op dims for FQ, it only makes sense for binary FQ
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, -1, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
@ -209,7 +211,9 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// TODO [DS]: change to shape from memory
constexpr int align = -1;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());

View File

@ -25,6 +25,7 @@
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset4.hpp>
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -2394,7 +2395,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

View File

@ -24,6 +24,7 @@
#include "utils/general_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "mkldnn_extension_utils.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -89,7 +90,8 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights
for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
continue;
}

View File

@ -24,6 +24,7 @@
#include <ngraph/opsets/opset6.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -604,11 +605,6 @@ private:
bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
if (op->get_output_partial_shape(0).rank().is_dynamic()) {
errorMessage = "Unsupported dynamic input rank.";
return false;
@ -680,7 +676,6 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
IE_THROW(NotImplemented) << errorMessage;
}
const ngraph::Shape& inDataShape = op->input_value(0).get_shape();
if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v6::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
@ -689,27 +684,25 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
epsMode_ = OUTSIDE_SQRT;
}
acrossChannels_ = false;
const auto& inDataShapeSize = inDataShape.size();
initAcrossChannels_ = false;
const auto& inDataShapeSize = getInputShapeAtPort(0).getRank();
if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1)
acrossChannels_ = true;
initAcrossChannels_ = true;
} else if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v0::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
epsMode_ = INSIDE_SQRT;
acrossChannels_ = mvnOp->get_across_channels();
initAcrossChannels_ = mvnOp->get_across_channels();
}
execAcrossChannels_ = initAcrossChannels_;
}
void MKLDNNMVNNode::getSupportedDescriptors() {
}
void MKLDNNMVNNode::getSupportedDescriptors() {}
void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
setPostOps(attr, true);
Precision inputPrecision = getOriginalInputPrecisionAtPort(0);
Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
if (!mayiuse(avx512_core)) {
@ -729,7 +722,8 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
src_data_size = inputPrecision.size();
dst_data_size = outputPrecision.size();
bool canBeInplace = (src_data_size == dst_data_size) &&
// TODO [DS]: inplace
bool canBeInplace = !isDynamicNode() && (src_data_size == dst_data_size) &&
(getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) &&
!getParentEdgeAt(0)->getParent()->isConstant();
@ -788,7 +782,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
pushDesc(LayoutType::ncsp, impl_type);
}
void MKLDNNMVNNode::createPrimitive() {
void MKLDNNMVNNode::prepareParams() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -800,74 +794,87 @@ void MKLDNNMVNNode::createPrimitive() {
const SizeVector in_dims = srcMemPtr->getStaticDims();
transformTo5DCase(in_dims);
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = acrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;
if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));
setPostOps(attr, true);
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (mayiuse(cpu::x64::sse41)) {
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = execAcrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;
if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
}
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));
if (mvn_kernel)
mvn_kernel->create_ker();
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();
if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}
}
void MKLDNNMVNNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
if (mvn_kernel)
mvn_kernel->create_ker();
if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();
if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}
void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {
switch (shape.size()) {
// for 1 and 2 rank, if acrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// for 1 and 2 rank, if initAcrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// otherwise there are not enough data in spatial dimension to process in one kernel.
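// Illustrative example (hypothetical dims): a rank-2 input {3, 8} with initAcrossChannels_ == true is
// remapped to shape5D = (1, 3, 1, 8, 1) and execAcrossChannels_ is reset to false, so each original
// instance becomes a "channel" and is normalized over its own 8 values by the regular per-channel path.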
case 1 : // C
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, 1, 1, 1, shape[0]);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(1, shape[0], 1, 1, 1);
break;
}
case 2 : // NC
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, shape[0], 1, shape[1], 1);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(shape[0], shape[1], 1, 1, 1);
@ -882,6 +889,8 @@ void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {
void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
mkldnn::post_ops ops;
VectorDims postOpDims(5);
std::tie(postOpDims[0], postOpDims[1], postOpDims[2], postOpDims[3], postOpDims[4]) = shape5D;
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
@ -891,7 +900,8 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@ -906,22 +916,21 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) {
uint8_t *dst_data = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
uint8_t *src_data = reinterpret_cast<uint8_t*>(srcMemPtr->GetPtr());
auto dim = srcMemPtr->getStaticDims();
if (mayiuse(cpu::x64::sse41)) {
if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) {
IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform.";
}
if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) {
mvn_pln(src_data, dst_data, dim);
mvn_pln(src_data, dst_data);
} else {
mvn_blk(src_data, dst_data, dim);
mvn_blk(src_data, dst_data);
}
} else {
mvn_ref(src_data, dst_data, dim);
mvn_ref(src_data, dst_data);
}
}
void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // blk size in vmm
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -943,7 +952,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Calculate mean value for one instance in batch
// Parallel sum for each channel
float C3inv = 1.f / static_cast<float>(C3);
@ -1056,7 +1065,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}
void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data) {
const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
size_t N = 0; size_t C = 0; size_t D = 0; size_t H = 0; size_t W = 0;
@ -1068,7 +1077,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Parallel sum for each channel for mean
float C3inv = 1.f / static_cast<float>(C3);
float mean_temp = 0.0f;
@ -1154,7 +1163,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}
void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // channel blk for memory layout
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -1176,7 +1185,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
size_t C5 = C * D * H * W;
size_t threads_num = parallel_get_num_threads();
size_t aux_buffer_size = acrossChannels_ ? blk_size : rnd_up(C, blk_size);
size_t aux_buffer_size = execAcrossChannels_ ? blk_size : rnd_up(C, blk_size);
std::vector<float> mean_buffer(aux_buffer_size * threads_num);
std::vector<float> variance_buffer(aux_buffer_size * threads_num);
@ -1185,7 +1194,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t b_offset = is_nhwc ? b * C5 : b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// mean for this instance in batch
float C5inv = 1.f / static_cast<float>(C5);
float mean_temp = 0.0f;
@ -1213,7 +1222,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
arg.src_stride = src_stride_size;
arg.work_amount = static_cast<size_t>(W);
arg.oc_off = static_cast<size_t>(cb * blk_size * sizeof(float)); // for tail process
(*mvn_mean_kernel)(&arg); // for W * blk
(*mvn_mean_kernel)(&arg); // for W * blk
size_t min_cb = (std::min)(blk_size, C - cb * blk_size);
for (int i = 0; i < min_cb; i++)
@ -1401,7 +1410,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const {
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu);
if ((inputRank == 1 && !unaryEltwise) ||
(inputRank == 2 && !unaryEltwise && acrossChannels_)) {
(inputRank == 2 && !unaryEltwise && initAcrossChannels_)) {
return false;
}

View File

@ -80,12 +80,13 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool canBeInPlace() const override {
return false;
}
inline bool getAcrossChannels() const {
return acrossChannels_;
return initAcrossChannels_;
}
inline bool getNormalizeVariance() const {
@ -94,12 +95,14 @@ public:
bool canFuse(const MKLDNNNodePtr& node) const override;
void prepareParams() override;
private:
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data);
void mvn_blk(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_blk(const uint8_t *src_data, uint8_t *dst_data);
void mvn_ref(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_ref(const uint8_t *src_data, uint8_t *dst_data);
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false);
@ -107,7 +110,8 @@ private:
std::tuple<size_t, size_t, size_t, size_t, size_t> shape5D;
bool acrossChannels_ = false;
bool initAcrossChannels_ = false;
bool execAcrossChannels_ = false;
bool normalizeVariance_ = true;
float epsValue_ = 1e-9f;
// Defines way to add epsilon: inside sqrt or outside.
@ -122,8 +126,6 @@ private:
mkldnn::primitive_attr attr;
std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_mean_kernel;
std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_variance_kernel;
std::shared_ptr<jit_uni_mvn_kernel> mvn_kernel;

View File

@ -22,6 +22,7 @@
#include <ngraph/opsets/opset1.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -811,7 +812,9 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

View File

@ -102,10 +102,6 @@ void MKLDNNReorderNode::createPrimitive() {
void MKLDNNReorderNode::prepareParams() {
if (!isOptimized) {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())

View File

@ -124,10 +124,6 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNSelectNode::prepareParams() {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
const auto &_conditionDims = getParentEdgesAtPort(CONDITION)[0]->getMemory().getStaticDims();
const auto &_thenDims = getParentEdgesAtPort(THEN)[0]->getMemory().getStaticDims();
const auto &_elseDims = getParentEdgesAtPort(ELSE)[0]->getMemory().getStaticDims();

View File

@ -32,9 +32,13 @@ inline std::vector<size_t> getNormalizedDimsBySize(const InferenceEngine::SizeVe
* shape onto which the second shape should be broadcastable
* @param secondInputDims
* shape which should be broadcastable
* @param weakComparison
* flag which specifies how the C dims are compared when a value is undefined (weak or strong comparison)
* @return true if broadcastable, false otherwise.
*/
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims) {
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims,
bool weakComparison = false) {
bool (*dimsEqual)(size_t, size_t) = weakComparison ? static_cast<bool (*)(size_t, size_t)>(dimsEqualWeak) : dimsEqualStrong;
if (secondInputDims.size() > firstInputDims.size())
return false;
if (std::accumulate(secondInputDims.begin(), secondInputDims.end(), 1, std::multiplies<size_t>()) == 1)
@ -42,7 +46,7 @@ inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVect
std::vector<size_t> normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size());
for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) {
if ((i == 1 && normalizedSecondInputDims[i] != firstInputDims[1]) || (i != 1 && normalizedSecondInputDims[i] != 1))
if ((i == 1 && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[1])) || (i != 1 && normalizedSecondInputDims[i] != 1))
return false;
}
return true;
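For illustration (hypothetical dims), the weak comparison is what lets the per-channel check pass while the channel dim is still dynamic:
// firstInputDims = {2, Shape::UNDEFINED_DIM, 3, 4}, secondInputDims = {1, 16, 1, 1}
// weakComparison == false: dimsEqualStrong(16, UNDEFINED_DIM) -> false, not broadcastable
// weakComparison == true:  dimsEqualWeak(16, UNDEFINED_DIM)   -> true,  per-channel broadcastable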
@ -90,4 +94,34 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
return precision;
}
/**
* @brief Returns a buffer aligned to targetSize.
* If the buffer has size 1, its value is broadcast to the first targetSize elements.
* If the aligned buffer size exceeds targetSize, the remaining elements are filled with zeros.
* @param targetSize
* target buffer size
* @param buffer
* buffer to be aligned
* @param align
* alignment for targetSize
* @return aligned buffer
*/
inline std::vector<float> makeAlignedBuffer(size_t targetSize, const std::vector<float> &buffer, int align = -1) {
if (buffer.empty()) {
IE_THROW() << "Can't align buffer, becuase buffer is empty";
}
auto alignedBuffer = buffer;
if (align == -1) {
align = targetSize;
}
const size_t bufferSizeAligned = rnd_up(targetSize, align);
alignedBuffer.resize(bufferSizeAligned, 0);
if (buffer.size() == 1) {
std::fill(alignedBuffer.begin() + 1, alignedBuffer.begin() + targetSize, buffer[0]);
}
return alignedBuffer;
}
} // namespace MKLDNNPlugin
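A minimal usage sketch (illustrative values, assuming only the helper added above) of the broadcast and zero-padding behaviour:
#include "utils/cpu_utils.hpp"
#include <cassert>
#include <vector>

static void alignedBufferExample() {
    std::vector<float> scales{2.f};                                 // per-tensor scale from a fused op
    auto aligned = MKLDNNPlugin::makeAlignedBuffer(5, scales, 16);  // broadcast to 5 channels, pad to align of 16
    assert(aligned.size() == 16);                                   // rnd_up(5, 16) == 16
    assert(aligned[4] == 2.f && aligned[5] == 0.f);                 // {2, 2, 2, 2, 2, 0, ..., 0}
}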

View File

@ -143,6 +143,15 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CanSetInBlobWithDifferentPrecision/netPRC=BIN.*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=(I4|U4).*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=BIN.*)",
// Issue: 69086
// need to add support for BIN -> FP32 conversion
// if the output precision is set to BIN, the output blob is created with UNSPECIFIED precision
R"(.*smoke_FakeQuantizeLayerCPUTest.*bin.*)",
// Issue: 69088
// bad accuracy
R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos.
*IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",
};
#define FIX_62820 0

View File

@ -2,33 +2,106 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/conversion.hpp>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
using namespace ngraph;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
namespace CPULayerTestsDefinitions {
class ConvertCPULayerTest : public ConversionLayerTest {};
using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;
using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition, // input shapes
InferenceEngine::Precision, // input precision
InferenceEngine::Precision, // output precision
CPUSpecificParams>;
class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;
std::ostringstream result;
if (!shapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
}
result << "TS=";
for (const auto& shape : shapes.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "inputPRC=" << inPrc.name() << "_";
result << "targetPRC=" << outPrc.name() << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
for (size_t i = 0; i < shapes.second.size(); i++) {
targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
}
inputDynamicShapes = shapes.first;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
}
};
TEST_P(ConvertCPULayerTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ConversionParamsTuple params = GetParam();
inPrc = std::get<2>(params);
outPrc = std::get<3>(params);
Run();
CheckPluginRelatedResults(executableNetwork, "Convert");
}
namespace {
const std::vector<ngraph::helpers::ConversionTypes> conversionOpTypes = {
ngraph::helpers::ConversionTypes::CONVERT,
ngraph::helpers::ConversionTypes::CONVERT_LIKE,
std::vector<convertLayerShapeDefinition> inShapes_4D = {
{{}, {{1, 2, 3, 4}}},
{
// dynamic
{{-1, -1, -1, -1}},
// target
{
{2, 4, 4, 1},
{2, 17, 5, 4},
{1, 2, 3, 4}
}
},
{
// dynamic
{{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
// target
{
{2, 17, 5, 4},
{5, 2, 3, 2},
{1, 10, 4, 1},
}
}
};
const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
// List of precisions natively supported by mkldnn.
const std::vector<Precision> precisions = {
Precision::U8,
@ -38,26 +111,19 @@ const std::vector<Precision> precisions = {
Precision::BF16
};
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_From_BF16, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::Values(Precision::BF16),
::testing::ValuesIn(precisions),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
std::vector<CPUSpecificParams> memForm4D = {
CPUSpecificParams({nchw}, {nchw}, {}, {}),
CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_To_BF16, ConvertCPULayerTest,
INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(precisions),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
::testing::ValuesIn(precisions),
::testing::ValuesIn(memForm4D)),
ConvertCPULayerTest::getTestCaseName);
} // namespace CPULayerTestsDefinitions

View File

@ -12,15 +12,18 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using inputShapes = std::tuple<std::vector<ngraph::PartialShape>, // dynamic input shapes
std::vector<ngraph::Shape>, // target input shapes
std::vector<SizeVector>>; // range input shapes
using fqSpecificParams = std::tuple<int64_t, // 'data' input low bounds
int64_t, // 'data' input high bounds
std::vector<float>, // output low
std::vector<float>, // output high
std::vector<SizeVector>, // 'range' inputs shapes
size_t>; // levels
using fqLayerTestParamsSet = std::tuple<fqSpecificParams,
SizeVector, // 'data' input shape
inputShapes, // input shapes
Precision, // input precision
std::pair<std::vector<float>, std::vector<float>>, // il and ih values
bool, // should be decomposed
@ -31,30 +34,39 @@ class FakeQuantizeLayerCPUTest : public testing::WithParamInterface<fqLayerTestP
public:
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;
std::vector<ngraph::PartialShape> dynamicShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(dynamicShapes, targetShapes, ranges) = testShapes;
int64_t inDataLowBounds, inDataHighBounds;
std::vector<float> inputLow, inputHigh, outputLow, outputHigh;
std::vector<SizeVector> inRangesShapes;
size_t levels;
inputLow = inputRangesValues.first;
inputHigh = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, levels) = fqParams;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_";
if (!dynamicShapes.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(dynamicShapes) << "_";
}
result << "TS=";
for (const auto& shape : targetShapes) {
result << "(" << CommonTestUtils::vec2str(shape) << ")_";
}
result << "RS=";
for (const auto& data : ranges) {
result << "(" << CommonTestUtils::vec2str(data) << ")_";
}
result << "inPrec=" << inPrec.name() << "_";
std::string rs = "";
for (size_t i = 0; i < inRangesShapes.size(); i++) {
rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_";
}
result << "RS=" << rs;
result << "LOW_BOUNDS=" << inDataLowBounds << "_";
result << "HIGH_BOUNDS=" << inDataHighBounds << "_";
result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_";
@ -75,7 +87,9 @@ public:
const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo();
auto input = inDataMap.begin();
Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds);
const auto td = input->second->getTensorDesc();
Blob::Ptr blob = FuncTestUtils::createAndFillBlob(InferenceEngine::TensorDesc(td.getPrecision(), targetStaticShapes[index][0], td.getLayout()),
inDataHighBounds - inDataLowBounds, inDataLowBounds);
inferRequest.SetBlob(input->second->name(), blob);
inputs.push_back(blob);
@ -88,30 +102,37 @@ protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::vector<SizeVector> inRangesShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(inputDynamicShapes, targetShapes, ranges) = testShapes;
for (size_t i = 0; i < targetShapes.size(); i++) {
targetStaticShapes.push_back(std::vector<ov::Shape>{targetShapes[i]});
}
size_t levels;
std::vector<std::vector<float>> rangesBounds(RANGES_INPUT_NUMBER);
rangesBounds[0] = inputRangesValues.first;
rangesBounds[1] = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], levels) = fqParams;
auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
ParameterVector params = builder::makeParams(ngInPrec, {inDataShape});
ParameterVector params = builder::makeParams(ngInPrec, {targetStaticShapes[0][0]});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset5::Parameter>(params));
auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty());
auto il = builder::makeConstant(ngInPrec, ranges[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, ranges[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, ranges[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, ranges[3], rangesBounds[3], rangesBounds[3].empty());
auto fq = std::make_shared<opset5::FakeQuantize>(paramOuts[0], il, ih, ol, oh, levels);
layerName = shouldBeDecomposed ? "" : "FakeQuantize";
@ -120,9 +141,7 @@ protected:
selectedType = getPrimitiveType() + "_" + inPrec.name();
}
fq->get_rt_info() = getCPUInfo();
function = std::make_shared<Function>(fq, params, "FakeQuantizeCPU");
function = makeNgraphFunction(ngInPrec, params, fq, "FakeQuantizeCPU");
}
private:
@ -132,6 +151,7 @@ private:
};
TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, layerName);
@ -149,6 +169,12 @@ const std::vector<std::pair<std::vector<float>, std::vector<float>>> input_range
const std::vector<float> outputLow{5.0f}, outputHigh{25.0f};
const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(levels));
namespace fqImpl {
std::vector<CPUSpecificParams> memForm4D_jit = {
@ -157,19 +183,31 @@ std::vector<CPUSpecificParams> memForm4D_jit = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
const std::vector<std::vector<SizeVector>> rangesShapes4D_jit = {
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_jit = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 12, 1, 1}, {4, 1, 8, 2}, {1, 16, 6, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {1, 16, 1, 1}, {7, 16, 1, 2}, {1, 16, 6, 1}},
{{1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}}
},
};
const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_jit),
::testing::ValuesIn(levels));
const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -181,18 +219,21 @@ std::vector<CPUSpecificParams> memForm4D_ref = {
CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"})
};
const std::vector<std::vector<SizeVector>> rangesShapes4D_ref = {
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_ref = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {4, 1, 1, 1}, {4, 16, 1, 2}, {4, 16, 6, 1}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
};
const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_ref),
::testing::ValuesIn(levels));
const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -206,19 +247,31 @@ std::vector<CPUSpecificParams> memForm5D_jit = {
CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {})
};
const std::vector<std::vector<SizeVector>> rangesShapes5D_jit = {
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_jit = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 4, 5, 6, 7}, {1, 12, 1, 1, 1}, {4, 1, 8, 2, 7}, {1, 16, 6, 5, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{4, 16, 6, 7, 8}, {1, 16, 1, 1, 1}, {7, 16, 1, 2, 5}, {1, 16, 6, 1, 7}},
{{1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}}
},
};
const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_jit),
::testing::ValuesIn(levels));
const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -231,18 +284,21 @@ std::vector<CPUSpecificParams> memForm5D_ref = {
CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"})
};
const std::vector<std::vector<SizeVector>> rangesShapes5D_ref = {
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_ref = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 16, 6, 7, 8}, {3, 16, 1, 1, 1}, {3, 16, 1, 2, 5}, {3, 16, 6, 1, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
};
const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_ref),
::testing::ValuesIn(levels));
const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -250,32 +306,115 @@ const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);
const auto specificParamsBin = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(std::vector<float>{0.0f}),
::testing::Values(std::vector<float>{1.0f}),
::testing::Values(2));
const auto testParamsBin4D = ::testing::Combine(specificParamsBin,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::Values(std::pair<std::vector<float>, std::vector<float>>{{3.0f}, {3.f}}),
::testing::Values(false),
::testing::Values(CPUSpecificParams()));
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_4D_bin, FakeQuantizeLayerCPUTest, testParamsBin4D, FakeQuantizeLayerCPUTest::getTestCaseName);
} // namespace fqImpl
const std::vector<SizeVector> dataShapes = {
{4, 5, 6, 7},
{3, 4, 5, 6, 7},
{2, 3, 4, 5, 6, 7},
};
const std::vector<std::vector<SizeVector>> rangesShapes = {
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
};
namespace fqDecompos {
const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes),
::testing::ValuesIn(levels));
std::vector<inputShapes> decomposeShapes = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 5, 6, 7}, {7, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{8, 4, 5, 6, 7}, {1, 1, 5, 6, 7}, {1, 1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
};
const auto testParams = ::testing::Combine(specificParams,
::testing::ValuesIn(dataShapes),
::testing::ValuesIn(decomposeShapes),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(true),

View File

@ -12,8 +12,16 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using basicCpuMvnParams = std::tuple<
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>, // Input shapes
InferenceEngine::Precision, // Input precision
ngraph::AxisSet, // Reduction axes
bool, // Across channels
bool, // Normalize variance
double>; // Epsilon
typedef std::tuple<
LayerTestsDefinitions::mvn1Params,
basicCpuMvnParams,
CPUSpecificParams,
fusingSpecificParams,
Precision, // CNNNetwork input precision
@ -24,16 +32,35 @@ class MvnLayerCPUTest : public testing::WithParamInterface<MvnLayerCPUTestParamS
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public:
static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) {
LayerTestsDefinitions::mvn1Params basicParamsSet;
basicCpuMvnParams basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
Precision inputPrecision, outputPrecision;
std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::Mvn1LayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvn1Params>(
basicParamsSet, 0));
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChanels, normalizeVariance;
double eps;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet;
std::ostringstream result;
if (!inputShapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
}
result << "TS=";
for (const auto& shape : inputShapes.second) {
result << "(" << CommonTestUtils::vec2str(shape) << ")_";
}
result << "Precision=" << netPrecision.name() << "_";
if (!axes.empty()) {
result << "ReductionAccess=" << CommonTestUtils::vec2str(axes.to_vector()) << "_";
} else {
result << "AcrossChannels=" << (acrossChanels ? "TRUE" : "FALSE") << "_";
}
result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_";
result << "Epsilon=" << eps;
result << "_" << "CNNInpPrc=" << inputPrecision.name();
result << "_" << "CNNOutPrc=" << outputPrecision.name();
@ -45,7 +72,9 @@ public:
}
protected:
void SetUp() override {
LayerTestsDefinitions::mvn1Params basicParamsSet;
targetDevice = CommonTestUtils::DEVICE_CPU;
basicCpuMvnParams basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam();
@ -53,14 +82,20 @@ protected:
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
InferenceEngine::SizeVector inputShapes;
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChanels, normalizeVariance;
double eps;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps, targetDevice) = basicParamsSet;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet;
for (size_t i = 0; i < inputShapes.second.size(); i++) {
targetStaticShapes.push_back({inputShapes.second[i]});
}
inputDynamicShapes = inputShapes.first;
auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto param = ngraph::builder::makeParams(netPrc, {inputShapes});
auto param = ngraph::builder::makeParams(netPrc, {targetStaticShapes[0].front()});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps);
if (!axes.empty()) {
@ -82,40 +117,141 @@ TEST_P(MvnLayerCPUTest, CompareWithRefs) {
}
namespace {
const std::vector<std::vector<size_t>> inputShapes_1D = {
{5},
{16},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_1D = {
{ {}, {{5}}},
{ {}, {{16}}},
{
// dynamic
{{-1}},
// target
{
{2},
{16},
{1}
}
},
{
// dynamic
{{{1, 20}}},
// target
{
{1},
{16},
{4}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_2D = {
{1, 32},
{16, 64},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_2D = {
{ {}, {{1, 32}}},
{ {}, {{16, 64}}},
{
// dynamic
{{-1, -1}},
// target
{
{2, 16},
{4, 16},
{1, 16}
}
},
{
// dynamic
{{{1, 5}, {1, 20}}},
// target
{
{1, 1},
{2, 16},
{4, 16}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_3D = {
{1, 32, 17},
{1, 37, 9},
{1, 16, 4},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_3D = {
{ {}, {{1, 32, 17}}},
{ {}, {{1, 37, 9}}},
{ {}, {{1, 16, 4}}},
{
// dynamic
{{-1, -1, -1}},
// target
{
{2, 16, 6},
{4, 16, 2},
{1, 16, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 7}}},
// target
{
{1, 1, 1},
{2, 16, 6},
{4, 16, 2}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_4D = {
{1, 16, 5, 8},
{2, 19, 5, 10},
{7, 32, 2, 8},
{5, 8, 3, 5},
{1, 2, 7, 5},
{1, 4, 5, 5},
{1, 7, 3, 5},
{1, 15, 9, 5},
{4, 41, 6, 9}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_4D = {
{ {}, {{1, 16, 5, 8}}},
{ {}, {{2, 19, 5, 10}}},
{ {}, {{7, 32, 2, 8}}},
{ {}, {{5, 8, 3, 5}}},
{ {}, {{1, 2, 7, 5}}},
{ {}, {{1, 4, 5, 5}}},
{ {}, {{1, 7, 3, 5}}},
{ {}, {{1, 15, 9, 5}}},
{ {}, {{4, 41, 6, 9}}},
{
// dynamic
{{-1, -1, -1, -1}},
// target
{
{2, 16, 10, 6},
{4, 16, 2, 2},
{1, 16, 8, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 10}, {1, 7}}},
// target
{
{1, 1, 1, 1},
{2, 16, 10, 6},
{4, 16, 2, 2}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_5D = {
{1, 32, 8, 1, 6},
{1, 9, 1, 15, 9},
{6, 64, 6, 1, 18},
{2, 31, 2, 9, 1},
{10, 16, 5, 10, 6}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_5D = {
{ {}, {{1, 32, 8, 1, 6}}},
{ {}, {{1, 9, 1, 15, 9}}},
{ {}, {{6, 64, 6, 1, 18}}},
{ {}, {{2, 31, 2, 9, 1}}},
{ {}, {{10, 16, 5, 10, 6}}},
{
// dynamic
{{-1, -1, -1, -1, -1}},
// target
{
{2, 16, 5, 10, 6},
{4, 16, 7, 2, 2},
{1, 16, 11, 8, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 7}, {1, 10}, {1, 7}}},
// target
{
{1, 1, 1, 1, 1},
{2, 16, 5, 10, 6},
{4, 16, 7, 2, 2}
}
}
};
const std::vector<bool> acrossChannels = {
@ -162,6 +298,7 @@ std::vector<fusingSpecificParams> fusingParamsSet {
fusingFakeQuantizePerTensorRelu,
/* another patterns */
fusingScaleShift,
fusingAddPerTensor
};
const auto Mvn3D = ::testing::Combine(
@ -171,8 +308,7 @@ const auto Mvn3D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -187,8 +323,7 @@ const auto Mvn4D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -203,8 +338,7 @@ const auto Mvn5D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -228,8 +362,7 @@ const auto Mvn1D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),
@ -245,8 +378,7 @@ const auto Mvn2D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(false),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -262,8 +394,7 @@ const auto Mvn2DTrans = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(true),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),

View File

@ -23,7 +23,9 @@ public:
std::tie(shapes, broadcast) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
if (!shapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
}
result << "TS=";
for (const auto& shape : shapes.second) {
result << "(";