[CPU] Dynamic shapes. Transpose node. (#7859)
parent d7fbd6f7ab
commit 2952ba70af
@ -57,6 +57,9 @@ public:
     void execute(const uint8_t* src_data, uint8_t* dst_data);
     void execute(const uint8_t* src_data, uint8_t* dst_data, const int mb);
+    const PermuteParams& getPermuteParams() const {
+        return params;
+    }
 
 private:
     void prepareParams();
 
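The new getPermuteParams() accessor exposes the parameters a kernel was compiled from. One plausible use, sketched under the assumption that PermuteParams is equality-comparable (canReuseKernel is hypothetical, not part of the commit):

bool canReuseKernel(const PermuteKernel& kernel, const PermuteParams& current) {
    // Recompile only when the cached kernel was built for different params.
    return kernel.getPermuteParams() == current;  // operator== assumed
}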
@ -3,37 +3,26 @@
 //
 
 #include "mkldnn_transpose_node.h"
+#include "ie_parallel.hpp"
 
 #include <algorithm>
 #include <string>
 #include <mkldnn_extension_utils.h>
 #include <mkldnn_selective_build.h>
-#include "ie_parallel.hpp"
 #include "utils/bfloat16.hpp"
 #include <utils/general_utils.h>
 #include "utils/ngraph_utils.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
-bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
+bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (isDynamicNgraphNode(op)) {
-            errorMessage = "Doesn't support op with dynamic shapes";
+        if (!one_of(op->get_type_info(),
+                    ov::op::v1::Transpose::type_info)) {
+            errorMessage = "Node is not an instance of the Transpose operation from opset1.";
             return false;
         }
 
-        const auto transposeOp = ngraph::as_type_ptr<const ngraph::op::v1::Transpose>(op);
-        if (!transposeOp) {
-            errorMessage = "Node is not an instance of the Transpose operation.";
-            return false;
-        }
-
-        auto orderOp = ngraph::as_type_ptr<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
-        if (!orderOp) {
-            errorMessage = "Constant expected as the second input.";
+        if (!isDynamicNgraphNode(op) && op->get_input_node_ptr(INPUT_ORDER_IDX)->get_type_info() != ov::op::v0::Constant::type_info) {
+            errorMessage = "Constant expected as the second input for static shapes.";
             return false;
         }
     } catch (...) {
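For context, one_of comes from <utils/general_utils.h> (included above): it is a small variadic helper that tests a value against a list of candidates. A sketch of its shape (the in-tree definition may differ in details):

template <typename T, typename U>
inline bool one_of(T val, U item) {
    return val == item;
}

template <typename T, typename U, typename... Rest>
inline bool one_of(T val, U item, Rest... rest) {
    // True when val equals any of the listed candidates.
    return val == item || one_of(val, rest...);
}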
@ -42,22 +31,24 @@ bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr<const ngrap
     return true;
 }
 
-MKLDNNTransposeNode::MKLDNNTransposeNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
+MKLDNNTransposeNode::MKLDNNTransposeNode(const std::shared_ptr<ov::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
         : MKLDNNNode(op, eng, cache) {
     std::string errorMessage;
     if (!isSupportedOperation(op, errorMessage)) {
         IE_THROW(NotImplemented) << errorMessage;
     }
 
-    auto orderOp = ngraph::as_type_ptr<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
-    order = orderOp->cast_vector<size_t>();
+    if (op->get_input_node_ptr(INPUT_ORDER_IDX)->get_type_info() == ov::op::v0::Constant::type_info) {
+        constMap[INPUT_ORDER_IDX] = true;
+        order = ov::as_type<ov::op::v0::Constant>(op->get_input_node_ptr(INPUT_ORDER_IDX))->cast_vector<size_t>();
 
         if (order.empty()) {
-            size_t rank = op->get_input_shape(0).size();
+            size_t rank = getInputShapeAtPort(INPUT_DATA_IDX).getRank();
             for (size_t i = 1lu; i <= rank; ++i) {
                 order.emplace_back(rank - i);
             }
         }
+    }
 }
 
 void MKLDNNTransposeNode::getSupportedDescriptors() {
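A side note on the constructor above: when the order constant is empty, the transpose defaults to reversing the axes. The loop, extracted and spelled out (for rank 4 it produces {3, 2, 1, 0}):

std::vector<size_t> defaultOrder(size_t rank) {
    std::vector<size_t> order;
    order.reserve(rank);
    for (size_t i = 1; i <= rank; ++i)
        order.emplace_back(rank - i);  // rank-1, rank-2, ..., 0
    return order;
}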
@ -75,24 +66,26 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() {
     config.dynBatchSupport = true;
     config.inConfs.resize(2);
     config.outConfs.resize(1);
-    config.inConfs[0].inPlace = -1;
-    config.inConfs[0].constant = false;
+    config.inConfs[INPUT_DATA_IDX].inPlace = -1;
+    config.inConfs[INPUT_DATA_IDX].constant = false;
+    config.inConfs[INPUT_ORDER_IDX].constant = constMap[INPUT_ORDER_IDX];
+    config.inConfs[INPUT_ORDER_IDX].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(
+            getOriginalInputPrecisionAtPort(INPUT_ORDER_IDX), getInputShapeAtPort(INPUT_ORDER_IDX));
     config.outConfs[0].inPlace = -1;
     config.outConfs[0].constant = false;
-    config.inConfs[1].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(getOriginalInputPrecisionAtPort(1), getInputShapeAtPort(1));
 
     if (getInputShapeAtPort(0).getRank() == 4 || getInputShapeAtPort(0).getRank() == 5) {
         config.inConfs[0].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, getInputShapeAtPort(0));
         config.outConfs[0].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(prec, getOutputShapeAtPort(0));
         supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown});
 
-        auto srcDims = getInputShapeAtPort(0).getStaticDims();
-        if (srcDims[1] % 8 == 0) {
+        auto srcDims = getInputShapeAtPort(0).getDims();
+        if (srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % 8 == 0) {
             config.inConfs[0].desc = creatorsMap.at(LayoutType::nCsp8c)->createSharedDesc(prec, getInputShapeAtPort(0));
             supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown});
         }
 
-        if (srcDims[1] % 16 == 0) {
+        if (srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % 16 == 0) {
             config.inConfs[0].desc = creatorsMap.at(LayoutType::nCsp16c)->createSharedDesc(prec, getInputShapeAtPort(0));
             supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown});
         }
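The added Shape::UNDEFINED_DIM checks are the dynamic-shape guard here: getDims() may now return an undefined channel count, in which case the blocked nCsp8c/nCsp16c layouts cannot be offered. The guard in isolation (function name and VectorDims alias assumed for illustration):

bool blockedLayoutApplicable(const VectorDims& srcDims, size_t blockSize) {
    // The channel dim must be statically known and divisible by the block size.
    return srcDims[1] != Shape::UNDEFINED_DIM && srcDims[1] % blockSize == 0;
}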
@ -110,34 +103,55 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() {
     }
 }
 
+bool MKLDNNTransposeNode::needPrepareParams() const {
+    if (isOptimized)
+        return false;
+    return MKLDNNNode::needPrepareParams();
+}
+
+void MKLDNNTransposeNode::prepareParams() {
+    auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType<BlockedMemoryDesc>();
+    params.src_block_dims = srcDesc->getBlockDims();
+    auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
+    params.dst_block_dims = dstDesc->getBlockDims();
+    if (!constMap[INPUT_ORDER_IDX]) {
+        auto orderPtr = reinterpret_cast<const int32_t*>(getParentEdgeAt(0)->getMemoryPtr()->GetPtr());
+        auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->GetSize();
+        params.order.assign(orderPtr, orderPtr + orderLen);
+    }
+
+    execPtr = std::make_shared<TransposeJitExecutor>(params);
+}
+
 void MKLDNNTransposeNode::createPrimitive() {
     auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
-    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
+    auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr();
     if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
-        IE_THROW() << "Destination memory didn't allocate.";
+        IE_THROW() << "Destination memory was not allocated.";
     if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
-        IE_THROW() << "Input memory didn't allocate.";
+        IE_THROW() << "Input memory was not allocated.";
     if (getSelectedPrimitiveDescriptor() == nullptr)
-        IE_THROW() << "Preferable primitive descriptor is not set.";
+        IE_THROW() << "Preferable primitive descriptor was not set.";
 
-    if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) &&
+    if (getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) &&
             std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) {
         isOptimized = true;
+        execPtr = std::make_shared<TransposeRefExecutor>();
         return;
     }
 
-    PermuteParams params;
     params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size();
-    params.order = order;
-    auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
-    params.src_block_dims = srcDesc->getBlockDims();
+    if (constMap[INPUT_ORDER_IDX])
+        params.order = order;
+    auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType<BlockedMemoryDesc>();
     params.src_block_order = srcDesc->getOrder();
 
     auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
-    params.dst_block_dims = dstDesc->getBlockDims();
     params.dst_block_order = dstDesc->getOrder();
 
-    permuteKernel = std::unique_ptr<PermuteKernel>(new PermuteKernel(params));
+    if (inputShapesDefined()) {
+        prepareParams();
+        updateLastInputDims();
+    }
 }
 
 template <typename T>
@ -242,27 +256,52 @@ void MKLDNNTransposeNode::optimizedExecute(const int MB, const MKLDNNMemoryPtr&
 }
 
 void MKLDNNTransposeNode::execute(mkldnn::stream strm) {
-    auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
-    auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
-    int MB = batchToProcess();
-
-    if (isOptimized) {
-        const size_t dataSize = getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size();
-        TransposeContext ctx = {this, srcMemPtr, dstMemPtr, MB};
-        OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize,
-                  OV_CASE(1, PrecisionTrait<Precision::U8>::value_type),
-                  OV_CASE(2, PrecisionTrait<Precision::U16>::value_type),
-                  OV_CASE(4, PrecisionTrait<Precision::I32>::value_type));
-
-        return;
-    }
-
-    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->GetPtr());
-    uint8_t* dstData = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
-    permuteKernel->execute(srcData, dstData, MB);
+    if (execPtr) {
+        auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
+        auto &srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr();
+
+        int MB = 0;
+        if (isDynamicNode()) {
+            MB = srcMemPtr->getStaticDims()[0];
+        } else {
+            MB = batchToProcess();
+        }
+
+        execPtr->exec(this, srcMemPtr, dstMemPtr, MB);
+    } else {
+        IE_THROW() << "Could not execute Transpose node. Primitive was not created.";
+    }
 }
 
+void MKLDNNTransposeNode::executeDynamicImpl(mkldnn::stream strm) {
+    execute(strm);
+}
+
+MKLDNNTransposeNode::TransposeJitExecutor::TransposeJitExecutor(const PermuteParams& params) {
+    pKernel = std::make_shared<PermuteKernel>(params);
+}
+
+void MKLDNNTransposeNode::TransposeJitExecutor::exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) {
+    if (!pKernel)
+        IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled.";
+
+    const uint8_t* srcData = reinterpret_cast<const uint8_t*>(srcMemPtr->GetPtr());
+    uint8_t* dstData = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
+
+    pKernel->execute(srcData, dstData, MB);
+}
+
+void MKLDNNTransposeNode::TransposeRefExecutor::exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) {
+    const size_t dataSize = srcMemPtr->getDesc().getPrecision().size();
+    TransposeContext ctx = {node, srcMemPtr, dstMemPtr, MB};
+    OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize,
+              OV_CASE(1, PrecisionTrait<Precision::U8>::value_type),
+              OV_CASE(2, PrecisionTrait<Precision::U16>::value_type),
+              OV_CASE(4, PrecisionTrait<Precision::I32>::value_type));
+}
 
 bool MKLDNNTransposeNode::created() const {
     return getType() == Transpose;
 }
 
 REG_MKLDNN_PRIM_FOR(MKLDNNTransposeNode, Transpose);
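With the executor split above, execute() only dispatches: TransposeJitExecutor holds a PermuteKernel compiled for concrete block dims and is rebuilt by prepareParams() whenever shapes (or a non-constant order input) change, while TransposeRefExecutor serves the optimized in-place orders. A sketch of the resulting per-iteration flow (hypothetical driver code, not part of the plugin):

void runIteration(MKLDNNTransposeNode& node, mkldnn::stream strm) {
    // needPrepareParams() is false for the optimized path and true after a
    // shape change on a dynamic node; prepareParams() then swaps in a fresh
    // TransposeJitExecutor built for the actual dims.
    if (node.needPrepareParams())
        node.prepareParams();
    node.execute(strm);  // delegates to execPtr->exec(...)
}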
@ -4,15 +4,13 @@
 
 #pragma once
 
-#include <ie_common.h>
 #include <mkldnn_node.h>
-#include <string>
-#include <vector>
-#include <utility>
-#include <map>
-#include <memory>
 #include "common/permute_kernel.h"
 
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
 
 namespace MKLDNNPlugin {
 
 class MKLDNNTransposeNode : public MKLDNNNode {
@ -33,7 +31,33 @@ public:
         return order;
     }
 
+    bool needPrepareParams() const override;
+    void prepareParams() override;
+
+protected:
+    void executeDynamicImpl(mkldnn::stream strm) override;
+
 private:
+    struct TransposeExecutor {
+        TransposeExecutor() = default;
+        virtual void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) = 0;
+        virtual ~TransposeExecutor() = default;
+    };
+    using executorPtr = std::shared_ptr<TransposeExecutor>;
+    executorPtr execPtr = nullptr;
+
+    struct TransposeJitExecutor : public TransposeExecutor {
+        TransposeJitExecutor(const PermuteParams& params);
+        void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) override;
+
+        std::shared_ptr<PermuteKernel> pKernel;
+    };
+
+    struct TransposeRefExecutor : public TransposeExecutor {
+        TransposeRefExecutor() = default;
+        void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) override;
+    };
+
     template<typename T> void optimizedExecute(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr);
 
     InferenceEngine::SizeVector order;
@ -46,7 +70,7 @@ private:
         std::vector<size_t>{0, 5, 1, 2, 3, 4},
     };
 
-    std::unique_ptr<PermuteKernel> permuteKernel;
+    PermuteParams params;
 
     struct TransposeContext {
         MKLDNNTransposeNode* nodePtr;
@ -61,7 +85,11 @@ private:
             ctx.nodePtr->optimizedExecute<T>(ctx.MB, ctx.srcMemPtr, ctx.dstMemPtr);
         }
     };
 
+    bool constMap[3] = { false };
+
+    static constexpr size_t INPUT_DATA_IDX = 0lu;
+    static constexpr size_t INPUT_ORDER_IDX = 1lu;
 };
 
 } // namespace MKLDNNPlugin
 
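TransposeContext and the TransposeOptimizedEmitter functor above exist to feed OV_SWITCH in TransposeRefExecutor::exec. OV_SWITCH/OV_CASE (from mkldnn_selective_build.h) dispatch a templated functor on a runtime value while remaining compatible with selective-build compilation; functionally the call behaves roughly like:

// Rough expansion of the OV_SWITCH call on dataSize
// (PrecisionTrait<U8/U16/I32>::value_type are uint8_t/uint16_t/int32_t):
switch (dataSize) {
    case 1: ctx.nodePtr->optimizedExecute<uint8_t>(ctx.MB, ctx.srcMemPtr, ctx.dstMemPtr); break;
    case 2: ctx.nodePtr->optimizedExecute<uint16_t>(ctx.MB, ctx.srcMemPtr, ctx.dstMemPtr); break;
    case 4: ctx.nodePtr->optimizedExecute<int32_t>(ctx.MB, ctx.srcMemPtr, ctx.dstMemPtr); break;
}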
@ -2,9 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <shared_test_classes/single_layer/transpose.hpp>
-#include "ngraph_functions/builders.hpp"
 #include "test_utils/cpu_test_utils.hpp"
+#include "ngraph_functions/builders.hpp"
 
 // Since the Transpose ngraph operation is converted to the transpose node, we will use it in the transpose test
 
@ -13,10 +12,12 @@ using namespace CPUTestUtils;
 namespace CPULayerTestsDefinitions {
 
+using inputShapesPair = std::pair<std::vector<ov::PartialShape>, std::vector<std::vector<ov::Shape>>>;
+
 typedef std::tuple<
+        inputShapesPair,                    // Input shapes
         std::vector<size_t>,                // Input order
         InferenceEngine::Precision,         // Net precision
-        std::vector<size_t>,                // Input shapes
         std::string,                        // Target device name
        std::map<std::string, std::string>, // Additional network configuration
         CPUSpecificParams> TransposeLayerCPUTestParamSet;
@ -26,14 +27,16 @@ class TransposeLayerCPUTest : public testing::WithParamInterface<TransposeLayerC
 public:
     static std::string getTestCaseName(testing::TestParamInfo<TransposeLayerCPUTestParamSet> obj) {
         Precision netPrecision;
-        std::vector<size_t> inputShape, inputOrder;
+        inputShapesPair inputShapes;
+        std::vector<size_t> inputOrder;
         std::string targetDevice;
         CPUSpecificParams cpuParams;
         std::map<std::string, std::string> additionalConfig;
-        std::tie(inputOrder, netPrecision, inputShape, targetDevice, additionalConfig, cpuParams) = obj.param;
+        std::tie(inputShapes, inputOrder, netPrecision, targetDevice, additionalConfig, cpuParams) = obj.param;
 
         std::ostringstream result;
-        result << "IS=" << CommonTestUtils::vec2str(inputShape) << "_";
+        result << "DynShapes=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
+        result << "StatShapes=" << CommonTestUtils::vec2str(inputShapes.second) << "_";
         result << "inputOrder=" << CommonTestUtils::vec2str(inputOrder) << "_";
         result << "netPRC=" << netPrecision.name() << "_";
         result << "trgDev=" << targetDevice;
@ -43,10 +46,11 @@ public:
 protected:
     void SetUp() override {
         Precision netPrecision;
-        std::vector<size_t> inputShape, inputOrder;
+        inputShapesPair inputShapes;
+        std::vector<size_t> inputOrder;
         CPUSpecificParams cpuParams;
         std::map<std::string, std::string> additionalConfig;
-        std::tie(inputOrder, netPrecision, inputShape, targetDevice, additionalConfig, cpuParams) = this->GetParam();
+        std::tie(inputShapes, inputOrder, netPrecision, targetDevice, additionalConfig, cpuParams) = this->GetParam();
         configuration.insert(additionalConfig.begin(), additionalConfig.end());
         inPrc = outPrc = netPrecision; // since the layer does not convert precisions
 
@ -54,19 +58,27 @@ protected:
 
         selectedType = std::string("unknown_") + inPrc.name();
 
-        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
-        auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
-        auto paramOuts = ngraph::helpers::convert2OutputVector(
-                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
+        targetStaticShapes.reserve(inputShapes.second.size());
+        for (const auto& staticShape : inputShapes.second) {
+            targetStaticShapes.push_back({staticShape});
+        }
+        inputDynamicShapes = { inputShapes.first };
 
-        const auto inOrderShape = inputOrder.empty() ? ngraph::Shape({0}) : ngraph::Shape({inputShape.size()});
-        const auto inputOrderOp = std::make_shared<ngraph::opset3::Constant>(ngraph::element::i64,
+        ov::Shape inputDataShape = targetStaticShapes.front().front();
+
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+        auto params = ngraph::builder::makeParams(ngPrc, {inputDataShape});
+        auto paramOuts = ngraph::helpers::convert2OutputVector(
+                ngraph::helpers::castOps2Nodes<ov::op::v0::Parameter>(params));
+
+        const auto inOrderShape = inputOrder.empty() ? ov::Shape({0}) : ov::Shape({inputDataShape.size()});
+        const auto inputOrderOp = std::make_shared<ov::op::v0::Constant>(ov::element::i64,
                                                                          inOrderShape,
                                                                          inputOrder);
-        const auto transpose = std::make_shared<ngraph::opset3::Transpose>(paramOuts.at(0), inputOrderOp);
+        const auto transpose = std::make_shared<ov::op::v1::Transpose>(paramOuts.at(0), inputOrderOp);
         transpose->get_rt_info() = getCPUInfo();
-        const ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(transpose)};
-        function = std::make_shared<ngraph::Function>(results, params, "Transpose");
+        const ov::ResultVector results{std::make_shared<ov::op::v0::Result>(transpose)};
+        function = std::make_shared<ov::Function>(results, params, "Transpose");
     }
 };
 
@ -103,8 +115,13 @@ const std::vector<InferenceEngine::Precision> netPrecisionsPerChannels = {
     Precision::FP32
 };
 
-const std::vector<std::vector<size_t>> inputShapes4D = {
-    {2, 32, 10, 20}
+const std::vector<inputShapesPair>
+staticInputShapes4D = {
+    {{}, {{{2, 32, 10, 20}}}}
+};
+const std::vector<inputShapesPair>
+dynamicInputShapes4D = {
+    {{{2, ov::Dimension(20, 40), 10, 20}}, {{{2, 32, 10, 20}, {2, 10, 10, 20}}}}
 };
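Each inputShapesPair couples the dynamic (partial) shapes the network is built with (first member) and the concrete shapes each inference then runs with (second member): dynamicInputShapes4D above builds once for {2, [20..40], 10, 20} and executes with channel counts 32 and 10. A hypothetical extra case in the same format (not part of this commit) that makes the batch dynamic instead would look like:

const inputShapesPair dynamicBatch4D = {
    {{ov::Dimension(1, 4), 32, 10, 20}},       // network-level (partial) shape
    {{{1, 32, 10, 20}, {4, 32, 10, 20}}}       // per-inference static shapes
};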
 
 const std::vector<std::vector<size_t>> inputOrder4D = {
@ -128,28 +145,53 @@ const std::vector<CPUSpecificParams> CPUParams4D = {
     cpuParams_nchw,
 };
 
-const auto params4D = ::testing::Combine(
-        ::testing::ValuesIn(inputOrder4D),
-        ::testing::ValuesIn(netPrecisions),
-        ::testing::ValuesIn(inputShapes4D),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
-        ::testing::Values(additional_config),
-        ::testing::ValuesIn(CPUParams4D));
-
-INSTANTIATE_TEST_SUITE_P(smoke_Transpose4D_CPU, TransposeLayerCPUTest, params4D, TransposeLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4D_Transpose, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes4D),
+                                 ::testing::ValuesIn(inputOrder4D),
+                                 ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::ValuesIn(CPUParams4D)),
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_Transpose, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(dynamicInputShapes4D),
+                                 ::testing::ValuesIn(inputOrder4D),
+                                 ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::ValuesIn(std::vector<CPUSpecificParams>{})),
+                         TransposeLayerCPUTest::getTestCaseName);
 
-const auto paramsPerChannels4D = ::testing::Combine(
-        ::testing::ValuesIn(inputOrderPerChannels4D),
-        ::testing::ValuesIn(netPrecisionsPerChannels),
-        ::testing::ValuesIn(inputShapes4D),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
-        ::testing::Values(additional_config),
-        ::testing::Values(cpuParams_nhwc));
-
-INSTANTIATE_TEST_SUITE_P(smoke_PermutePerChannels4D_CPU, TransposeLayerCPUTest, paramsPerChannels4D, TransposeLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes4D_PermutePerChannels, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes4D),
+                                 ::testing::ValuesIn(inputOrderPerChannels4D),
+                                 ::testing::ValuesIn(netPrecisionsPerChannels),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::Values(cpuParams_nhwc)),
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes4D_PermutePerChannels, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(dynamicInputShapes4D),
+                                 ::testing::ValuesIn(inputOrderPerChannels4D),
+                                 ::testing::ValuesIn(netPrecisionsPerChannels),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::Values(cpuParams_nhwc)),
+                         TransposeLayerCPUTest::getTestCaseName);
 
-const std::vector<std::vector<size_t>> inputShapes5D = {
-    {2, 32, 5, 10, 20}
+const std::vector<inputShapesPair>
+staticInputShapes5D = {
+    {{}, {{{2, 32, 5, 10, 20}}}}
+};
+const std::vector<inputShapesPair>
+dynamicInputShapes5D = {
+    {{{2, ov::Dimension(20, 40), 5, 10, 20}}, {{{2, 32, 5, 10, 20}, {2, 20, 5, 10, 20}}}}
 };
 
 const std::vector<std::vector<size_t>> inputOrder5D = {
@ -181,25 +223,45 @@ const std::vector<CPUSpecificParams> CPUParams5D = {
     cpuParams_ncdhw,
 };
 
-const auto params5D = ::testing::Combine(
-        ::testing::ValuesIn(inputOrder5D),
-        ::testing::ValuesIn(netPrecisions),
-        ::testing::ValuesIn(inputShapes5D),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
-        ::testing::Values(additional_config),
-        ::testing::ValuesIn(CPUParams5D));
-
-INSTANTIATE_TEST_SUITE_P(smoke_Transpose5D_CPU, TransposeLayerCPUTest, params5D, TransposeLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5D_Transpose, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes5D),
+                                 ::testing::ValuesIn(inputOrder5D),
+                                 ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::ValuesIn(CPUParams5D)),
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes5D_Transpose, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(dynamicInputShapes5D),
+                                 ::testing::ValuesIn(inputOrder5D),
+                                 ::testing::ValuesIn(netPrecisions),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::ValuesIn(std::vector<CPUSpecificParams>{})),
+                         TransposeLayerCPUTest::getTestCaseName);
 
-const auto paramsPerChannels5D = ::testing::Combine(
-        ::testing::ValuesIn(inputOrderPerChannels5D),
-        ::testing::ValuesIn(netPrecisionsPerChannels),
-        ::testing::ValuesIn(inputShapes5D),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
-        ::testing::Values(additional_config),
-        ::testing::Values(cpuParams_ndhwc));
-
-INSTANTIATE_TEST_SUITE_P(smoke_PermutePerChannels5D_CPU, TransposeLayerCPUTest, paramsPerChannels5D, TransposeLayerCPUTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_staticShapes5D_PermutePerChannels, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(staticInputShapes5D),
+                                 ::testing::ValuesIn(inputOrderPerChannels5D),
+                                 ::testing::ValuesIn(netPrecisionsPerChannels),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::Values(cpuParams_ndhwc)),
+                         TransposeLayerCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_dynamicShapes5D_PermutePerChannels, TransposeLayerCPUTest,
+                         ::testing::Combine(
+                                 ::testing::ValuesIn(dynamicInputShapes5D),
+                                 ::testing::ValuesIn(inputOrderPerChannels5D),
+                                 ::testing::ValuesIn(netPrecisionsPerChannels),
+                                 ::testing::Values(CommonTestUtils::DEVICE_CPU),
+                                 ::testing::Values(additional_config),
+                                 ::testing::Values(cpuParams_ndhwc)),
+                         TransposeLayerCPUTest::getTestCaseName);
 
 } // namespace
 } // namespace CPULayerTestsDefinitions