[CPU] SoftMax cache (#9480)

* [CPUCache] SoftMax cache

* [CPUCache] fix bf16 tests

* [CPUCache] apply review comments

* [CPUCache] fix compilation
Zhang Yi 2022-01-10 23:46:57 +08:00 committed by GitHub
parent af105b86f8
commit c1206ef447
2 changed files with 276 additions and 239 deletions


@@ -10,11 +10,45 @@
#include <memory_desc/cpu_memory_desc_utils.h>
#include <ngraph/opsets/opset1.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include <common/primitive_hashing_utils.hpp>
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
namespace {
struct SoftmaxKey {
DnnlMemoryDescCPtr inp0;
impl_desc_type implType;
size_t axis;
size_t hash() const;
bool operator==(const SoftmaxKey& rhs) const;
};
size_t SoftmaxKey::hash() const {
using namespace dnnl::impl;
using namespace dnnl::impl::primitive_hashing;
size_t seed = 0;
seed = hash_combine(seed, get_md_hash(inp0->getDnnlDesc().data));
seed = hash_combine(seed, implType);
seed = hash_combine(seed, axis);
return seed;
}
bool SoftmaxKey::operator==(const SoftmaxKey& rhs) const {
bool retVal = true;
if (inp0 != rhs.inp0) {
retVal = retVal && inp0 && rhs.inp0 && inp0->getDnnlDesc() == rhs.inp0->getDnnlDesc();
}
retVal = retVal && implType == rhs.implType && axis == rhs.axis;
return retVal;
}
} // namespace
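The key pairs a hash with full equality so that a hash collision can never produce a false cache hit: hash() only narrows the lookup, operator== confirms it. Below is a minimal, self-contained sketch of the same idea with a hypothetical ShapeKey; it uses the well-known boost-style combine formula, whereas the code above delegates to the dnnl::impl::primitive_hashing helpers.

#include <cstddef>
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for SoftmaxKey: hash() narrows the bucket search,
// operator== guarantees correctness when hashes collide.
struct ShapeKey {
    std::vector<size_t> dims;
    size_t axis;

    size_t hash() const {
        size_t seed = 0;
        auto combine = [&seed](size_t v) {
            // boost-style hash_combine (the oneDNN helper used above is similar)
            seed ^= std::hash<size_t>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        };
        for (auto d : dims)
            combine(d);
        combine(axis);
        return seed;
    }

    bool operator==(const ShapeKey& rhs) const {
        return dims == rhs.dims && axis == rhs.axis;
    }
};

// Adapter so the key can drive an unordered container, which is essentially
// what the node runtime cache is keyed by.
struct ShapeKeyHasher {
    size_t operator()(const ShapeKey& k) const { return k.hash(); }
};

using PrimitiveMap = std::unordered_map<ShapeKey, std::shared_ptr<void>, ShapeKeyHasher>;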
bool MKLDNNSoftMaxNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (!std::dynamic_pointer_cast<const ngraph::opset1::Softmax>(op)) {
@@ -108,32 +142,44 @@ void MKLDNNSoftMaxNode::createDescriptor(const std::vector<MemoryDescPtr> &input
void MKLDNNSoftMaxNode::prepareParams() {
auto inpDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<DnnlMemoryDesc>();
const auto& in_candidate = inpDesc->getDnnlDesc();
MKLDNNDescriptor desc(std::shared_ptr<softmax_forward::desc>(
new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis)));
const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor();
const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << ".";
SoftmaxKey key = {inpDesc, selected_pd->getImplementationType(), axis};
auto engine = getEngine();
auto builder = [&engine](const SoftmaxKey& key) -> std::shared_ptr<mkldnn::primitive> {
softmax_forward::primitive_desc prim_desc;
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine());
MKLDNNDescriptor desc(std::shared_ptr<softmax_forward::desc>(
new softmax_forward::desc(prop_kind::forward_scoring, key.inp0->getDnnlDesc(), key.axis)));
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine);
while (itpd) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());
if (impl_type == selected_pd->getImplementationType() ||
// At least for oneDNN v2.4 the softmax primitive is optimized for the cases where the dimension of the softmax axis is physically dense.
// There could be situations where it is not possible to detect the optimized case in advance in case of dynamic shapes, but
// in runtime the shape could be suitable for the optimized implementation, so we have to select the optimized one.
(ref_any == selected_pd->getImplementationType() && (impl_type & jit))) {
if (impl_type == key.implType ||
// At least for oneDNN v2.4 the softmax primitive is optimized for the cases where the dimension of the
// softmax axis is physically dense. There could be situations where it is not possible to detect the
// optimized case in advance in case of dynamic shapes, but in runtime the shape could be suitable for
// the optimized implementation, so we have to select the optimized one.
(ref_any == key.implType && (impl_type & jit))) {
prim_desc = itpd.get();
break;
}
if (!itpd.next_impl())
return nullptr;
}
return std::make_shared<softmax_forward>(prim_desc);
};
auto cache = getRuntimeCache();
auto result = cache->getOrCreate(key, builder);
if (!result.first) {
IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
}
prim.reset(new softmax_forward(prim_desc));
prim = result.first;
auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
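prepareParams() now funnels primitive creation through getRuntimeCache()->getOrCreate(key, builder): the builder lambda runs only on a cache miss, so when the same input descriptor, implementation type and axis recur (the common case with dynamic shapes), the already built oneDNN primitive is reused. A simplified sketch of that contract, using a hypothetical SimpleCache rather than the plugin's actual cache class:

#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>

// Hypothetical, simplified stand-in for the node runtime cache used above.
// getOrCreate returns the cached value, or builds, stores and returns a new one.
template <typename Key, typename Value, typename Hasher>
class SimpleCache {
public:
    using Builder = std::function<Value(const Key&)>;

    // Mirrors the usage in the diff: result.first is the (possibly null) value.
    std::pair<Value, bool> getOrCreate(const Key& key, const Builder& builder) {
        auto it = map_.find(key);
        if (it != map_.end())
            return {it->second, true};   // cache hit: skip primitive creation
        Value value = builder(key);      // cache miss: run the expensive builder once
        map_.emplace(key, value);
        return {value, false};
    }

private:
    std::unordered_map<Key, Value, Hasher> map_;
};

// With Key = SoftmaxKey and Value = std::shared_ptr<mkldnn::primitive>, a null
// value from the builder means "no suitable implementation was found", which
// the caller above turns into the IE_THROW on a missing primitive descriptor.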


@@ -1,222 +1,213 @@
//// Copyright (C) 2018-2021 Intel Corporation
//// SPDX-License-Identifier: Apache-2.0
////
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
//#include <ngraph_functions/builders.hpp>
//#include "test_utils/cpu_test_utils.hpp"
//
//using namespace InferenceEngine;
//using namespace CPUTestUtils;
//
//namespace CPULayerTestsDefinitions {
//using ShapesDefenition = std::pair<std::vector<ngraph::PartialShape>, std::vector<std::vector<ngraph::Shape>>>;
//
//struct SoftMaxConfig {
// ShapesDefenition inputShapes;
// size_t axis;
//};
//
//typedef std::tuple<
// InferenceEngine::Precision, // netPrecision
// SoftMaxConfig, // softmaxTestConfig
// std::string, // targetDevice
// CPUSpecificParams
//> softmaxCPUTestParams;
//
//class SoftMaxLayerCPUTest : public testing::WithParamInterface<softmaxCPUTestParams>,
// virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
//public:
// static std::string getTestCaseName(const testing::TestParamInfo<softmaxCPUTestParams>& obj) {
// CPUSpecificParams cpuParams;
// InferenceEngine::Precision netPrecision;
// SoftMaxConfig config;
// std::string targetDevice;
// std::tie(netPrecision, config, targetDevice, cpuParams) = obj.param;
//
// std::ostringstream result;
// result << "netPRC=" << netPrecision.name() << "_";
// if (!config.inputShapes.first.empty()) {
// result << "IS=" << CommonTestUtils::partialShape2str(config.inputShapes.first) << "_";
// }
// result << "TS=";
// for (const auto& shape : config.inputShapes.second) {
// result << "(";
// if (!shape.empty()) {
// auto itr = shape.begin();
// do {
// result << CommonTestUtils::vec2str(*itr);
// } while (++itr != shape.end() && result << "_");
// }
// result << ")_";
// }
// result << "axis=" << config.axis << "_";
// result << "trgDev=" << targetDevice;
// result << CPUTestsBase::getTestCaseName(cpuParams);
//
// return result.str();
// }
//
//protected:
// void SetUp() override {
// InferenceEngine::Precision netPrecision;
// SoftMaxConfig config;
// CPUSpecificParams cpuParams;
// std::tie(netPrecision, config, targetDevice, cpuParams) = this->GetParam();
//
// inPrc = outPrc = netPrecision;
//
// std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
// if (selectedType.empty()) {
// selectedType = getPrimitiveType();
// }
// selectedType.push_back('_');
// selectedType += inPrc.name();
//
// const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
//
// targetStaticShapes = config.inputShapes.second;
// inputDynamicShapes = config.inputShapes.first;
//
// auto inputShape = targetStaticShapes.front().front();
//
// auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
//
// const auto paramOuts =
// ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
//
// const auto softMax = std::make_shared<ngraph::opset1::Softmax>(paramOuts.at(0), config.axis);
//
// function = makeNgraphFunction(ngPrc, params, softMax, "SoftMax");
// }
//};
//
//TEST_P(SoftMaxLayerCPUTest, CompareWithRefs) {
// SKIP_IF_CURRENT_TEST_IS_DISABLED()
//
// Run();
// CheckPluginRelatedResults(executableNetwork, "Softmax");
//}
//
//namespace {
////not optimized cpu spec
//const auto notOptimizedCPUSpec = CPUSpecificParams{{}, {}, {}, "ref_any"};
//
//const std::vector<SoftMaxConfig> optimizedConfigsFP32 = {
// //Static shapes
// {ShapesDefenition{{}, {{{1, 100}}}}, 1},
// {ShapesDefenition{{}, {{{10, 10}}}}, 1},
// {ShapesDefenition{{}, {{{100, 1}}}}, 0},
// {ShapesDefenition{{}, {{{100, 1}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 1}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 5}}}}, 0},
// {ShapesDefenition{{}, {{{5, 5, 1, 1}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5, 5}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5, 1}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 5}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 5}}}}, 3},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5}}}}, 0},
// {ShapesDefenition{{}, {{{5, 5, 1, 1, 1}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5, 1, 1}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 1, 1}}}}, 3},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5}}}}, 3},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 1}}}}, 4},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5}}}}, 4},
// //Dynamic shapes
// {ShapesDefenition{
// { //dynamic shape
// {-1, -1}
// },
// { //target static shapes
// {{10, 10}},
// {{15, 15}},
// {{10, 5}}
// }}, 1},
// {ShapesDefenition{
// { //dynamic shape
// {{1, 100}, {1, 100}}
// },
// { //target static shapes
// {{10, 10}},
// {{15, 15}},
// {{10, 5}}
// }}, 1},
// {ShapesDefenition{
// { //dynamic shape
// {-1, -1, 1, 1, 1}
// },
// { //target static shapes
// {{5, 5, 1, 1, 1}},
// {{10, 7, 1, 1, 1}}
// }}, 1},
//};
//
//const std::vector<SoftMaxConfig> notOptimizedConfigsFP32 {
// //Static shapes
// {ShapesDefenition{{}, {{{1, 100}}}}, 0},
// {ShapesDefenition{{}, {{{10, 10}}}}, 0},
// {ShapesDefenition{{}, {{{10, 10, 10}}}}, 0},
// {ShapesDefenition{{}, {{{10, 10, 10}}}}, 1},
// //Dynamic shapes
// {ShapesDefenition{
// { //dynamic shape
// {-1, -1}
// },
// { //target static shapes
// {{10, 1}}, {{15, 15}}, {{10, 5}}
// }}, 0},
// {ShapesDefenition{
// { //dynamic shape
// {{1, 100}, {1, 100}, -1}
// },
// { //target static shapes
// {{10, 10, 10}}, {{10, 10, 1}}, {{10, 5, 10}}
// }}, 1},
//};
//
//const std::vector<SoftMaxConfig> unsupportedConfigsFP32 {
// //Static shapes
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 0},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 1},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 2},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 3},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 4},
// {ShapesDefenition{{}, {{{5, 5, 5, 5, 5, 5}}}}, 5},
// //Dynamic shapes
// {ShapesDefenition{
// { //dynamic shape
// {-1, -1, -1, -1, -1, -1}
// },
// { //target static shapes
// {{5, 5, 5, 5, 5, 5}}, {{7, 7, 7, 7, 7, 7}}
// }}, 4},
//};
//
//const auto OptimizedParams = testing::Combine(
// testing::Values(Precision::FP32, Precision::BF16),
// testing::ValuesIn(optimizedConfigsFP32),
// testing::Values(CommonTestUtils::DEVICE_CPU),
// testing::Values(emptyCPUSpec));
//
//INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Optimized_CPU, SoftMaxLayerCPUTest, OptimizedParams, SoftMaxLayerCPUTest::getTestCaseName);
//
//const auto NotOptimizedParams = testing::Combine(
// testing::Values(Precision::FP32, Precision::BF16),
// testing::ValuesIn(notOptimizedConfigsFP32),
// testing::Values(CommonTestUtils::DEVICE_CPU),
// testing::Values(notOptimizedCPUSpec));
//
//INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_CPU, SoftMaxLayerCPUTest, NotOptimizedParams, SoftMaxLayerCPUTest::getTestCaseName);
//
//const auto UnsupportedParams = testing::Combine(
// testing::Values(Precision::FP32, Precision::BF16),
// testing::ValuesIn(unsupportedConfigsFP32),
// testing::Values(CommonTestUtils::DEVICE_CPU),
// testing::Values(notOptimizedCPUSpec));
//
//INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Unsupported_CPU, SoftMaxLayerCPUTest, UnsupportedParams, SoftMaxLayerCPUTest::getTestCaseName);
//
//} // namespace
//} // namespace CPULayerTestsDefinitions
#include <ngraph_functions/builders.hpp>
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
using namespace ov::test;
namespace CPULayerTestsDefinitions {
struct SoftMaxConfig {
ov::test::InputShape inputShape;
size_t axis;
};
typedef std::tuple<ElementType, // netPrecision
SoftMaxConfig, // softmaxTestConfig
std::string, // targetDevice
CPUSpecificParams>
softmaxCPUTestParams;
class SoftMaxLayerCPUTest : public testing::WithParamInterface<softmaxCPUTestParams>,
virtual public SubgraphBaseTest,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<softmaxCPUTestParams>& obj) {
CPUSpecificParams cpuParams;
ElementType inType;
SoftMaxConfig config;
std::string targetDevice;
std::tie(inType, config, targetDevice, cpuParams) = obj.param;
std::ostringstream result;
result << "netPRC=" << inType << "_";
result << "IS=" << CommonTestUtils::partialShape2str({config.inputShape.first}) << "_";
result << "TS=";
for (const auto& shape : config.inputShape.second) {
result << "(";
result << CommonTestUtils::vec2str(shape);
result << ")_";
}
result << "axis=" << config.axis << "_";
result << "trgDev=" << targetDevice;
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
ElementType inType;
SoftMaxConfig config;
CPUSpecificParams cpuParams;
std::tie(inType, config, targetDevice, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
if (selectedType.empty()) {
selectedType = getPrimitiveType();
}
if (inType == ElementType::bf16) {
rel_threshold = 1e-2f;
}
selectedType = makeSelectedTypeStr(selectedType, inType);
init_input_shapes({config.inputShape});
auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes);
const auto paramOuts =
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
const auto softMax = std::make_shared<ngraph::opset1::Softmax>(paramOuts.at(0), config.axis);
function = makeNgraphFunction(inType, params, softMax, "SoftMax");
}
};
TEST_P(SoftMaxLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
run();
CheckPluginRelatedResults(executableNetwork, "Softmax");
}
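Each instantiation below expands into tuples of the softmaxCPUTestParams form defined above. A hypothetical, hand-written instance (the real suites generate these through testing::Combine): f32 softmax over a dynamic 2D input with two target shapes, reduced along axis 1, on the CPU device, pinned to the reference implementation.

// Hypothetical example parameter set, equivalent to one combination produced
// by the INSTANTIATE_TEST_SUITE_P calls below; not part of the test suite.
const softmaxCPUTestParams exampleParams{
    ElementType::f32,
    SoftMaxConfig{ov::test::InputShape{ov::PartialShape{-1, -1},
                                       {ov::Shape{10, 10}, ov::Shape{15, 15}}},
                  1},
    CommonTestUtils::DEVICE_CPU,
    CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"}};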
namespace {
// not optimized cpu spec
const auto notOptimizedCPUSpec = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"};
const std::vector<SoftMaxConfig> optimizedConfigsFP32 = {
// Static shapes
{ov::test::InputShape{ov::PartialShape{1, 100}, {ov::Shape{1, 100}}}, 1},
{ov::test::InputShape{ov::PartialShape{10, 10}, {ov::Shape{10, 10}}}, 1},
{ov::test::InputShape{ov::PartialShape{100, 1}, {ov::Shape{100, 1}}}, 0},
{ov::test::InputShape{ov::PartialShape{100, 1}, {ov::Shape{100, 1}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 1}, {ov::Shape{5, 5, 1}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5}, {ov::Shape{5, 5, 5}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5}}}, 0},
{ov::test::InputShape{ov::PartialShape{5, 5, 1, 1}, {ov::Shape{5, 5, 1, 1}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 1}, {ov::Shape{5, 5, 5, 1}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5}}}, 3},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5}}}, 0},
{ov::test::InputShape{ov::PartialShape{5, 5, 1, 1, 1}, {ov::Shape{5, 5, 1, 1, 1}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 1, 1}, {ov::Shape{5, 5, 5, 1, 1}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 1, 1}, {ov::Shape{5, 5, 5, 1, 1}}}, 3},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5}}}, 3},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 1}, {ov::Shape{5, 5, 5, 5, 1}}}, 4},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5}}}, 4},
// Dynamic shapes
{ov::test::InputShape{// dynamic shape
ov::PartialShape{-1, -1},
{// target static shapes
ov::Shape{10, 10},
ov::Shape{15, 15},
ov::Shape{10, 10},
ov::Shape{10, 5}}},
1},
{ov::test::InputShape{// dynamic shape
ov::PartialShape{-1, -1, 1, 1, 1},
{// target static shapes
ov::Shape{5, 5, 1, 1, 1},
ov::Shape{10, 7, 1, 1, 1},
ov::Shape{5, 5, 1, 1, 1}}},
1},
};
const std::vector<SoftMaxConfig> notOptimizedConfigsFP32{
// Static shapes
{ov::test::InputShape{ov::PartialShape{1, 100}, {ov::Shape{1, 100}}}, 0},
{ov::test::InputShape{ov::PartialShape{10, 10}, {ov::Shape{10, 10}}}, 0},
{ov::test::InputShape{ov::PartialShape{10, 10, 10}, {ov::Shape{10, 10, 10}}}, 0},
{ov::test::InputShape{ov::PartialShape{10, 10, 10}, {ov::Shape{10, 10, 10}}}, 1},
// Dynamic shapes
{ov::test::InputShape{// dynamic shape
ov::PartialShape{-1, -1},
{// target static shapes
ov::Shape{10, 1},
ov::Shape{15, 15},
ov::Shape{10, 5},
ov::Shape{15, 15}}},
0},
{ov::test::InputShape{// dynamic shape
ov::PartialShape{ov::Dimension{1, 100}, ov::Dimension{1, 100}, -1},
{// target static shapes
ov::Shape{10, 10, 10},
ov::Shape{10, 10, 1},
ov::Shape{10, 5, 10},
ov::Shape{10, 10, 1}}},
1},
};
const std::vector<SoftMaxConfig> unsupportedConfigsFP32{
// Static shapes
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 0},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 1},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 2},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 3},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 4},
{ov::test::InputShape{ov::PartialShape{5, 5, 5, 5, 5, 5}, {ov::Shape{5, 5, 5, 5, 5, 5}}}, 5},
// Dynamic shapes
{ov::test::InputShape{// dynamic shape
ov::PartialShape{-1, -1, -1, -1, -1, -1},
{// target static shapes
ov::Shape{5, 5, 5, 5, 5, 5},
ov::Shape{7, 7, 7, 7, 7, 7},
ov::Shape{5, 5, 5, 5, 5, 5}}},
4},
};
const auto avx512 = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
const auto avx2 = CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
const auto sse42 = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
const auto ref = CPUSpecificParams{{}, {}, {"ref_any"}, "ref_any"};
const std::vector<CPUSpecificParams> vecCpuConfigs = {ref, sse42, avx2, avx512};
const auto OptimizedParams = testing::Combine(testing::Values(ElementType::f32, ElementType::bf16),
testing::ValuesIn(optimizedConfigsFP32),
testing::Values(CommonTestUtils::DEVICE_CPU),
testing::ValuesIn(filterCPUInfoForDevice(vecCpuConfigs)));
INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Optimized_CPU,
SoftMaxLayerCPUTest,
OptimizedParams,
SoftMaxLayerCPUTest::getTestCaseName);
const auto NotOptimizedParams = testing::Combine(testing::Values(ElementType::f32, ElementType::bf16),
testing::ValuesIn(notOptimizedConfigsFP32),
testing::Values(CommonTestUtils::DEVICE_CPU),
testing::Values(notOptimizedCPUSpec));
INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_CPU,
SoftMaxLayerCPUTest,
NotOptimizedParams,
SoftMaxLayerCPUTest::getTestCaseName);
const auto UnsupportedParams = testing::Combine(testing::Values(ElementType::f32, ElementType::bf16),
testing::ValuesIn(unsupportedConfigsFP32),
testing::Values(CommonTestUtils::DEVICE_CPU),
testing::Values(notOptimizedCPUSpec));
INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Unsupported_CPU,
SoftMaxLayerCPUTest,
UnsupportedParams,
SoftMaxLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions