Fix ConcatConvSumInPlaceTest

2022-05-19 02:55:58 +03:00
parent f5ea549d97
commit c20d762af8
6 changed files with 172 additions and 180 deletions
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -553,6 +553,7 @@ void Convolution::setPostOps(dnnl::primitive_attr &attr, const VectorDims &dims,
        }

        if (auto* fakeQuantizeNode = dynamic_cast<FakeQuantize *>(node.get())) {
+            const Dim OC = dims[1];
            if (i == 0) {
                bool hasSubsequentSum = false;
                bool hasSubsequentFQ = false;
@@ -576,7 +577,6 @@ void Convolution::setPostOps(dnnl::primitive_attr &attr, const VectorDims &dims,
                    std::vector<float> fqScale = fakeQuantizeNode->getFQScales();
                    if (!fqScale.empty()) {
                        size_t size = fqScale.size();
-                        size_t OC = getOutputShapeAtPort(0).getStaticDims()[1];
                        if (size == 1) {
                            fqScale.resize(OC);
                            for (size_t k = 0; k < OC; k++)
@@ -603,7 +603,6 @@ void Convolution::setPostOps(dnnl::primitive_attr &attr, const VectorDims &dims,
                            std::vector<float> outScale = isc;
                            if (!outScale.empty()) {
                                size_t size = outScale.size();
-                                size_t OC = getOutputShapeAtPort(0).getStaticDims()[1];
                                if (size == 1) {
                                    outScale.resize(OC);
                                    for (size_t k = 0; k < OC; k++)
@@ -638,7 +637,6 @@ void Convolution::setPostOps(dnnl::primitive_attr &attr, const VectorDims &dims,
                            std::vector<float> outScale = isc;
                            if (!outScale.empty()) {
                                size_t size = outScale.size();
-                                size_t OC = getOutputShapeAtPort(0).getStaticDims()[1];
                                if (size == 1) {
                                    outScale.resize(OC);
                                    for (size_t k = 0; k < OC; k++)
--- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp
@@ -2075,19 +2075,10 @@ void Eltwise::fuseInto(NodePtr& parentNode) {
                                    || parentNode->getType() == Type::BinaryConvolution)
                                        && getAlgorithm() == Algorithm::EltwiseAdd &&
            dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims());
-    if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
+    if ((scales.empty() && shifts.empty()) &&
+        !specialConvolutionAddFusing &&
+        canBePerformedAsScaleShift(parentNode.get())) {
        std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
-        if ((parentNode->getType() == Type::FullyConnected
-                || parentNode->getType() == Type::MatMul)
-            && one_of(getAlgorithm(), Algorithm::EltwiseAdd,
-                                      Algorithm::EltwiseSubtract,
-                                      Algorithm::EltwiseMultiply,
-                                      Algorithm::EltwiseDivide,
-                                      Algorithm::EltwiseMulAdd,
-                                      Algorithm::EltwisePowerStatic,
-                                       Algorithm::EltwisePrelu)) {
-            std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
-        }
    }
    Node::fuseInto(parentNode);
 }
--- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp
@@ -1750,18 +1750,16 @@ void FakeQuantize::initializePostOpData(const VectorDims &dims, const size_t buf
    if (getAlgorithm() == Algorithm::FQBinarization) {
        const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0];
        const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
-        if (!isPostOpDataInitialized) {
-            binarizationThresholds.resize(axisPaddedSize, 0);
-            binarizationOutputMask.resize(axisPaddedSize, 0);
+        binarizationThresholds.resize(axisPaddedSize, 0);
+        binarizationOutputMask.resize(axisPaddedSize, 0);

-            if (isInputLowBroadcasted) {
-                std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
-                std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
-            }
-            if (isOutputHighBroadcasted) {
-                std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
-                std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
-            }
+        if (isInputLowBroadcasted) {
+            std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
+            std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
+        }
+        if (isOutputHighBroadcasted) {  // copy mask element 0 over the rest of the real axis range
+            std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
+            std::fill(binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask.end(), 0);  // zero the mask's own padding tail, not the thresholds'
        }
    } else {
        if (cropLow.size() > 1)
@@ -1789,25 +1787,25 @@ void FakeQuantize::initializePostOpData(const VectorDims &dims, const size_t buf
 }

 void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size_t bufferAlignment) {
-    if (isPostOpDataInitialized)
+    if (isLegacyPostOpDataInitialized)
        return;

    if (getAlgorithm() == Algorithm::FQBinarization) {
        const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0];
        const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
-        if (!isPostOpDataInitialized) {
-            binarizationThresholds.resize(axisPaddedSize, 0);
-            binarizationOutputMask.resize(axisPaddedSize, 0);

-            if (isInputLowBroadcasted) {
-                std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
-                std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
-            }
-            if (isOutputHighBroadcasted) {
-                std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
-                std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
-            }
+        binarizationThresholds.resize(axisPaddedSize, 0);
+        binarizationOutputMask.resize(axisPaddedSize, 0);
+
+        if (isInputLowBroadcasted) {
+            std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
+            std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
        }
+        if (isOutputHighBroadcasted) {  // copy mask element 0 over the rest of the real axis range
+            std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
+            std::fill(binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask.end(), 0);  // zero the mask's own padding tail, not the thresholds'
+        }
+
    } else {
        quantizationData.insert(quantizationData.end(), cropLow.begin(), cropLow.end());
        quantizationData.insert(quantizationData.end(), cropHigh.begin(), cropHigh.end());
@@ -1821,7 +1819,7 @@ void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size
        quantizationData.resize(quantizationDataSize + bufferPaddingSize, 0);
    }

-    isPostOpDataInitialized = true;
+    isLegacyPostOpDataInitialized = true;
 }

 void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector<MemoryPtr>& postOpsMem) {
--- a/src/plugins/intel_cpu/src/nodes/fake_quantize.h
+++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.h
@@ -201,8 +201,11 @@ private:

    std::vector<float> fqScales;

-    // onednn style post ops data representation
+
    bool isPostOpDataInitialized = false;
+    bool isLegacyPostOpDataInitialized = false;
+
+    // onednn style post ops data representation
    dnnl::impl::shifts_t<float> cropLowData;
    dnnl::impl::shifts_t<float> cropHighData;
    dnnl::impl::scales_t inputScaleData;
--- a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp
+++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp
@@ -134,13 +134,13 @@ protected:
    const size_t _convOutChannels = 64;
 };

-// TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) {
-//     SKIP_IF_CURRENT_TEST_IS_DISABLED()
+TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()

-//     run();
+    run();

-//     CheckPluginRelatedResults(compiledModel, "Convolution");
-// }
+    CheckPluginRelatedResults(compiledModel, "Convolution");
+}

 class ConcatConvSumInPlaceTestInt8 : public ConcatConvSumInPlaceTest {
 public:
@@ -200,155 +200,155 @@ public:
    }
 };

-// TEST_P(ConcatConvSumInPlaceTestInt8, CompareWithRefs) {
-//     SKIP_IF_CURRENT_TEST_IS_DISABLED()
+TEST_P(ConcatConvSumInPlaceTestInt8, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()

-//     run();
+    run();

-//     CheckPluginRelatedResults(compiledModel, "Convolution");
-// }
+    CheckPluginRelatedResults(compiledModel, "Convolution");
+}

-//namespace {
-// const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
-//         {[](postNodeConfig& cfg) {
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
-//         }, "Multiply(PerChannel)"},
-//         {[](postNodeConfig& cfg) {
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
-//         }, "Add(PerChannel)"},
-//         {[](postNodeConfig& cfg){
-//             auto localPrc = cfg.input->get_element_type();
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
-//         }, "FakeQuantize(PerChannel)"},
-//         {[](postNodeConfig& cfg) {
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
-//         }, "Multiply(PerChannel)"},
-//         {[](postNodeConfig& cfg) {
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
-//         }, "Add(PerChannel)"}}), {"Add"} };
+namespace {
+const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
+        }, "Multiply(PerChannel)"},
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
+        }, "Add(PerChannel)"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"},
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Multiply>(cfg.input, constNode);
+        }, "Multiply(PerChannel)"},
+        {[](postNodeConfig& cfg) {
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Add>(cfg.input, constNode);
+        }, "Add(PerChannel)"}}), {"Add"} };

-// const auto fusingDivSubFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
-//         {[](postNodeConfig& cfg){
-//             ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input);
-//             auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Divide>(cfg.input, secondMultInput);
-//         }, "Divide(PerChannel)"},
-//         {[](postNodeConfig& cfg){
-//             ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input);
-//             auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector<float>{}, true);
-//             return std::make_shared<ngraph::opset1::Subtract>(cfg.input, secondMultInput);
-//         }, "Subtract(PerChannel)"},
-//         {[](postNodeConfig& cfg){
-//             auto localPrc = cfg.input->get_element_type();
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
-//         }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
+const auto fusingDivSubFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg){
+            ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input);
+            auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Divide>(cfg.input, secondMultInput);
+        }, "Divide(PerChannel)"},
+        {[](postNodeConfig& cfg){
+            ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input);
+            auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector<float>{}, true);
+            return std::make_shared<ngraph::opset1::Subtract>(cfg.input, secondMultInput);
+        }, "Subtract(PerChannel)"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };

-// const auto fusingSigmoidFQFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
-//         {[](postNodeConfig& cfg){
-//             return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid);
-//         }, "Sigmoid"},
-//         {[](postNodeConfig& cfg){
-//             auto localPrc = cfg.input->get_element_type();
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
-//         }, "FakeQuantize(PerChannel)"},
-//         {[](postNodeConfig& cfg){
-//             auto localPrc = cfg.input->get_element_type();
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
-//         }, "FakeQuantize(PerChannel)"}}), {"Sigmoid", "FakeQuantize", "FakeQuantize"} };
+const auto fusingSigmoidFQFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg){
+            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid);
+        }, "Sigmoid"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"}}), {"Sigmoid", "FakeQuantize", "FakeQuantize"} };

-// const auto fusingClampFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
-//         {[](postNodeConfig& cfg){
-//             return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
-//         }, "Clamp"},
-//         {[](postNodeConfig& cfg){
-//             auto localPrc = cfg.input->get_element_type();
-//             ngraph::Shape newShape = generatePerChannelShape(cfg.input);
-//             return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
-//         }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };
+const auto fusingClampFQ = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
+        {[](postNodeConfig& cfg){
+            return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f});
+        }, "Clamp"},
+        {[](postNodeConfig& cfg){
+            auto localPrc = cfg.input->get_element_type();
+            ngraph::Shape newShape = generatePerChannelShape(cfg.input);
+            return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape);
+        }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} };



-// const std::vector<fusingSpecificParams> fusingParamsSet{
-//         emptyFusingSpec,
-//         fusingSigmoid,
-//         fusingFakeQuantizePerTensorRelu,
-//         fusingFakeQuantizePerChannelRelu,
-//         fusingFQPerChannelSigmoidFQPerChannel,
-//         fusingReluScaleShift,
-//         fusingMulAddFQMullAdd,
-//         fusingSigmoidFQFQ,
-//         fusingDivSubFQ
-// };
+const std::vector<fusingSpecificParams> fusingParamsSet{
+        emptyFusingSpec,
+        fusingSigmoid,
+        fusingFakeQuantizePerTensorRelu,
+        fusingFakeQuantizePerChannelRelu,
+        fusingFQPerChannelSigmoidFQPerChannel,
+        fusingReluScaleShift,
+        fusingMulAddFQMullAdd,
+        fusingSigmoidFQFQ,
+        fusingDivSubFQ
+};

-// const std::vector<fusingSpecificParams> fusingParamsSetBF16{
-//         emptyFusingSpec,
-//         fusingSigmoid,
-//         fusingReluScaleShift
-// };
+const std::vector<fusingSpecificParams> fusingParamsSetBF16{
+        emptyFusingSpec,
+        fusingSigmoid,
+        fusingReluScaleShift
+};

-// InputShape convInpShape = {
-//         //dynamic shapes
-//         {-1, 32, -1, -1},
-//         { //target static shapes
-//             {1, 32, 10, 10},
-//             {1, 32, 10, 10},
-//             {1, 32, 10, 10},
-//             {1, 32, 3, 3},
-//             {1, 32, 3, 10}
-//         }
-// };
+InputShape convInpShape = {
+        //dynamic shapes
+        {-1, 32, -1, -1},
+        { //target static shapes
+            {1, 32, 10, 10},
+            {1, 32, 10, 10},
+            {1, 32, 10, 10},
+            {1, 32, 3, 3},
+            {1, 32, 3, 10}
+        }
+};

-// InputShape secondInp = {
-//         //dynamic shapes
-//         {-1, -1, -1, -1},
-//         { //target static shapes
-//             {1, 64, 1, 8},
-//             {1, 64, 1, 8},
-//             {1, 64, 8, 8},
-//             {1, 64, 8, 8},
-//             {1, 64, 8, 1}
-//         }
-// };
+InputShape secondInp = {
+        //dynamic shapes
+        {-1, -1, -1, -1},
+        { //target static shapes
+            {1, 64, 1, 8},
+            {1, 64, 1, 8},
+            {1, 64, 8, 8},
+            {1, 64, 8, 8},
+            {1, 64, 8, 1}
+        }
+};

-// TODO lc: crash
-// INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_FP32, ConcatConvSumInPlaceTest,
-//                          ::testing::Combine(
-//                                  ::testing::Values(convInpShape),
-//                                  ::testing::Values(secondInp),
-//                                  ::testing::Values(true, false),
-//                                  ::testing::ValuesIn(fusingParamsSet),
-//                                  ::testing::Values(cpuEmptyPluginConfig)),
-//                          ConcatConvSumInPlaceTest::getTestCaseName);
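+// TODO lc: these instantiations were previously disabled with a "crash" note; confirm they are stable now that they are re-enabled, then drop this comment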
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_FP32, ConcatConvSumInPlaceTest,
+                         ::testing::Combine(
+                                 ::testing::Values(convInpShape),
+                                 ::testing::Values(secondInp),
+                                 ::testing::Values(true, false),
+                                 ::testing::ValuesIn(fusingParamsSet),
+                                 ::testing::Values(cpuFP32PluginConfig)),
+                         ConcatConvSumInPlaceTest::getTestCaseName);

-// INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_BF16, ConcatConvSumInPlaceTest,
-//                          ::testing::Combine(
-//                                  ::testing::Values(convInpShape),
-//                                  ::testing::Values(secondInp),
-//                                  ::testing::Values(true, false),
-//                                  ::testing::ValuesIn(fusingParamsSetBF16),
-//                                  ::testing::Values(cpuBF16PluginConfig)),
-//                          ConcatConvSumInPlaceTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_BF16, ConcatConvSumInPlaceTest,
+                         ::testing::Combine(
+                                 ::testing::Values(convInpShape),
+                                 ::testing::Values(secondInp),
+                                 ::testing::Values(true, false),
+                                 ::testing::ValuesIn(fusingParamsSetBF16),
+                                 ::testing::Values(cpuBF16PluginConfig)),
+                         ConcatConvSumInPlaceTest::getTestCaseName);

-// INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_INT8, ConcatConvSumInPlaceTestInt8,
-//                          ::testing::Combine(
-//                                  ::testing::Values(convInpShape),
-//                                  ::testing::Values(secondInp),
-//                                  ::testing::Values(true, false),
-//                                  ::testing::ValuesIn(fusingParamsSet),
-//                                  ::testing::Values(cpuEmptyPluginConfig)),
-//                          ConcatConvSumInPlaceTest::getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_INT8, ConcatConvSumInPlaceTestInt8,
+                         ::testing::Combine(
+                                 ::testing::Values(convInpShape),
+                                 ::testing::Values(secondInp),
+                                 ::testing::Values(true, false),
+                                 ::testing::ValuesIn(fusingParamsSet),
+                                 ::testing::Values(cpuEmptyPluginConfig)),
+                         ConcatConvSumInPlaceTest::getTestCaseName);

-//} // namespace
+} // namespace
 } // namespace SubgraphTestsDefinitions
--- a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp
+++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp
@@ -162,6 +162,8 @@ protected:
 // common parameters
 const auto emptyCPUSpec = CPUSpecificParams{{}, {}, {}, {}};
 const std::map<std::string, std::string> cpuEmptyPluginConfig;
+const std::map<std::string, std::string> cpuFP32PluginConfig =
+        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } };
 const std::map<std::string, std::string> cpuBF16PluginConfig =
        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES } };