[CPU] Reorganize function tests. Remove legacy bfloat16 tests (#17130)
This commit is contained in:
parent
e79db660ce
commit
478725c719
@ -1,213 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class BF16NetworkRestore1 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //             Power1 (FP32)
        //                |
        //             AvgPooling1 (BF16)
        //                |
        //             Convolution1 (BF16)
        //                |
        //             ReLU1 (Fused)
        //                |-----------------------\
        //                |                        \
        //      Convolution2 (BF16)          Convolution3 (BF16)
        //                |                   /            \
        //                |            ReLU2 (FP32)    Normalize (FP32)
        //                 \              /                 |
        //           Eltwise (Fused to Conv2)               |
        //                |                                 |
        //           ReLU3 (Fused to Conv2)                 |
        //                |                                 |
        //           MaxPooling1 (BF16)                     |
        //                 \                               /
        //                  \--------- Eltwise -----------/
        //                                |

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{1, 3, 224, 224});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Power1");

        // AvgPooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(addNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPooling1");

        // convolution1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { 3, 3, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(3 * 3 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(3 * 3 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            avgpoolNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution1");

        // ReLU1
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("ReLU1");

        // convolution2
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution2");

        // convolution3
        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution3");

        // ReLU2
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode3);
        reluNode2->set_friendly_name("ReLU2");

        // Norm1
        // normalize
        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
        float eps{1e-6f};
        auto eps_mode = op::EpsMode::ADD;

        auto normNode = std::make_shared<opset1::NormalizeL2>(convNode3, axes, eps, eps_mode);
        normNode->set_friendly_name("Norm1");

        // Eltwise1
        auto eltNode1 = std::make_shared<opset1::Add>(convNode2, reluNode2);
        eltNode1->set_friendly_name("Eltwise1");

        // ReLU3
        auto reluNode3 = std::make_shared<opset1::Relu>(eltNode1);
        reluNode3->set_friendly_name("ReLU3");

        // maxPooling1
        auto maxPoolNode = std::make_shared<opset1::MaxPool>(reluNode3,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{0, 0},
                                                             Shape{2, 2},
                                                             op::RoundingType::FLOOR);
        maxPoolNode->set_friendly_name("maxPooling1");

        // Eltwise2
        auto eltNode2 = std::make_shared<opset1::Add>(maxPoolNode, normNode);
        eltNode2->set_friendly_name("Eltwise2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.4f;  // max value in the latest tensor for the FP32 network is 10.83

        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["Power1"] = "FP32";
        expectedPrecisions["AvgPooling1"] = "BF16";
        expectedPrecisions["Convolution1"] = "BF16";
        expectedPrecisions["ReLU1"] = "ndef";
        expectedPrecisions["Convolution2"] = "BF16";
        expectedPrecisions["Convolution3"] = "BF16";
        expectedPrecisions["ReLU2"] = "BF16";
        expectedPrecisions["Norm1"] = "BF16";
        expectedPrecisions["Eltwise1"] = "ndef";
        expectedPrecisions["ReLU3"] = "ndef";
        expectedPrecisions["maxPooling1"] = "BF16";
        expectedPrecisions["Eltwise2"] = "BF16";
    }
};

TEST_P(BF16NetworkRestore1, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, BF16NetworkRestore1,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 224, 224 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         BF16NetworkRestore1::getTestCaseName);

}  // namespace LayerTestsDefinitions
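Note: the bf16 constants above are produced by bit-twiddling a float. `reducePrecisionBitwiseS` (defined in the helper header in the next hunk) keeps the top 16 bits of the IEEE-754 single-precision encoding and rounds up on the highest discarded bit. A minimal standalone sketch of that conversion, with hypothetical names and example values chosen here purely for illustration, not part of the removed tests:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Keep the top 16 bits of a float, rounding up when the highest discarded
// bit is set, unless the upper mantissa bits are all ones (which would
// carry into the exponent) -- mirroring the helper's logic.
static uint16_t toBf16Bits(float in) {
    uint32_t bits;
    std::memcpy(&bits, &in, sizeof(bits));
    if ((bits & 0x8000u) && (bits & 0x007F0000u) != 0x007F0000u) {
        bits += 0x10000u;
    }
    return static_cast<uint16_t>(bits >> 16);
}

int main() {
    std::printf("1.0f   -> 0x%04X\n", toBf16Bits(1.0f));    // 0x3F80, exact
    std::printf("2.0f   -> 0x%04X\n", toBf16Bits(2.0f));    // 0x4000, exact
    std::printf("1.004f -> 0x%04X\n", toBf16Bits(1.004f));  // 0x3F81, rounded up
    return 0;
}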
@ -1,260 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <inference_engine.hpp>
#include "ie_common.h"
#include <ie_blob.h>
#include <math.h>
#include <map>
#include <string>
#include <utility>
#include <memory>
#include <tuple>
#include <vector>

#include "ngraph/opsets/opset1.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include <ie_system_conf.h>

namespace LayerTestsDefinitions {

/**
 * Class providing static helpers for bfloat16 functional tests.
 * Using these functions you can fill tensor content by some periodic law
 * or compare outputs.
 */
class BFloat16Helpers {
public:
    static std::pair<std::string, std::string> matchPerfCountPrecisionVsExpected(
        const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfCounts,
        const std::map<std::string, std::string>& expected) {
        for (auto e : expected) {
            auto it = perfCounts.find(e.first);
            if (it == perfCounts.end()) {
                return std::pair<std::string, std::string>(e.first, "NOT_FOUND_IN_PERF_COUNTS");
            }
            // compare the last n symbols of the execution type, where n is the length of e.second
            std::string execType = it->second.exec_type;
            std::string pfPrecision = execType.substr(execType.length() - e.second.length(), e.second.length());
            if (pfPrecision != e.second) {
                return std::pair<std::string, std::string>(e.first, pfPrecision);
            }
        }
        return std::pair<std::string, std::string>("", "");
    }

    static float getMaxAbsValue(const float* data, size_t size) {
        float maxVal = 0.f;
        for (size_t i = 0; i < size; i++) {
            if (fabs(data[i]) > maxVal) {
                maxVal = fabs(data[i]);
            }
        }
        return maxVal;
    }

    static float reducePrecisionBitwise(const float in) {
        float f = in;
        int* i = reinterpret_cast<int*>(&f);
        int t2 = *i & 0xFFFF0000;
        float ft1;
        memcpy(&ft1, &t2, sizeof(float));
        if ((*i & 0x8000) && (*i & 0x007F0000) != 0x007F0000) {
            t2 += 0x10000;
            memcpy(&ft1, &t2, sizeof(float));
        }
        return ft1;
    }

    static short reducePrecisionBitwiseS(const float in) {
        float f = reducePrecisionBitwise(in);
        int intf;
        memcpy(&intf, &f, sizeof(int));
        intf = intf >> 16;
        short s = intf;
        return s;
    }
};


typedef std::tuple<
    InferenceEngine::Precision,
    InferenceEngine::Precision,
    InferenceEngine::SizeVector,
    InferenceEngine::SizeVector,
    std::string> basicParams;


/**
 * Base class for bf16 tests.
 * The flow in these tests is to load the network in FP32 and in BF16 modes and verify:
 * 1. the difference between the output tensors within some threshold;
 * 2. which precision was selected for the layers described in the runtime info of the performance counters.
 *
 * To develop a new test you need to:
 * 1. Define a class inherited from BasicBF16Test and implement SetUp(). For example:
 *
 * class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
 * protected:
 *     void SetUp() override {
 *         fnPtr = std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
 *
 *         // STAGE1:
 *         threshold = 9e-1;
 *
 *         // STAGE2:
 *         // filling of the expected precision of layer execution, defined by the precision of the input tensor
 *         // to the primitive and reflected in the performance counters
 *         expectedPrecisions["Add_4"] = "FP32";
 *         expectedPrecisions["Convolution_6"] = "BF16";
 *         expectedPrecisions["Convolution_7"] = "BF16";
 *         expectedPrecisions["Add_8"] = "ndef";
 *         expectedPrecisions["Convolution_10"] = "BF16";
 *     }
 * };
 *
 * 2. Define the test:
 * TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
 *     test();
 * };
 *
 * 3. INSTANTIATE_TEST_SUITE_P(smoke_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
 *                             ::testing::Combine(
 *                                 ::testing::Values(Precision::FP32),
 *                                 ::testing::Values(Precision::FP32),
 *                                 ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
 *                                 ::testing::Values(SizeVector()),
 *                                 ::testing::Values(CommonTestUtils::DEVICE_CPU)),
 *                             ScaleshiftConv_x3_Eltwise::getTestCaseName);
 *
 * In the 3rd stage do not forget the bfloat16 prefix!
 */
class BasicBF16Test : public testing::WithParamInterface<basicParams>,
                      public CommonTestUtils::TestsCommon {
protected:
    virtual std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) = 0;

public:
    std::shared_ptr<ngraph::Function> fnPtr;
    std::string targetDevice;
    InferenceEngine::SizeVector inputShapes, newInputShapes;
    InferenceEngine::Precision inputPrecision, netPrecision;
    std::map<std::string, std::string> expectedPrecisions;
    float threshold = 2e-2f;  // is enough for tensors having absolute maximum values less than 1

    static std::string getTestCaseName(testing::TestParamInfo<basicParams> obj) {
        InferenceEngine::Precision inputPrecision, netPrecision;
        InferenceEngine::SizeVector inputShapes, newInputShapes;
        std::string targetDevice;
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = obj.param;

        std::ostringstream result;
        if (!newInputShapes.empty()) {
            result << "Reshape_From=" << CommonTestUtils::vec2str(inputShapes);
            result << "_To=" << CommonTestUtils::vec2str(newInputShapes) << "_";
        } else {
            result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        }
        result << "inPRC=" << inputPrecision.name() << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

    static void setNetInOutPrecision(InferenceEngine::CNNNetwork &cnnNet, InferenceEngine::Precision inPrc,
                                     InferenceEngine::Precision outPrc = InferenceEngine::Precision::UNSPECIFIED) {
        if (inPrc != InferenceEngine::Precision::UNSPECIFIED) {
            for (const auto &inputItem : cnnNet.getInputsInfo()) {
                inputItem.second->setPrecision(inPrc);
            }
        }
        if (outPrc != InferenceEngine::Precision::UNSPECIFIED) {
            for (const auto &output : cnnNet.getOutputsInfo()) {
                output.second->setPrecision(outPrc);
            }
        }
    }

    void test() {
        if (!InferenceEngine::with_cpu_x86_avx512_core()) {
            // We enable bf16 tests on platforms with native bfloat16 support and on platforms with the AVX512 ISA.
            // On platforms with the AVX512 ISA but without native bfloat16 support, computations are done via a simulation mode.
            GTEST_SKIP();
        }
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        InferenceEngine::CNNNetwork cnnNet(fnPtr);

        setNetInOutPrecision(cnnNet, inputPrecision);
        std::string inputName = cnnNet.getInputsInfo().begin()->first;
        std::string outputName = cnnNet.getOutputsInfo().begin()->first;
        auto ie = InferenceEngine::Core();
        // BF16 inference
        std::map<std::string, std::string> options;
        if (netPrecision == InferenceEngine::Precision::FP32) {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
        } else {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
        }
        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;

        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
        auto req1 = exec_net1.CreateInferRequest();

        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
        FuncTestUtils::fillInputsBySinValues(inBlob1);

        req1.Infer();
        auto outBlobBF16 = req1.GetBlob(outputName);
        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
        ASSERT_NE(mout1, nullptr);
        auto lm1 = mout1->rmap();

        // FP32 inference
        // if netPrecision is not equal to FP32, change the network precision and recreate the network
        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
        std::string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
        std::string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
        setNetInOutPrecision(cnnNetFP32, inputPrecision);
        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
        auto req2 = exec_net2.CreateInferRequest();

        req2.SetBlob(inputNameFP32, inBlob1);

        req2.Infer();
        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
        ASSERT_NE(mout2, nullptr);
        auto lm2 = mout2->rmap();

        // debug output to figure out the maximum value in the output tensors:
        // std::cout << "Max in bfloat16 network by output " << outputName << ": " <<
        //     BFloat16Helpers::getMaxAbsValue(lm1.as<const float *>(), mout1->size()) << std::endl;
        // std::cout << "Max in fp32 network by output " << outputNameFP32 << ": " <<
        //     BFloat16Helpers::getMaxAbsValue(lm2.as<const float *>(), mout2->size()) << std::endl;
        FuncTestUtils::compareRawBuffers(lm1.as<const float *>(),
                                         lm2.as<const float *>(),
                                         mout1->size(), mout2->size(),
                                         FuncTestUtils::CompareType::ABS,
                                         threshold);
        // Stage2: verification of the performance counters
        std::pair<std::string, std::string> wrongLayer =
            BFloat16Helpers::matchPerfCountPrecisionVsExpected(req1.GetPerformanceCounts(), expectedPrecisions);
        if (wrongLayer.first != std::string("")) {
            std::string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
            std::string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
            ASSERT_EQ(layerInPerfCounts, layerExpected);
        }
        fnPtr.reset();
    }
};

}  // namespace LayerTestsDefinitions
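Note: `matchPerfCountPrecisionVsExpected` above decides a layer's execution precision by comparing the trailing characters of the profiling entry's `exec_type` (for example `jit_avx512_BF16`) against the expected string. A minimal sketch of that suffix comparison, using hypothetical function and execution-type names for illustration:

#include <iostream>
#include <string>

// Return true when execType ends with the expected precision suffix --
// the same comparison the helper performs on perf-counter entries.
static bool precisionSuffixMatches(const std::string& execType, const std::string& expected) {
    if (execType.length() < expected.length())
        return false;
    return execType.substr(execType.length() - expected.length()) == expected;
}

int main() {
    std::cout << precisionSuffixMatches("jit_avx512_BF16", "BF16") << "\n";  // 1
    std::cout << precisionSuffixMatches("jit_avx512_FP32", "BF16") << "\n";  // 0
    return 0;
}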
@ -1,160 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {
namespace {
static const size_t inputSize = 2, concatAxe = 1;
static std::vector<SizeVector> paramVector = {
    SizeVector({ 1, 1, inputSize, inputSize }),
    SizeVector({ 1, 2, inputSize, inputSize }),
    SizeVector({ 1, 3, inputSize, inputSize }),
    SizeVector({ 1, 4, inputSize, inputSize }),
    SizeVector({ 1, 5, inputSize, inputSize }),
    SizeVector({ 1, 6, inputSize, inputSize }),
    SizeVector({ 1, 7, inputSize, inputSize }),
    SizeVector({ 1, 8, inputSize, inputSize }),
    SizeVector({ 1, 9, inputSize, inputSize }),
    SizeVector({ 1, 10, inputSize, inputSize })};
}  // namespace

class Concat_in_place : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift
        //       /      \
        //     Conv    Conv
        //       \      /
        //        concat
        //          |
        //         relu

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        auto channelsCount = inputShapes[1];
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Concat
        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };

        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, concatAxe);
        concNode->set_friendly_name("CONC_1_TEST");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(concNode);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 10e-1;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(Concat_in_place, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Concat_in_place,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::ValuesIn(paramVector),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Concat_in_place::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Concat_in_place,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::ValuesIn(paramVector),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Concat_in_place::getTestCaseName);
}  // namespace LayerTestsDefinitions
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvAdd : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   Power (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Eltwise (SUM) (BF16)
        //     |
        //   Conv (BF16)

        auto channelsCount = inputShapes[1];

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> eltConst0 = nullptr, eltConst1 = nullptr;
        if (netPrecision == Precision::FP32) {
            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            eltConst0 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
            eltConst1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, eltConst0);
        addNode0->set_friendly_name("Add_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0->set_friendly_name("Convolution_0");

        // eltwise, i.e. sum
        auto eltSumNode = std::make_shared<opset1::Add>(convNode0, eltConst1);
        eltSumNode->set_friendly_name("Elt_sum");

        // convolution
        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltSumNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor

        // 256 channels
        // threshold = 0.26f;  // Max in fp32 network by output: 5.26852

        // 3 channels
        threshold = 0.2f;  // Max in fp32 network by output: 4.90418

        // STAGE3:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["Elt_sum"] = "ndef";
    }
};

TEST_P(ConvAdd, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvAdd,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 38, 38})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvAdd::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvAdd,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 38, 38})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvAdd::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,132 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   ScaleShift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ngraph::element::f32, ngraph::Shape{inputShapes});
        auto const1 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 2.0f });
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        auto const2 = opset1::Constant::create(ngraph::element::f32, Shape{1}, { 1.0f });
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;

        auto channelsCount = inputShapes[1];

        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Convolution
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        std::vector<float> weightValues2;
        weightValues2.resize(channelsCount * channelsCount * 3 * 3);
        FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
        auto weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape2, weightValues2);
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;  // Max in fp32 network by output CONV_2: 49.3427
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ConvConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvConv::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,151 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <vector>
#include <string>

#include <ie_core.hpp>
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvDWConvReLU : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   scaleshift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Depthwise Conv (BF16, assuming explicit separate execution of the kernel, not fused into the previous convolution)
        //     |
        //   ReLU (Fused into DW convolution)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        auto channelsCount = inputShapes[1];

        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // DW convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2FP32;
            weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::GroupConvolution>(
            convNode1, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode2);
        reluNode2->set_friendly_name("RELU");

        return std::make_shared<ngraph::Function>(reluNode2, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.4f;  // the maximum value in the tensor is 54.89
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input tensor
        // to the primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU"] = "ndef";
    }
};

TEST_P(ConvDWConvReLU, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvDWConvReLU,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvDWConvReLU::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvDWConvReLU,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvDWConvReLU::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,276 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {
typedef std::tuple<Precision, SizeVector, string, size_t, CoordinateDiff, string> convEltwiseDepthwiseTestParamsSet;

class ConvEltwiseDepthwise :
    public testing::WithParamInterface<convEltwiseDepthwiseTestParamsSet>, virtual public LayerTestsUtils::LayerTestsCommon {
public:
    std::shared_ptr<Function> fnPtr;
    SizeVector inputShapes;
    std::map<string, string> expectedPrecisions;
    float threshold = 7e-2f;
    Precision netPrecision;
    size_t kernel;
    CoordinateDiff pads;
    std::string dnnlPrimitive;

protected:
    std::shared_ptr<Function> createGraph(InferenceEngine::Precision netPrecision) {
        //   scaleshift (FP32)
        //     |
        //   Conv (BF16)
        //     |
        //   Relu (Eltwise fused into Conv)
        //     |
        //   scaleshift (Depthwise fused into Conv)

        element::Type ntype = (netPrecision == Precision::FP32) ? element::f32 : element::bf16;
        size_t chCnt = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, Shape{ inputShapes });
        input1->set_friendly_name("Input_1");
        std::shared_ptr<opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);
        mulNode->set_friendly_name("SS_1");

        // add
        std::shared_ptr<opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{ 1 }, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);

        // convolution
        std::shared_ptr<opset1::Constant> weightsNode = nullptr;
        Shape convFilterShape = { chCnt, chCnt, kernel, kernel };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(chCnt * chCnt * kernel * kernel);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(chCnt * chCnt * kernel * kernel);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<Node> convNode1 = std::make_shared<opset1::Convolution>(
            addNode, weightsNode, Strides({ 1, 1 }), pads, pads, Strides({ 1, 1 }), op::PadType::EXPLICIT);
        convNode1->set_friendly_name("CONV");

        // Eltwise, i.e. Relu
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("RELU");

        // multiply
        std::shared_ptr<opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 },
                                              { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);
        mulNode2->set_friendly_name("SS_2");

        // add
        std::shared_ptr<opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 }, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{ 1, chCnt, 1, 1 },
                                              { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);

        return std::make_shared<Function>(NodeVector{ addNode2 }, ParameterVector{ input1 });
    }

public:
    static string getTestCaseName(testing::TestParamInfo<convEltwiseDepthwiseTestParamsSet> obj) {
        Precision netPrecision;
        SizeVector inputShapes;
        string targetDevice;
        size_t kernel;
        CoordinateDiff pads;
        string dnnlPrimitive;
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "dnnlPrimitive=" << dnnlPrimitive << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

    void Run_test() {
        if (!InferenceEngine::with_cpu_x86_bfloat16()) {
            // On platforms which do not support bfloat16 we disable the bf16 tests, since there are no bf16 primitives;
            // the tests are useless on such platforms.
            return;
        }
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = this->GetParam();
        InferenceEngine::CNNNetwork cnnNet(fnPtr);

        for (const auto& inputItem : cnnNet.getInputsInfo()) {
            inputItem.second->setPrecision(Precision::FP32);
        }

        string inputName = cnnNet.getInputsInfo().begin()->first;
        string outputName = cnnNet.getOutputsInfo().begin()->first;
        auto ie = InferenceEngine::Core();
        // BF16 inference
        std::map<string, string> options;
        if (netPrecision == InferenceEngine::Precision::FP32) {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::YES;
        } else {
            options[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] = InferenceEngine::PluginConfigParams::NO;
        }
        options[InferenceEngine::PluginConfigParams::KEY_PERF_COUNT] = InferenceEngine::PluginConfigParams::YES;

        auto exec_net1 = ie.LoadNetwork(cnnNet, targetDevice, options);
        auto req1 = exec_net1.CreateInferRequest();

        InferenceEngine::Blob::Ptr inBlob1 = req1.GetBlob(inputName);
        FuncTestUtils::fillInputsBySinValues(inBlob1);

        req1.Infer();
        auto outBlobBF16 = req1.GetBlob(outputName);
        InferenceEngine::MemoryBlob::CPtr mout1 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobBF16);
        ASSERT_NE(mout1, nullptr);
        auto lm1 = mout1->rmap();

        // FP32 inference
        // if netPrecision is not equal to FP32, change the network precision and recreate the network
        InferenceEngine::CNNNetwork cnnNetFP32(createGraph(InferenceEngine::Precision::FP32));
        string inputNameFP32 = cnnNetFP32.getInputsInfo().begin()->first;
        string outputNameFP32 = cnnNetFP32.getOutputsInfo().begin()->first;
        for (const auto& inputItem : cnnNetFP32.getInputsInfo()) {
            inputItem.second->setPrecision(Precision::FP32);
        }
        auto exec_net2 = ie.LoadNetwork(cnnNetFP32, targetDevice,
                                        { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } });
        auto req2 = exec_net2.CreateInferRequest();

        req2.SetBlob(inputNameFP32, inBlob1);

        req2.Infer();
        auto outBlobFP32 = req2.GetBlob(outputNameFP32);
        InferenceEngine::MemoryBlob::CPtr mout2 = InferenceEngine::as<InferenceEngine::MemoryBlob>(outBlobFP32);
        ASSERT_NE(mout2, nullptr);
        auto lm2 = mout2->rmap();

        FuncTestUtils::compareRawBuffers(lm1.as<const float*>(), lm2.as<const float*>(), mout1->size(), mout2->size(),
                                         FuncTestUtils::CompareType::ABS_AND_REL,
                                         threshold, threshold);

        // Stage2: verification of the performance counters
        const auto& perf_counts = req1.GetPerformanceCounts();
        std::pair<string, string> wrongLayer =
            BFloat16Helpers::matchPerfCountPrecisionVsExpected(perf_counts, expectedPrecisions);
        if (wrongLayer.first != string("")) {
            string layerInPerfCounts = wrongLayer.first + " " + wrongLayer.second;
            string layerExpected = wrongLayer.first + " " + expectedPrecisions[wrongLayer.first];
            ASSERT_EQ(layerInPerfCounts, layerExpected);
        }
        // onednn enabled the brgemm kernel, so the kernel name changed to:
        //   brgconv_avx512_(1x1)_bf16           isa: AVX512
        //   brgconv/jit_avx512_amx_(1x1)_bf16   isa: AMX
        // check the avx512 part only
        if (perf_counts.count("CONV")) {
            const std::string exec_type = perf_counts.at("CONV").exec_type;
            if (exec_type.find("avx512") == std::string::npos) {
                EXPECT_TRUE(false) << "CONV was expected to select AVX512 but actually selected: " << exec_type;
            }
        } else {
            EXPECT_TRUE(false) << "CONV NOT_FOUND_IN_PERF_COUNTS";
        }
        fnPtr.reset();
    }

    void SetUp() override {
        std::vector<size_t> inputShape;
        std::tie(netPrecision, inputShapes, targetDevice, kernel, pads, dnnlPrimitive) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        expectedPrecisions["SS_1"] = "FP32";
        expectedPrecisions["RELU"] = "ndef";
        expectedPrecisions["SS_2"] = "ndef";
    }
};

TEST_P(ConvEltwiseDepthwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run_test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_1x1_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             // If the input were 1,5,1,1 it would match the postops shape (1,5,1,1).
                             // The newly enabled binary postops would then consider the shapes equal and set the
                             // broadcast strategy to 'no broadcast'. The postops layout would be nchw, while the conv
                             // output layout would be nhwc or nChw16c, neither of which matches the postops layout.
                             // So change the input size to be different from the postops'.
                             ::testing::Values(SizeVector({ 1, 5, 2, 1 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(1)),
                             ::testing::Values(CoordinateDiff({ 0, 0 })),
                             ::testing::Values(std::string("jit_avx512_1x1_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_gemm_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 10, 10 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(3)),
                             ::testing::Values(CoordinateDiff({ 1, 1 })),
                             ::testing::Values(std::string("jit_avx512_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_conv_depthwise_BF16, ConvEltwiseDepthwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 5, 10, 10 })),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU),
                             ::testing::Values(size_t(3)),
                             ::testing::Values(CoordinateDiff({ 0, 0 })),
                             ::testing::Values(std::string("jit_avx512_BF16"))),
                         ConvEltwiseDepthwise::getTestCaseName);

}  // namespace LayerTestsDefinitions
@ -1,200 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ConvReLUPoolConvReLUPool : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // Convolution1 (FP32)
        //        |
        //   ReLU1 (Fused)
        //        |
        //  Pooling1 (BF16)
        //        |
        // Convolution2 (BF16)
        //        |
        //   ReLU2 (Fused)
        //        |
        //  Pooling2 (BF16)
        //        |
        // Convolution3 (BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");

        // convolution1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);
        reluNode->set_friendly_name("ReLU_1");

        // Pooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPool_1");

        // convolution2
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            avgpoolNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(convNode2);
        reluNode2->set_friendly_name("ReLU_2");

        // Pooling
        auto maxpoolNode2 = std::make_shared<opset1::MaxPool>(reluNode2,
                                                              Strides{1, 1},
                                                              Shape{1, 1},
                                                              Shape{0, 0},
                                                              Shape{2, 2},
                                                              op::RoundingType::FLOOR);
        maxpoolNode2->set_friendly_name("MaxPool_2");

        // convolution3
        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
        ngraph::Shape convFilterShape3 = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            maxpoolNode2, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.2f;  // max value in the final tensor of the FP32 network is 9.8

        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["ReLU_1"] = "ndef";
        expectedPrecisions["AvgPool_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["ReLU_2"] = "ndef";
        expectedPrecisions["MaxPool_2"] = "BF16";
        expectedPrecisions["Convolution_3"] = "BF16";
    }
};

TEST_P(ConvReLUPoolConvReLUPool, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvReLUPoolConvReLUPool::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ConvReLUPoolConvReLUPool,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ConvReLUPoolConvReLUPool::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,152 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Elt_max : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Power (FP32)
        //       |
        //  Conv(BF16)   Const(FP32)
        //       |      /
        //  Eltwise(MAX)(FP32)
        //       |
        //  Conv(BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t conv0OutputChannels = 1;

        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> powerConst = nullptr;
        if (netPrecision == Precision::FP32) {
            powerConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            powerConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto powerNode0 = std::make_shared<opset1::Multiply>(input1, powerConst);
        powerNode0->set_friendly_name("Power_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0 = nullptr, weightsNode1 = nullptr;
        ngraph::Shape convFilterShape0 = { conv0OutputChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        ngraph::Shape convFilterShape1 = { 1, conv0OutputChannels, 3, 3 };              // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32_0, weightValuesFP32_1;
            weightValuesFP32_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
            weightValuesFP32_1.resize(1 * conv0OutputChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32_0.data(), weightValuesFP32_0.size());
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32_1.data(), weightValuesFP32_1.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesFP32_0);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesFP32_1);
        } else {
            std::vector<short> weightValuesBF16_0, weightValuesBF16_1;
            weightValuesBF16_0.resize(conv0OutputChannels * channelsCount * 3 * 3);
            weightValuesBF16_1.resize(1 * conv0OutputChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16_0.data(), weightValuesBF16_0.size());
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16_1.data(), weightValuesBF16_1.size());
            weightsNode0 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape0, weightValuesBF16_0.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape1, weightValuesBF16_1.data());
        }

        std::shared_ptr<ngraph::Node> convNode0 = std::make_shared<ngraph::opset1::Convolution>(
            powerNode0, weightsNode0,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0->set_friendly_name("Convolution_0");

        // Eltwise, i.e. Max
        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
        auto batchSize = inputShapes[0];
        auto heightSize = inputShapes[2];
        auto widthSize = inputShapes[3];
        if (netPrecision == Precision::FP32) {
            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize}, { 2.0f });
        } else {
            maxConst = opset1::Constant::create(ntype, Shape{batchSize, conv0OutputChannels, heightSize, widthSize},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        maxConst->set_friendly_name("Max_const");
        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0, maxConst);
        eltMaxNode->set_friendly_name("Elt_max");

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltMaxNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        return std::make_shared<ngraph::Function>(convNode1, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 0.2f;  // Max in fp32 network by output: 20.0761

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_0"] = "BF16";
        expectedPrecisions["Convolution_1"] = "BF16";
    }
};

TEST_P(Elt_max, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Elt_max,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 40, 40})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_max::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Elt_max,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 40, 40})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_max::getTestCaseName);
} // namespace LayerTestsDefinitions
@ -1,211 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Elt_x3 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        /*              Power (FP32)
         *             /     |      \
         *     Conv(BF16) Conv(BF16) Conv(BF16)
         *            /      |          /
         * ----------------------------------------------
         *  Eltwise(MAX)(FP32)     Eltwise(Mul) (FP32)
         *          |                    |
         *      Conv(BF16)           Conv(BF16)
         *            \              /
         *           Eltwise (SUM)(BF16)
         *                   |
         *              Conv (BF16)
         */

        auto channelsCount = inputShapes[1];

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_0");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode0_1 = nullptr, weightsNode0_2 = nullptr,
                                                  weightsNode0_3 = nullptr, weightsNode1 = nullptr,
                                                  weightsNode2 = nullptr, weightsNode3 = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode0_1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode0_2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode0_3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode1 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode0_1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_1->set_friendly_name("Convolution_0_1");

        std::shared_ptr<ngraph::Node> convNode0_2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_2->set_friendly_name("Convolution_0_2");

        std::shared_ptr<ngraph::Node> convNode0_3 = std::make_shared<ngraph::opset1::Convolution>(
            addNode0, weightsNode0_3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode0_3->set_friendly_name("Convolution_0_3");

        // Eltwise, i.e. Mul
        auto eltMulNode = std::make_shared<opset1::Multiply>(convNode0_1, convNode0_2);
        eltMulNode->set_friendly_name("Elt_mul");

        // Eltwise, i.e. Max
        std::shared_ptr<ngraph::opset1::Constant> maxConst = nullptr;
        if (netPrecision == Precision::FP32) {
            maxConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
        } else {
            maxConst = opset1::Constant::create(ntype, Shape{inputShapes},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto eltMaxNode = std::make_shared<opset1::Maximum>(convNode0_3, maxConst);
        eltMaxNode->set_friendly_name("Elt_max");

        // convolution
        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            eltMulNode, weightsNode1,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            eltMaxNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // eltwise, i.e. sum
        auto eltSumNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltSumNode->set_friendly_name("Elt_sum");

        // convolution
        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            eltSumNode, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(convNode3, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor

        // 256 channels, 38 x 38 size
        // threshold = 0.6f;  // Max in fp32 network by output: 12.0983

        // 3 channels, 4 x 4 size
        threshold = 30.6f;  // Max in fp32 network by output: 879.077

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["Convolution_3"] = "BF16";
        expectedPrecisions["Elt_sum"] = "ndef";
    }
};

TEST_P(Elt_x3, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Elt_x3,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({1, 3, 4, 4})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_x3::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Elt_x3,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({1, 3, 4, 4})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Elt_x3::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,139 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Faster100_5_1_1_Conv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Power (FP32)
        //       |
        //  Convolution (BF16)

        // STAGE1: construction of the GRAPH
        auto channelsCount = inputShapes[1];

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // problematic convolution: 100x5x1x1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 1, 1 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues;
            weightValues.resize(channelsCount * channelsCount * 1 * 1, 0.f);
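            // a few individual taps set to 1.0f make a sparse 1x1 kernel; presumably this keeps the
            // reference result simple while still exercising the problematic 1x1 convolution named above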
            weightValues[0] = 1.0f;
            weightValues[7] = 1.0f;
            weightValues[11] = 1.0f;
            weightValues[19] = 1.0f;
            weightValues[23] = 1.0f;
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, convFilterShape, weightValues);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 1 * 1, FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(0.0f));
            weightValuesBF16[0] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[7] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[11] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[19] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightValuesBF16[23] = FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f);
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_6");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_6"] = "BF16";
    }
};

TEST_P(Faster100_5_1_1_Conv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_bfloat16_NoReshape, Faster100_5_1_1_Conv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Faster100_5_1_1_Conv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Faster100_5_1_1_Conv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 10, 5, 1, 1 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Faster100_5_1_1_Conv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,130 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Gather_multiply : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Add (FP32)
        //      |
        //   FC (BF16)
        //     /
        // -------------------------------------------
        // Gather(BF16)   Const
        //       \       /
        //       Mul(FP32)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto inputSize = inputShapes[1];

        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
        } else {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
        matmulNode->set_friendly_name("Matmul_0");

        // gather
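        // gather along axis 1 with identity indices 0..inputSize-1, i.e. a pass-through gather
        // that still exercises the Gather primitive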
        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
        std::vector<size_t> gatherArray;
        for (size_t i = 0; i < inputSize; i++) {
            gatherArray.push_back(i);
        }
        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
        auto gatherNode = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode->set_friendly_name("Gather_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> mulConst = nullptr;
        if (netPrecision == Precision::FP32) {
            mulConst = opset1::Constant::create(ntype, Shape{inputShapes}, { 2.0f });
        } else {
            mulConst = opset1::Constant::create(ntype, Shape{inputShapes},
                                                { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode, mulConst);
        mulNode->set_friendly_name("Mul_1");

        return std::make_shared<ngraph::Function>(mulNode, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 0.4f;  // Max in fp32 network by output: 9.20144

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Matmul_0"] = "BF16";
        expectedPrecisions["Mul_1"] = netPrecision.name();  // tail kept in FP32 precision
    }
};

TEST_P(Gather_multiply, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Gather_multiply,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({2048, 64})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_multiply::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Gather_multiply,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({2048, 64})),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_multiply::getTestCaseName);
} // namespace LayerTestsDefinitions
@ -1,158 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Gather_x2_add_mul_relu_concat_matmul : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Add (FP32)
        //      |
        //  FullyConnected (BF16)
        //     /       |        \
        // -------------------------------------------
        // Gather(FP32)  Gather(FP32)   Add (FP32)
        //       \       /               /
        //       Mul(FP32)        ReLU(FP32)
        //            \            /
        //           Concat(BF16)      Const
        //                 \          /
        //                 Matmul(BF16)

        // STAGE1: construction of the GRAPH
        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // add
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        auto inputSize = inputShapes[1];

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> addConst = nullptr;
        if (netPrecision == Precision::FP32) {
            addConst = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            addConst = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode0 = std::make_shared<opset1::Multiply>(input1, addConst);
        addNode0->set_friendly_name("Add_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst0 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize}, { 2.0f });
        } else {
            matmulConst0 = opset1::Constant::create(ntype, Shape{inputSize, inputSize},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode = std::make_shared<opset1::MatMul>(addNode0, matmulConst0);
        matmulNode->set_friendly_name("Matmul_0");

        // gather
        std::vector<size_t> gatherArray;
        for (size_t i = 0; i < inputSize; i++) {
            gatherArray.push_back(i);
        }
        auto axesConst = opset1::Constant::create(ngraph::element::i64, Shape{1}, { 1 });
        auto indexesConst = opset1::Constant::create(ngraph::element::i64, Shape{inputSize}, gatherArray);
        auto gatherNode1 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode1->set_friendly_name("Gather_1");

        auto gatherNode2 = std::make_shared<opset1::Gather>(matmulNode, indexesConst, axesConst);
        gatherNode2->set_friendly_name("Gather_2");

        // multiply
        auto mulNode = std::make_shared<opset1::Multiply>(gatherNode1, gatherNode2);
        mulNode->set_friendly_name("Mul_1");

        // add
        auto addNode1 = std::make_shared<opset1::Multiply>(matmulNode, addConst);
        addNode1->set_friendly_name("Add_2");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(addNode1);
        reluNode->set_friendly_name("Relu_1");

        // Concat
        ngraph::NodeVector concInputNodes = {mulNode, reluNode};
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("Conc_1");

        // matmul
        std::shared_ptr<ngraph::opset1::Constant> matmulConst1 = nullptr;
        if (netPrecision == Precision::FP32) {
            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2}, { 2.0f });
        } else {
            matmulConst1 = opset1::Constant::create(ntype, Shape{inputSize * 2, inputSize * 2},
                                                    { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto matmulNode1 = std::make_shared<opset1::MatMul>(concNode, matmulConst1);
        matmulNode1->set_friendly_name("Matmul_1");

        return std::make_shared<ngraph::Function>(matmulNode1, ngraph::ParameterVector{input1});
    }
    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE2: set up a safe threshold <= 5% of the maximum value of the output tensor
        threshold = 177.f;  // Max in fp32 network by output: 3887.11

        // STAGE3:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["Matmul_0"] = "BF16";
        expectedPrecisions["Mul_1"] = "BF16";
        expectedPrecisions["Add_1"] = netPrecision.name();  // FP32->BF16 in case of FP32 net, BF16->BF16 in case of BF16 net
        expectedPrecisions["Relu_1"] = "ndef";
        expectedPrecisions["Conc_1"] = "BF16";
        expectedPrecisions["Matmul_1"] = "BF16";
    }
};

TEST_P(Gather_x2_add_mul_relu_concat_matmul, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 2048, 64 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Gather_x2_add_mul_relu_concat_matmul,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 2048, 64 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Gather_x2_add_mul_relu_concat_matmul::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,115 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <fstream>

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ie_system_conf.h"

#include <ngraph/ngraph.hpp>

namespace LayerTestsDefinitions {

using InferenceEngine::Precision;
using InferenceEngine::SizeVector;

class MemoryConv : public testing::WithParamInterface<LayerTestsUtils::basicParams>,
                   public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(testing::TestParamInfo<LayerTestsUtils::basicParams> obj) {
        Precision netPrecision;
        SizeVector inputShapes;
        std::string targetDevice;
        std::tie(netPrecision, inputShapes, targetDevice) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

protected:
    void SetUp() override {
        SizeVector ie_shape;
        std::tie(inPrc, ie_shape, targetDevice) = this->GetParam();

        using namespace ngraph;
        using std::make_shared;

        Shape shape = ie_shape;
        size_t C = shape[1];
        element::Type type = ngraph::element::f32;

        auto input = make_shared<op::v0::Parameter>(type, shape);
        auto mem_i = make_shared<op::v0::Constant>(type, shape, 0);
        auto mem_r = make_shared<op::v3::ReadValue>(mem_i, "id");

        auto mul = make_shared<op::v1::Multiply>(mem_r, input);
        auto sig = make_shared<op::v0::Sigmoid>(mul);

        auto fc1_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
        auto fc1_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
        auto fc1 = make_shared<op::v0::MatMul>(sig, fc1_w);
        auto bias_1 = make_shared<op::v1::Add>(fc1, fc1_b);

        auto fc2_w = make_shared<op::v0::Constant>(type, Shape{C, C}, 1);
        auto fc2_b = make_shared<op::v0::Constant>(type, Shape{C}, 1);
        auto fc2 = make_shared<op::v0::MatMul>(bias_1, fc2_w);
        auto bias_2 = make_shared<op::v1::Add>(fc2, fc2_b);

        auto mem_w = make_shared<op::v3::Assign>(bias_1, "id");

        // WA for an ngraph limitation: explicit control dependencies are required
        // to keep the ReadValue/Assign pair ordered relative to the rest of the graph.
        mem_w->add_control_dependency(mem_r);
        bias_2->add_control_dependency(mem_w);

        function = std::make_shared<ngraph::Function>(
            ngraph::NodeVector {bias_2},
            ngraph::ParameterVector {input},
            "SimpleNet");
    }
};

TEST_P(MemoryConv, CheckTypeConversion) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    if (!InferenceEngine::with_cpu_x86_bfloat16())
        GTEST_SKIP();

    auto ie = PluginCache::get().ie();
    auto net = InferenceEngine::CNNNetwork(function);
    auto exe_net = ie->LoadNetwork(net, "CPU");
    auto inf_reg = exe_net.CreateInferRequest();

    // check data type via exec graph
    auto exec_graph = exe_net.GetExecGraphInfo();
    auto exec_ops = exec_graph.getFunction()->get_ops();
    std::shared_ptr<ngraph::Node> mem_r, mem_w;
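    // in the CPU plugin's execution graph, the ReadValue/Assign pair surfaces as
    // MemoryInput/MemoryOutput runtime nodes, which is what the loop below looks for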
    for (auto &node : exec_ops) {
        auto var = node->get_rt_info()["layerType"];
        auto s_val = var.as<std::string>();
        if (s_val == "MemoryOutput")
            mem_w = node;
        if (s_val == "MemoryInput")
            mem_r = node;
    }

    ASSERT_NE(nullptr, mem_r);
    ASSERT_EQ(ngraph::element::bf16, mem_r->output(0).get_element_type());

    ASSERT_NE(nullptr, mem_w);
    ASSERT_EQ(ngraph::element::bf16, mem_w->input(0).get_element_type());
}

INSTANTIATE_TEST_SUITE_P(smoke_CPU, MemoryConv,
                         ::testing::Combine(
                             ::testing::Values<Precision>(Precision::BF16, Precision::FP32),
                             ::testing::Values(SizeVector{1, 200}),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MemoryConv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,185 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <vector>
#include <string>

#include <ie_core.hpp>
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class MobileNet_ssd_with_branching : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift
        //          |
        //      Conv1 (FP32)
        //          |      \
        //      Conv2 (BF16)  \
        //          |          |
        //      relu(fused)    |
        //          |        Normalize (not LRN)
        //   Conv (DW)(BF16)   |
        //          |          |
        //     ReLU (Fused)    |
        //           \        /
        //             Concat

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // Conv1
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Conv2
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode2);
        reluNode->set_friendly_name("RELU_2");

        // DW convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, 1, 1, 3, 3 };  // groups, out channels per group, input channels per group, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2FP32;
            weightValues2FP32.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2FP32.data(), weightValues2FP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2FP32);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * 1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> dwConvNode = std::make_shared<ngraph::opset1::GroupConvolution>(
            reluNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        dwConvNode->set_friendly_name("DW_CONV");

        // ReLU
        auto reluNode2 = std::make_shared<opset1::Relu>(dwConvNode);
        reluNode2->set_friendly_name("RELU_DW");

        // normalize
        const auto axes = make_shared<op::Constant>(element::i64, Shape{2}, vector<int64_t>{2});
        float eps{1e-6f};
        auto eps_mode = op::EpsMode::ADD;

        auto normNode = std::make_shared<opset1::NormalizeL2>(convNode1, axes, eps, eps_mode);
        normNode->set_friendly_name("NORM_1");

        // Concat
        ngraph::NodeVector concInputNodes = { reluNode2, normNode };
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("CONC_1");

        return std::make_shared<ngraph::Function>(concNode, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.85f;  // max value in the final tensor is 87.67
        // STAGE2:
        // fill in the expected precision of layer execution, defined by the precision of the input tensor to the
        // primitive and reflected in the performance counters
        expectedPrecisions["ADD_1"] = "FP32";
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU_2"] = "ndef";
        expectedPrecisions["DW_CONV"] = "BF16";
        expectedPrecisions["RELU_DW"] = "ndef";
    }
};

TEST_P(MobileNet_ssd_with_branching, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, MobileNet_ssd_with_branching,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MobileNet_ssd_with_branching::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, MobileNet_ssd_with_branching,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         MobileNet_ssd_with_branching::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,157 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)    Conv (FP32)
        //          \            /
        //      Eltwise (Fused into Conv)
        //               |
        //           Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        auto channelsCount = inputShapes[1];

        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }
std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
|
||||
input1, weightsNode,
|
||||
ngraph::Strides({ 1, 1 }), // strides
|
||||
ngraph::CoordinateDiff({ 1, 1 }), // pad begin
|
||||
ngraph::CoordinateDiff({ 1, 1 }), // pad end
|
||||
ngraph::Strides({ 1, 1 }), // dilation
|
||||
ngraph::op::PadType::EXPLICIT); // pad type
|
||||
convNode1->set_friendly_name("CONV_1");
|
||||
|
||||
// Eltwise, i.e. Add
|
||||
auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
|
||||
eltNode->set_friendly_name("ELT_1");
|
||||
|
||||
// Convolution
|
||||
std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
|
||||
ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 }; // out channel, /input channels, kernel h, kernel w
|
||||
if (netPrecision == Precision::FP32) {
|
||||
std::vector<float> weightValues2;
|
||||
weightValues2.resize(channelsCount * channelsCount * 3 * 3);
|
||||
FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
|
||||
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
|
||||
} else {
|
||||
std::vector<short> weightValues2BF16;
|
||||
weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
|
||||
FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
|
||||
weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
|
||||
eltNode, weightsNode2,
|
||||
ngraph::Strides({ 1, 1 }), // strides
|
||||
ngraph::CoordinateDiff({ 0, 0 }), // pad begin
|
||||
ngraph::CoordinateDiff({ 0, 0 }), // pad end
|
||||
ngraph::Strides({ 1, 1 }), // dilation
|
||||
ngraph::op::PadType::EXPLICIT); // pad type
|
||||
convNode2->set_friendly_name("CONV_2");
|
||||
|
||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
|
||||
fnPtr = createGraph(netPrecision);
|
||||
|
||||
// STAGE1:
|
||||
threshold = 1.0f; // max value in the latest tensor for FP32 network is 37.77
|
||||
// STAGE2:
|
||||
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
|
||||
// performance counters
|
||||
expectedPrecisions["ADD_1"] = netPrecision.name();
|
||||
expectedPrecisions["CONV_1"] = "BF16";
|
||||
expectedPrecisions["CONV_2"] = "BF16";
|
||||
expectedPrecisions["ELT_1"] = "ndef";
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(ScaleshiftConvEltwiseConv, CompareWithRefImpl) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
|
||||
test();
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
|
||||
::testing::Combine(
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(SizeVector({ 1, 3, 40, 40 })),
|
||||
::testing::Values(SizeVector()),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
ScaleshiftConvEltwiseConv::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseConv,
|
||||
::testing::Combine(
|
||||
::testing::Values(Precision::FP32),
|
||||
::testing::Values(Precision::BF16),
|
||||
::testing::Values(SizeVector({ 1, 3, 40, 40 })),
|
||||
::testing::Values(SizeVector()),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
ScaleshiftConvEltwiseConv::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
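// The FP32/BF16 instantiation pattern repeated across these files, reduced to a minimal
// self-contained GoogleTest sketch. The tuple layout mirrors the tests above (input precision,
// net precision, input shape, reshape shape, device); the suite and type names here are
// hypothetical, not part of the removed tests.
#include <gtest/gtest.h>
#include <string>
#include <tuple>
#include <vector>

using DemoShape  = std::vector<size_t>;
using DemoParams = std::tuple<std::string, std::string, DemoShape, DemoShape, std::string>;

class PrecisionSuite : public ::testing::TestWithParam<DemoParams> {};

TEST_P(PrecisionSuite, HasDevice) {
    // Unpack the parameter tuple exactly as the SetUp() overrides above do.
    std::string inPrc, netPrc, device;
    DemoShape shape, reshape;
    std::tie(inPrc, netPrc, shape, reshape, device) = GetParam();
    EXPECT_FALSE(device.empty());
}

// One Combine() per precision mode yields the smoke_FP32_* / smoke_BF16_* pairs seen above.
INSTANTIATE_TEST_SUITE_P(smoke_Demo, PrecisionSuite,
    ::testing::Combine(
        ::testing::Values(std::string("FP32")),
        ::testing::Values(std::string("FP32"), std::string("BF16")),
        ::testing::Values(DemoShape({ 1, 3, 40, 40 })),
        ::testing::Values(DemoShape()),
        ::testing::Values(std::string("CPU"))));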
@ -1,163 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseReluConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)   Conv (BF16)
        //          \             /
        //        Eltwise (Fused into Conv)
        //               |
        //        ReLU (Fused into Conv)
        //               |
        //           Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(addNode, convNode1);
        eltNode->set_friendly_name("ELT_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // Convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValues2;
            weightValues2.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2.data(), weightValues2.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2);
        } else {
            std::vector<short> weightValues2BF16;
            weightValues2BF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValues2BF16.data(), weightValues2BF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValues2BF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            reluNode, weightsNode2,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;  // max in the FP32 network by output of CONV_2: 30.1374
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["RELU_1"] = "ndef";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvEltwiseReluConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseReluConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseReluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseReluConv::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,155 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEltwiseScaleshift : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        // scaleshift (FP32)  \
        //        |            |
        //   Conv (BF16)       |
        //         \          /
        //       Eltwise (Fused into Conv)
        //              |
        //            ReLU
        //              |
        //      scaleshift (FP32)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(input1, convNode1);
        eltNode->set_friendly_name("ELT_1");

        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(reluNode, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("ADD_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 0.4f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvEltwiseScaleshift, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseScaleshift::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEltwiseScaleshift,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEltwiseScaleshift::getTestCaseName);

} // namespace LayerTestsDefinitions
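// How the expectedPrecisions maps filled in the SetUp() overrides above are typically consumed:
// compare each layer's recorded execution precision against the expectation, where "ndef" marks
// a layer fused away (no standalone performance counter). A sketch only; the real check lives in
// BasicBF16Test in bfloat16_helpers.hpp, which this diff does not show, and precisionsMatch is
// a hypothetical name.
#include <map>
#include <string>
#include <cassert>

static bool precisionsMatch(const std::map<std::string, std::string>& expected,
                            const std::map<std::string, std::string>& measured) {
    for (const auto& kv : expected) {
        if (kv.second == "ndef")
            continue;  // fused layers have no counter of their own
        auto it = measured.find(kv.first);
        if (it == measured.end() || it->second != kv.second)
            return false;
    }
    return true;
}

int main() {
    std::map<std::string, std::string> expected{{"CONV_1", "BF16"}, {"ELT_1", "ndef"}};
    std::map<std::string, std::string> measured{{"CONV_1", "BF16"}};  // ELT_1 fused, so absent
    assert(precisionsMatch(expected, measured));
    return 0;
}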
@ -1,143 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvEluConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)
        //        |
        //   Conv (BF16)
        //        |
        //  Elu (FP32 for now; this must be fixed, and it must be fused into Conv)
        //        |
        //   Conv (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // Elu
        auto eluNode = std::make_shared<opset1::Elu>(convNode1, 2);
        eluNode->set_friendly_name("ELU_1");

        // Conv
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            eluNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ScaleshiftConvEluConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvEluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEluConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvEluConv,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvEluConv::getTestCaseName);

} // namespace LayerTestsDefinitions
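// Reference semantics of the Elu node above (constructed with alpha = 2), useful when reasoning
// about the comparison thresholds: f(x) = x for x > 0, alpha * (exp(x) - 1) otherwise. A minimal
// standalone sketch, not the plugin implementation.
#include <cmath>
#include <cstdio>

static float elu(float x, float alpha) {
    return x > 0.0f ? x : alpha * (std::exp(x) - 1.0f);
}

int main() {
    printf("elu(1.5, 2)  = %f\n", elu(1.5f, 2.0f));   // positive inputs pass through: 1.5
    printf("elu(-1.0, 2) = %f\n", elu(-1.0f, 2.0f));  // 2 * (e^-1 - 1) ~= -1.2642
    return 0;
}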
@ -1,130 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConvRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift (FP32)
        //        |
        //   Conv (BF16)
        //        |
        //  relu (Fused into convolution)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode1);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["RELU_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConvRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConvRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConvRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConvRelu::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,148 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_ConcatRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        // scaleshift
        //    /   \
        //  Conv   Conv
        //    \   /
        //   concat
        //      |
        //    relu

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Concat
        ngraph::NodeVector concInputNodes = { convNode1, convNode2 };
        auto concNode = std::make_shared<opset1::Concat>(concInputNodes, 1);
        concNode->set_friendly_name("CONC_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(concNode);
        reluNode->set_friendly_name("RELU_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{reluNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1.0f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
    }
};

TEST_P(ScaleshiftConv_x2_ConcatRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_ConcatRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_ConcatRelu,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_ConcatRelu::getTestCaseName);

} // namespace LayerTestsDefinitions
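// Shape bookkeeping behind the Concat above: concatenation along axis 1 sums the channel
// dimension and requires every other dimension to match, so the two 1x3x40x40 convolution
// outputs produce a 1x6x40x40 result. A shape-only sketch; concatShape is a hypothetical
// helper, and validation of the non-axis dimensions is assumed done elsewhere.
#include <vector>
#include <cstdio>

static std::vector<size_t> concatShape(const std::vector<std::vector<size_t>>& inputs, size_t axis) {
    std::vector<size_t> out = inputs.front();
    for (size_t i = 1; i < inputs.size(); ++i)
        out[axis] += inputs[i][axis];  // only the concat axis grows
    return out;
}

int main() {
    auto out = concatShape({{1, 3, 40, 40}, {1, 3, 40, 40}}, 1);
    printf("%zux%zux%zux%zu\n", out[0], out[1], out[2], out[3]);  // prints 1x6x40x40
    return 0;
}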
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        /*     scaleshift (FP32)
         *        /       \
         *  Conv1 (BF16)   Conv2 (BF16)
         *        \       /
         *   eltwise (Fused into Conv1), produces FP32 output
         */

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");
        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,141 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_mixed1_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        // scaleshift (FP32)  \
        //        |            \
        //  Conv1 (BF16)   Conv2 (BF16)
        //         \          /
        //      eltwise (Fused into Conv1)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");
        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["ADD_1"] = netPrecision.name();
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_mixed1_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed1_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed1_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
@ -1,142 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x2_mixed2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //          input
        //         /     \
        //        |   scaleshift (FP32)
        //        |        |
        //  Conv1 (BF16)   Conv2 (BF16)
        //         \      /
        //      eltwise (Fused into Conv1)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // output channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            input1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("CONV_1");

        // multiply
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("ADD_2");

        // convolution
        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("CONV_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2e-1f;
        // STAGE2:
        // filling of the expected precision of layer execution, defined by the precision of the input
        // tensor to the primitive and reflected in performance counters
        expectedPrecisions["CONV_1"] = "BF16";
        expectedPrecisions["ADD_2"] = netPrecision.name();
        expectedPrecisions["CONV_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
    }
};

TEST_P(ScaleshiftConv_x2_mixed2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::FP32),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x2_mixed2_Eltwise,
    ::testing::Combine(
        ::testing::Values(Precision::FP32),
        ::testing::Values(Precision::BF16),
        ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
        ::testing::Values(SizeVector()),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
    ScaleshiftConv_x2_mixed2_Eltwise::getTestCaseName);

} // namespace LayerTestsDefinitions
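// Output spatial size for the explicitly padded convolutions in these graphs follows the usual
// arithmetic: out = (in + padBegin + padEnd - ((kernel - 1) * dilation + 1)) / stride + 1. A
// standalone sketch (convOutDim is a hypothetical helper, not from the test harness):
#include <cstdio>

static size_t convOutDim(size_t in, size_t kernel, size_t stride,
                         size_t padBegin, size_t padEnd, size_t dilation) {
    size_t effKernel = (kernel - 1) * dilation + 1;  // dilated kernel extent
    return (in + padBegin + padEnd - effKernel) / stride + 1;
}

int main() {
    // The pad {1,1} convolutions above keep 40x40; the pad {0,0} ones shrink to 38x38.
    printf("%zu\n", convOutDim(40, 3, 1, 1, 1, 1));  // 40
    printf("%zu\n", convOutDim(40, 3, 1, 0, 0, 1));  // 38
    return 0;
}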
@ -1,175 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class ScaleshiftConv_x3_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //        scaleshift (FP32)
        //           /         \
        //   Conv1 (BF16)   Conv2 (BF16)
        //           \         /
        //     Eltwise (Fused to Conv1)
        //              |
        //         Conv3 (BF16)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t outChannels = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, convNode2);
        eltNode->set_friendly_name("ELT_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode3 = nullptr;
        ngraph::Shape convFilterShape3 = { outChannels, outChannels, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * outChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * outChannels * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode3 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape3, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode3 = std::make_shared<ngraph::opset1::Convolution>(
            eltNode, weightsNode3,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode3->set_friendly_name("Convolution_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{convNode3}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 2.0f;  // max value in the latest tensor for the FP32 network is 93.3

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_1"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["ELT_1"] = "ndef";
        expectedPrecisions["Convolution_3"] = "BF16";
    }
};

TEST_P(ScaleshiftConv_x3_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ScaleshiftConv_x3_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, ScaleshiftConv_x3_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         ScaleshiftConv_x3_Eltwise::getTestCaseName);
} // namespace LayerTestsDefinitions
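Note: the expectedPrecisions map drives a comparison against per-layer performance counters, with "ndef" marking nodes expected to disappear through fusion. A hypothetical sketch of such a check follows; the perfCounters container and its contents here are assumptions for illustration, not the BasicBF16Test internals:

// Illustrative check of an expected-precision map against perf counters.
#include <iostream>
#include <map>
#include <string>

int main() {
    std::map<std::string, std::string> expectedPrecisions = {
        {"Add_1", "FP32"}, {"Convolution_1", "BF16"}, {"ELT_1", "ndef"}};
    std::map<std::string, std::string> perfCounters = {  // layer -> exec precision
        {"Add_1", "FP32"}, {"Convolution_1", "BF16"}};   // ELT_1 was fused away
    for (const auto& kv : expectedPrecisions) {
        if (kv.second == "ndef") {
            // "ndef" layers are expected to be fused into a neighbour,
            // so no standalone execution precision is defined for them.
            continue;
        }
        auto it = perfCounters.find(kv.first);
        if (it == perfCounters.end() || it->second != kv.second)
            std::cout << "precision mismatch for " << kv.first << "\n";
    }
}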
@ -1,162 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Scaleshift_x2_Conv_x2_Eltwise : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //   scaleshift (FP32)    scaleshift (FP32)
        //            \            /        \
        //           Eltwise (FP32)      Conv (BF16)
        //                |                  |
        //                               Conv (BF16)
        //                                   |

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("Add_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(addNode, addNode2);
        eltNode->set_friendly_name("ELT_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode2, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            convNode1, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{eltNode, convNode2}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 1;

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_2"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["ELT_1"] = netPrecision.name();
    }
};

TEST_P(Scaleshift_x2_Conv_x2_Eltwise, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Scaleshift_x2_Conv_x2_Eltwise,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x2_Conv_x2_Eltwise::getTestCaseName);
} // namespace LayerTestsDefinitions
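Note: weight tensors in these graphs are filled with FuncTestUtils::fillInputsBySinValues, which yields deterministic, bounded, non-constant data. A minimal sketch in the same spirit (the exact formula inside the helper is an assumption here):

// Sine-based deterministic weight fill, illustrative only.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

void fill_by_sin(float* data, size_t size) {
    for (size_t i = 0; i < size; ++i)
        data[i] = std::sin(static_cast<float>(i));  // values stay in [-1, 1]
}

int main() {
    std::vector<float> weights(3 * 3 * 3 * 3);  // outC x inC x kH x kW
    fill_by_sin(weights.data(), weights.size());
    std::cout << weights[1] << "\n";  // sin(1) ~ 0.8415
}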
@ -1,184 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <functional>
#include <map>
#include <utility>

#include <ie_core.hpp>

#include "functional_test_utils/blob_utils.hpp"
#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class Scaleshift_x3_ConvEltwiseRelu : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //      scaleshift (FP32)
        //             |
        //        Conv (BF16)    scaleshift (FP32)
        //             \            /
        //        Eltwise (Fused to Conv)
        //                |
        //         ReLU (Fused to Conv)
        //                |
        //         scaleshift (FP32)

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { channelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(channelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode1 = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 1, 1 }),  // pad begin
            ngraph::CoordinateDiff({ 1, 1 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode1->set_friendly_name("Convolution_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const3 = nullptr;
        if (netPrecision == Precision::FP32) {
            const3 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const3 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto mulNode2 = std::make_shared<opset1::Multiply>(input1, const3);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const4 = nullptr;
        if (netPrecision == Precision::FP32) {
            const4 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const4 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto addNode2 = std::make_shared<opset1::Add>(mulNode2, const4);
        addNode2->set_friendly_name("Add_2");

        // Eltwise, i.e. Add
        auto eltNode = std::make_shared<opset1::Add>(convNode1, addNode2);
        eltNode->set_friendly_name("ELT_1");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(eltNode);
        reluNode->set_friendly_name("RELU_1");

        // multiply
        std::shared_ptr<ngraph::opset1::Constant> const5 = nullptr;
        if (netPrecision == Precision::FP32) {
            const5 = opset1::Constant::create(ntype, Shape{1}, { 4.0f });
        } else {
            const5 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(4.0f)) });
        }
        auto mulNode3 = std::make_shared<opset1::Multiply>(reluNode, const5);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const6 = nullptr;
        if (netPrecision == Precision::FP32) {
            const6 = opset1::Constant::create(ntype, Shape{1}, { 3.0f });
        } else {
            const6 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(3.0f)) });
        }
        auto addNode3 = std::make_shared<opset1::Add>(mulNode3, const6);
        addNode3->set_friendly_name("Add_3");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{addNode3}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        // STAGE1:
        threshold = 5e-1;

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_1"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Add_2"] = netPrecision.name();
        expectedPrecisions["ELT_1"] = "ndef";
        expectedPrecisions["RELU_1"] = "ndef";
    }
};

TEST_P(Scaleshift_x3_ConvEltwiseRelu, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, Scaleshift_x3_ConvEltwiseRelu,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         Scaleshift_x3_ConvEltwiseRelu::getTestCaseName);
} // namespace LayerTestsDefinitions
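Note: ELT_1 and RELU_1 are expected to report "ndef" above because the graph comment assumes the plugin folds the Add and ReLU into the preceding convolution as a fused epilogue, so they never execute as standalone primitives. A toy per-element sketch of that epilogue (illustrative only, not the plugin's fusion code):

// Toy fused Add+ReLU epilogue applied to a convolution result.
#include <algorithm>
#include <iostream>

float fused_epilogue(float conv_result, float eltwise_operand) {
    float sum = conv_result + eltwise_operand;  // fused Eltwise (Add)
    return std::max(sum, 0.0f);                 // fused ReLU
}

int main() {
    std::cout << fused_epilogue(-1.5f, 2.0f) << "\n";  // 0.5
    std::cout << fused_epilogue(-3.0f, 1.0f) << "\n";  // 0
}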
@ -1,145 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class PoolingAfterConv : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //  Scaleshift (FP32)
        //         |
        //  Convolution (BF16)
        //         |
        //    ReLU (Fused)
        //         |
        //   Pooling (FP32)  <- this layer can be executed in bf16 if it passes data
        //                      to the next bf16 layer; otherwise the tail
        //                      optimization returns Pooling to FP32

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t outChannels = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { outChannels, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(outChannels * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_6");

        // ReLU
        auto reluNode = std::make_shared<opset1::Relu>(convNode);

        // Pooling
        auto avgpoolNode = std::make_shared<opset1::AvgPool>(reluNode,
                                                             Strides{1, 1},
                                                             Shape{1, 1},
                                                             Shape{1, 1},
                                                             Shape{2, 2},
                                                             true,
                                                             op::RoundingType::FLOOR);
        avgpoolNode->set_friendly_name("AvgPool_8");

        return std::make_shared<ngraph::Function>(ngraph::NodeVector{avgpoolNode}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.14f;  // max value in the latest tensor for the FP32 network is 14.6448

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_6"] = "BF16";
    }
};

TEST_P(PoolingAfterConv, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, PoolingAfterConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PoolingAfterConv::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, PoolingAfterConv,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         PoolingAfterConv::getTestCaseName);
} // namespace LayerTestsDefinitions
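Note: the comment in PoolingAfterConv describes the tail rule: a precision-agnostic layer may stay in bf16 only while it feeds another bf16 layer; at the graph tail it is rolled back to FP32. A hypothetical sketch of that decision follows; the Layer type and traversal are assumptions for illustration, not the plugin's real code:

// Toy precision assignment mirroring the tail rule described above.
#include <iostream>
#include <string>
#include <vector>

struct Layer {
    std::string name;
    bool bf16_capable;
    std::vector<Layer*> consumers;
    std::string exec_precision = "FP32";
};

void assign_precision(Layer& l) {
    bool feeds_bf16 = false;
    for (Layer* c : l.consumers)
        feeds_bf16 = feeds_bf16 || c->bf16_capable;
    // Stay in bf16 only when a bf16 consumer follows; otherwise fall back.
    l.exec_precision = (l.bf16_capable && feeds_bf16) ? "BF16" : "FP32";
}

int main() {
    Layer conv2{"NextConv", true, {}};
    Layer poolMid{"Pool_mid", true, {&conv2}};  // feeds a bf16 layer
    Layer poolTail{"AvgPool_8", true, {}};      // graph tail, nothing follows
    assign_precision(poolMid);
    assign_precision(poolTail);
    std::cout << poolMid.name << " -> " << poolMid.exec_precision << "\n";   // BF16
    std::cout << poolTail.name << " -> " << poolTail.exec_precision << "\n"; // FP32
}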
@ -1,167 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "bfloat16_helpers.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <map>
#include <functional>
#include <utility>

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

#include "common_test_utils/common_utils.hpp"

#include "ngraph/opsets/opset1.hpp"

using namespace std;
using namespace ngraph;
using namespace InferenceEngine;

namespace LayerTestsDefinitions {

class TopKInputsI32 : public BasicBF16Test {
protected:
    std::shared_ptr<ngraph::Function> createGraph(InferenceEngine::Precision netPrecision) override {
        //         Power (FP32)
        //              |
        //   Convolution1 (BF16)   Const (I32)
        //              \           /
        //              TopK (FP32)
        //        (BF16) /       \ (I32)
        //              |
        //        Convolution_2

        // STAGE1: construction of the GRAPH

        ngraph::element::Type ntype = (netPrecision == Precision::FP32) ? ngraph::element::f32 : ngraph::element::bf16;
        auto channelsCount = inputShapes[1];
        const size_t intermediateChannelsCount = 16;

        // multiply
        auto input1 = std::make_shared<opset1::Parameter>(ntype, ngraph::Shape{inputShapes});
        input1->set_friendly_name("Input_1");
        std::shared_ptr<ngraph::opset1::Constant> const1 = nullptr;
        if (netPrecision == Precision::FP32) {
            const1 = opset1::Constant::create(ntype, Shape{1}, { 2.0f });
        } else {
            const1 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(2.0f)) });
        }
        auto mulNode = std::make_shared<opset1::Multiply>(input1, const1);

        // add
        std::shared_ptr<ngraph::opset1::Constant> const2 = nullptr;
        if (netPrecision == Precision::FP32) {
            const2 = opset1::Constant::create(ntype, Shape{1}, { 1.0f });
        } else {
            const2 = opset1::Constant::create(ntype, Shape{1}, { bfloat16::from_bits(FuncTestUtils::Bf16TestUtils::reducePrecisionBitwiseS(1.0f)) });
        }
        auto addNode = std::make_shared<opset1::Add>(mulNode, const2);
        addNode->set_friendly_name("Add_4");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode = nullptr;
        ngraph::Shape convFilterShape = { intermediateChannelsCount, channelsCount, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(intermediateChannelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(intermediateChannelsCount * channelsCount * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode = std::make_shared<ngraph::opset1::Convolution>(
            addNode, weightsNode,
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode->set_friendly_name("Convolution_1");

        // TopK
        const auto k = make_shared<op::Constant>(element::i32, Shape{}, vector<int>{1});
        size_t axis = 1;
        ngraph::op::v1::TopK::Mode mode = ngraph::op::v1::TopK::Mode::MAX;
        ngraph::op::v1::TopK::SortType sort = ngraph::op::v1::TopK::SortType::NONE;
        auto argmaxNode = std::make_shared<opset1::TopK>(convNode, k, axis, mode, sort);
        argmaxNode->set_friendly_name("TopK_1");

        // convolution
        std::shared_ptr<ngraph::opset1::Constant> weightsNode2 = nullptr;
        ngraph::Shape convFilterShape2 = { 1, 1, 3, 3 };  // out channels, input channels, kernel h, kernel w
        if (netPrecision == Precision::FP32) {
            std::vector<float> weightValuesFP32;
            weightValuesFP32.resize(1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesFP32.data(), weightValuesFP32.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesFP32);
        } else {
            std::vector<short> weightValuesBF16;
            weightValuesBF16.resize(1 * 1 * 3 * 3);
            FuncTestUtils::fillInputsBySinValues(weightValuesBF16.data(), weightValuesBF16.size());
            weightsNode2 = std::make_shared<ngraph::opset1::Constant>(ntype, convFilterShape2, weightValuesBF16.data());
        }

        std::shared_ptr<ngraph::Node> convNode2 = std::make_shared<ngraph::opset1::Convolution>(
            argmaxNode->output(0), weightsNode2->output(0),
            ngraph::Strides({ 1, 1 }),         // strides
            ngraph::CoordinateDiff({ 0, 0 }),  // pad begin
            ngraph::CoordinateDiff({ 0, 0 }),  // pad end
            ngraph::Strides({ 1, 1 }),         // dilation
            ngraph::op::PadType::EXPLICIT);    // pad type
        convNode2->set_friendly_name("Convolution_2");

        return std::make_shared<ngraph::Function>(ngraph::OutputVector{convNode2->output(0), argmaxNode->output(1)}, ngraph::ParameterVector{input1});
    }

    void SetUp() override {
        std::tie(inputPrecision, netPrecision, inputShapes, newInputShapes, targetDevice) = this->GetParam();
        fnPtr = createGraph(netPrecision);

        threshold = 0.5f;  // max value in the latest tensor for the FP32 network is 22.6

        // STAGE2: expected precision of each layer's execution, defined by the precision of the
        // input tensor to the primitive and reflected in the performance counters
        expectedPrecisions["Add_4"] = netPrecision.name();
        expectedPrecisions["Convolution_1"] = "BF16";
        expectedPrecisions["Convolution_2"] = "BF16";
        expectedPrecisions["TopK_1"] = netPrecision.name();  // tail kept in FP32 precision
    }
};

TEST_P(TopKInputsI32, CompareWithRefImpl) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    test();
};

INSTANTIATE_TEST_SUITE_P(smoke_FP32_bfloat16_NoReshape, TopKInputsI32,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         TopKInputsI32::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_BF16_bfloat16_NoReshape, TopKInputsI32,
                         ::testing::Combine(
                             ::testing::Values(Precision::FP32),
                             ::testing::Values(Precision::BF16),
                             ::testing::Values(SizeVector({ 1, 3, 40, 40 })),
                             ::testing::Values(SizeVector()),
                             ::testing::Values(CommonTestUtils::DEVICE_CPU)),
                         TopKInputsI32::getTestCaseName);
} // namespace LayerTestsDefinitions
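Note: TopK in this last test yields two outputs of different element types, values in the data precision and indices in I32, and both are kept as Function results. A standalone top-1 sketch over one axis (illustrative only, not the ngraph kernel):

// Toy top-1 over a flat axis, returning a value/index pair like TopK's
// two outputs (value in float, index in int32).
#include <cstdint>
#include <iostream>
#include <vector>

struct Top1 { float value; int32_t index; };

Top1 top1(const std::vector<float>& channel_values) {
    Top1 best{channel_values[0], 0};  // assumes a non-empty input
    for (int32_t i = 1; i < static_cast<int32_t>(channel_values.size()); ++i)
        if (channel_values[i] > best.value) best = {channel_values[i], i};
    return best;
}

int main() {
    Top1 r = top1({0.1f, 2.5f, -1.0f});  // k = 1, Mode::MAX
    std::cout << r.value << " at index " << r.index << "\n";  // 2.5 at index 1
}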