From a4519f0a2cae0f9efce280455e8fee3ce2cc1b91 Mon Sep 17 00:00:00 2001 From: Tomasz Adamowicz Date: Fri, 16 Jun 2023 13:30:59 +0200 Subject: [PATCH] 16 byte memory alignment and concat (#17712) * use device specific alignment instead of ALIGN64 macro * update for tests * update after review --- .../intel_gna/src/backend/gna_limitations.cpp | 2 +- .../intel_gna/src/backend/gna_limitations.hpp | 12 +- .../intel_gna/src/common/graph_utils.hpp | 2 +- .../intel_gna/src/layers/gna_split_layer.hpp | 24 +- .../src/optimizer/gna_pass_manager.cpp | 32 +- .../src/optimizer/gna_pass_manager.hpp | 2 +- ...lit_convolution_with_large_buffer_size.cpp | 4 +- .../unit/gna_get_aligned_split_sizes.cpp | 82 ++- .../transformations/gna_insert_copy_layer.cpp | 553 +++++++++++------- ...lit_convolution_with_large_buffer_size.cpp | 152 +++-- .../transformations/gna_split_eltwise.cpp | 45 +- 11 files changed, 570 insertions(+), 340 deletions(-) diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.cpp b/src/plugins/intel_gna/src/backend/gna_limitations.cpp index a1d86060aab..4a7848ed1dc 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.cpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.cpp @@ -661,7 +661,6 @@ constexpr uint32_t Limitations::kConvFiltersNumDivider; constexpr uint32_t Limitations::kConvFilterSizeDivider; constexpr uint32_t Limitations::kConvFilterMaxSize; constexpr uint32_t Limitations::kConvEachKernelByteAlignment; -constexpr uint32_t Limitations::kInputByteAlignment; constexpr uint32_t Limitations::kNoOfInputsDivisor; constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor; constexpr uint32_t Limitations::kAffineMaxBatchSize; @@ -673,6 +672,7 @@ constexpr uint32_t Limitations::kMaxLayersCountGNA2_0; constexpr uint32_t Limitations::kMaxLayersCountGNA3_X; constexpr uint32_t Limitations::kBytesPerSplitElement; constexpr uint32_t Limitations::kBytesPerCropElement; +constexpr uint32_t Limitations::kBytesPerConcatElement; constexpr uint32_t Limitations::kMemoryPageSize; thread_local std::shared_ptr Limitations::k_instance{nullptr}; diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.hpp b/src/plugins/intel_gna/src/backend/gna_limitations.hpp index e4846d844f8..91cfc8cc55d 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.hpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.hpp @@ -248,6 +248,7 @@ public: bool use_only_16bit_convolution_weights() const; bool is_crop_affined_offset(size_t numberOfElements) const; + bool is_aligned(size_t addr) const; size_t get_memory_alignment() const; std::shared_ptr get_cnn_validator() const; @@ -260,7 +261,6 @@ public: constexpr static uint32_t kConvFilterSizeDivider = 8; constexpr static uint32_t kConvFilterMaxSize = 768; constexpr static uint32_t kConvEachKernelByteAlignment = 16; - constexpr static uint32_t kInputByteAlignment = 64; constexpr static uint32_t kNoOfInputsDivisor = 8; constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16; constexpr static uint32_t kAffineMaxBatchSize = 8; @@ -274,10 +274,12 @@ public: // Currently split layer only supports 2 bytes in int16 and int8 mode. // In fp32 mode this is not necessary but is useful for testing constexpr static uint32_t kBytesPerSplitElement = 2; - // Currently crop layer only supports 2 bytes in int16 and int8 mode. // In fp32 mode this is not necessary but is useful for testing constexpr static uint32_t kBytesPerCropElement = 2; + // currently concat layer only supports 2 bytes in int16 and int8 mode. 
In fp32 mode this is not necessary but is useful + // for testing + constexpr static uint32_t kBytesPerConcatElement = 2; constexpr static uint32_t kMemoryPageSize = 4096; private: @@ -306,7 +308,11 @@ inline std::shared_ptr Limitations::get_instance() { inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const { const auto cropOffset = numberOfElements * kBytesPerCropElement; - return (ALIGN64(cropOffset) != cropOffset); + return !is_aligned(cropOffset); +} + +inline bool Limitations::is_aligned(size_t addr) const { + return (addr == ALIGN(addr, get_memory_alignment())); } inline size_t Limitations::get_memory_alignment() const { diff --git a/src/plugins/intel_gna/src/common/graph_utils.hpp b/src/plugins/intel_gna/src/common/graph_utils.hpp index 62e4aad80fe..00353215c93 100644 --- a/src/plugins/intel_gna/src/common/graph_utils.hpp +++ b/src/plugins/intel_gna/src/common/graph_utils.hpp @@ -87,7 +87,7 @@ inline bool is_aligned_split(const std::shared_ptr input_op, size_ offset += outputSize * limitations::Limitations::kBytesPerSplitElement; } } - return (offset == ALIGN64(offset)); + return limitations::Limitations::get_instance()->is_aligned(offset); } inline bool is_crop_affined(std::shared_ptr node) { diff --git a/src/plugins/intel_gna/src/layers/gna_split_layer.hpp b/src/plugins/intel_gna/src/layers/gna_split_layer.hpp index 33468fe8b32..1feb82d1a64 100644 --- a/src/plugins/intel_gna/src/layers/gna_split_layer.hpp +++ b/src/plugins/intel_gna/src/layers/gna_split_layer.hpp @@ -47,12 +47,11 @@ public: std::vector splitOutputLayers; }; -// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size -inline std::vector GetAlignedSplitSizes(uint32_t totalSize, - uint32_t maxSplitSize, - uint32_t alignment = limitations::Limitations::kInputByteAlignment) { +// @brief Returns sizes of split outputs to split the input tensor into aligned parts that are not greater than the +// larger of the specified split size and the alignment +inline std::vector GetAlignedSplitSizes(uint32_t totalSize, uint32_t splitSize, uint32_t alignment) { std::vector splitSizes; - uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment); + uint32_t maxAlignedSplitSize = std::max(splitSize - splitSize % alignment, alignment); uint32_t usedSize = 0; while (usedSize < totalSize) { uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize); @@ -73,22 +72,21 @@ inline std::pair> AlignedSplitSizesPerAxis(Infere IE_ASSERT(firstValuableDim != std::end(dims)); auto splittedElementsSize = *firstValuableDim; auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim); - auto alignment = limitations::Limitations::kInputByteAlignment; + auto alignment = limitations::Limitations::get_instance()->get_memory_alignment(); - // Split output size should be multiple by 64 to avoid align filters insertion, - // but we need to check if our input size to split exceeds 64; if not we can always + // Split output size should be a multiple of the device memory alignment to avoid align filter insertion, + // but we need to check if our input size to split exceeds the alignment; if not we can always split if the remaining size is aligned - if (splittedElementsSize <= alignment) { + auto split_size = limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize; + + if (splittedElementsSize <= alignment || split_size < alignment) { if ((totalElementsSize / splittedElementsSize) % alignment
== 0) { alignment = 1; } else { return {splittedDimIx, splitSizes}; } } - splitSizes = - GetAlignedSplitSizes(splittedElementsSize, - limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize, - alignment); + splitSizes = GetAlignedSplitSizes(splittedElementsSize, split_size, alignment); return {splittedDimIx, splitSizes}; } diff --git a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp index 91ed705286c..727f444a7f0 100644 --- a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp +++ b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp @@ -1247,9 +1247,6 @@ void FlattenTrivialConcatPass::run() { void InsertConcatAligningFilterPass::run() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass"); auto quantized = InferenceEngine::getInjectedData(pLayers->front()); - // currently concat layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull - // for testing - const int bytesPerConcatElement = 2; int numOfFilterLayers = 0; @@ -1273,7 +1270,7 @@ void InsertConcatAligningFilterPass::run() { auto concatInput = getLayerByIndex(input_idx); auto dims = concatInput->getDims(); - auto outputSize = details::product(++dims.begin(), dims.end()) * bytesPerConcatElement; + auto outputSize = details::product(++dims.begin(), dims.end()) * Limitations::kBytesPerConcatElement; auto useAlignFilterIf = [&concatLayer, &getLayerByIndex](int concat_input_idx) { if (concatLayer->insData.size() <= concat_input_idx) @@ -1290,7 +1287,8 @@ void InsertConcatAligningFilterPass::run() { // correcting offset by copy layer insertion. This can be improved by collapsing copy and affine or diagonal // later-on if next concat inputs requires align filter - then current input also requires either copy or // align filter - if (ALIGN64(offset) != offset || (ALIGN64(outputSize) != outputSize && useAlignFilterIf(input_idx + 1))) { + if ((!Limitations::get_instance()->is_aligned(offset)) || + ((!Limitations::get_instance()->is_aligned(outputSize)) && useAlignFilterIf(input_idx + 1))) { auto prevLayer = getCreatorLayer(concatInput).lock(); // input layer parameters are copied not using GNA-primitives - so nothing to allign here. if (!useAlignFilterIf(input_idx)) @@ -1310,13 +1308,17 @@ void InsertConcatAligningFilterPass::run() { } auto num_rows_in = dims[1]; - size_t aligned64_offset = std::max(0, static_cast(ALIGN64(offset) - 64)); - size_t num_rows_padded = (offset - aligned64_offset) / bytesPerConcatElement; + size_t aligned_offset = + std::max(0, + static_cast(ALIGN(offset, Limitations::get_instance()->get_memory_alignment()) - + Limitations::get_instance()->get_memory_alignment())); + size_t num_rows_padded = (offset - aligned_offset) / Limitations::kBytesPerConcatElement; size_t num_rows_out = num_rows_padded + num_rows_in; // encodes offset to beginning of split layer input size_t bytesOffset = - (aligned64_offset / bytesPerConcatElement) * (quantized ? bytesPerConcatElement : 4); + (aligned_offset / Limitations::kBytesPerConcatElement) * + (quantized ? 
Limitations::kBytesPerConcatElement : Precision(Precision::FP32).size()); concatAligningFilter->params["output_offset"] = std::to_string(bytesOffset); // for padded rows we cannot use copy layer - TBD how to implement @@ -1496,7 +1498,7 @@ void InsertSplitAligningFilterPass::run() { for (auto&& splitOutput : l->outData) { auto outputSize = product(begin(splitOutput->getDims()), end(splitOutput->getDims())); - if ((currentOffset != ALIGN64(currentOffset)) || (padding != 0)) { + if ((!Limitations::get_instance()->is_aligned(currentOffset)) || (padding != 0)) { // check that this split output actually connected to further layers if (getInputTo(splitOutput).empty()) { log::debug() << "Output port: " << splitOutIndex << " of " << l->name << " unconnected, skipping\n"; @@ -1507,7 +1509,7 @@ << " Convolution Filter doesn't support batch=" << splitOutput->getDims().front(); } - // this split output not beginning from 64 bytes aligned boundary - need to correct by aligning + // this split output does not begin at an aligned byte boundary - need to correct by aligning filter layer insert the filter auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++); @@ -1527,20 +1529,22 @@ auto inputData = splitOutput; - size_t aligned64_offset = std::max(0, static_cast(ALIGN64(currentOffset) - 64)); + size_t aligned_offset = std::max( + 0, + static_cast(ALIGN(currentOffset, Limitations::get_instance()->get_memory_alignment()) - + Limitations::get_instance()->get_memory_alignment())); IE_ASSERT(filterLayer != nullptr); // encodes offset to beginning of split layer input - filterLayer->params["offset"] = - std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement); + filterLayer->params["offset"] = std::to_string(aligned_offset / Limitations::kBytesPerSplitElement); auto dims = splitOutput->getTensorDesc().getDims(); if (dims.size() > 3) { THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); } const auto offsetOfUnalignment = - (currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement; + (currentOffset - aligned_offset) / Limitations::kBytesPerSplitElement; // TODO consider to use a different number of filters do decrese the number of trailing zeros // (additionalPaddingOfFilter) const auto numberOfFilters = Limitations::kConvMinFiltersNum; diff --git a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.hpp b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.hpp index 993de719c44..b84ece077da 100644 --- a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.hpp +++ b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.hpp @@ -152,7 +152,7 @@ DECL_PASS(InsertSplitAligningFilter); DECL_PASS(FlattenTrivialConcat); /** - * @brief concat-aligning filter layer insertion required in cases when concat inputs size are not 64-aligned + * @brief concat-aligning filter layer insertion required in cases when concat input sizes are not aligned */ DECL_PASS(InsertConcatAligningFilter); diff --git a/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp b/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp index 64a26489232..6f29dae889f 100644 --- a/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp +++ b/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp @@ -64,7 +64,9 @@ static bool Convert(std::shared_ptr conv, auto&
input = conv->get_input_shape(0); uint32_t width = input.back(); uint32_t in_channels = input.at(1); - auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels); + auto split_sizes = GetAlignedSplitSizes(width, + Limitations::kBufferMaxSize / in_channels, + Limitations::get_instance()->get_memory_alignment()); IE_ASSERT(split_sizes.size() > 1); std::vector split_sizes_casted(split_sizes.size()); std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) { diff --git a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp index 4a7a0dce948..672d8666b5d 100644 --- a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp @@ -7,9 +7,13 @@ #include // to suppress deprecated definition errors #define IMPLEMENT_INFERENCE_ENGINE_PLUGIN +#include "common/gna_target.hpp" #include "layers/gna_split_layer.hpp" #include "ngraph/opsets/opset9.hpp" +using namespace ov::intel_gna::limitations; +using namespace ov::intel_gna::target; + namespace { using GetAlignedSplitSizesData = std::tuple; const std::vector data = { + GetAlignedSplitSizesData{10, 100, 64, std::vector{10}}, GetAlignedSplitSizesData{1024, 100, 64, std::vector(16, 64)}, GetAlignedSplitSizesData{151, 100, 64, std::vector{64, 64, 23}}, GetAlignedSplitSizesData{151, 65, 32, std::vector{64, 64, 23}}, - GetAlignedSplitSizesData{151, 65, 1, std::vector{65, 65, 21}}}; + GetAlignedSplitSizesData{151, 33, 32, std::vector{32, 32, 32, 32, 23}}, + GetAlignedSplitSizesData{151, 17, 16, std::vector{16, 16, 16, 16, 16, 16, 16, 16, 16, 7}}, + GetAlignedSplitSizesData{151, 65, 1, std::vector{65, 65, 21}}, + GetAlignedSplitSizesData{67000, 65528, 64, std::vector{65472, 1528}}, + GetAlignedSplitSizesData{67000, 65528, 16, std::vector{65520, 1480}}}; TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) { for (const auto& dataItem : data) { @@ -38,55 +47,86 @@ using VariadicSplitParameters = std::tuple; -const std::vector variadic_split_data = { - VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{192, 192, 320, 320}, true}, - VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{640, 192, 192}, true}, - VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{500, 24, 500}, false}, - VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{700, 300, 24}, false}, -}; - -TEST(CheckSplitSupported, CheckVariadicSplitSupported) { +void RunVariadicSplitSupportedTest(DeviceVersion device_version, std::vector test_vectors) { ov::Shape input_shape; uint32_t axis; std::vector split_lengths; bool result; - for (const auto& item : variadic_split_data) { + + Limitations::init(device_version); + for (const auto& item : test_vectors) { std::tie(input_shape, axis, split_lengths, result) = item; + auto split = std::make_shared( std::make_shared(ngraph::element::f32, input_shape), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_lengths.size()}), split_lengths)); - ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result); + ASSERT_TRUE(Limitations::is_split_supported(split, false) == result); } } +TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_5) { + RunVariadicSplitSupportedTest( + DeviceVersion::GNA3_5, + {VariadicSplitParameters{ov::Shape{1024}, 0, 
std::vector{192, 192, 320, 320}, true}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{640, 192, 192}, true}, + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{16, 1008}, false}, + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{500, 24, 500}, false}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{700, 300, 24}, false}}); +} + +TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_6) { + RunVariadicSplitSupportedTest( + DeviceVersion::GNA3_6, + {VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{192, 192, 320, 320}, true}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{640, 192, 192}, true}, + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{16, 1008}, true}, + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{500, 24, 500}, false}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{700, 300, 24}, false}}); +} + using SplitParameters = std::tuple; -const std::vector split_data = { - SplitParameters{ov::Shape{1024}, 0, 4, true}, - SplitParameters{ov::Shape{1, 1024}, 1, 16, true}, - SplitParameters{ov::Shape{1024}, 0, 64, false}, - SplitParameters{ov::Shape{1, 1024}, 1, 256, false}, -}; - -TEST(CheckSplitSupported, CheckSplitSupported) { +void RunSplitSupportedTest(DeviceVersion device_version, std::vector test_vectors) { ov::Shape input_shape; uint32_t axis; uint32_t num_splits; bool result; - for (const auto& item : split_data) { + + Limitations::init(device_version); + for (const auto& item : test_vectors) { std::tie(input_shape, axis, num_splits, result) = item; auto split = std::make_shared( std::make_shared(ngraph::element::f32, input_shape), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}), num_splits); - ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result); + ASSERT_TRUE(Limitations::is_split_supported(split, false) == result); } } + +TEST(CheckSplitSupported, CheckSplitSupported_GNA3_5) { + RunSplitSupportedTest(DeviceVersion::GNA3_5, + { + SplitParameters{ov::Shape{1024}, 0, 4, true}, + SplitParameters{ov::Shape{1, 1024}, 1, 16, true}, + SplitParameters{ov::Shape{1024}, 0, 64, false}, + SplitParameters{ov::Shape{1, 1024}, 1, 256, false}, + }); +} + +TEST(CheckSplitSupported, CheckSplitSupported_GNA3_6) { + RunSplitSupportedTest(DeviceVersion::GNA3_6, + { + SplitParameters{ov::Shape{1024}, 0, 4, true}, + SplitParameters{ov::Shape{1, 1024}, 1, 16, true}, + SplitParameters{ov::Shape{1024}, 0, 64, true}, + SplitParameters{ov::Shape{1, 1024}, 1, 256, false}, + }); +} } // namespace diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp index c315d7ac11d..f8f9dadbc1b 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp @@ -12,15 +12,20 @@ #include #include "backend/gna_limitations.hpp" +#include "common/gna_target.hpp" #include "common_test_utils/ngraph_test_utils.hpp" #include "ngraph_functions/builders.hpp" #include "ops/copy.hpp" #include "transformations/insert_copy_layer.hpp" +using namespace ov::intel_gna::limitations; +using namespace ov::intel_gna::target; + namespace testing { -typedef std::tuple InsertCopyTestParams; @@ -28,10 +33,12 @@ class InsertCopyLayerTest : public CommonTestUtils::TestsCommon, public ::testing::WithParamInterface { public: static std::string getTestCaseName(const 
testing::TestParamInfo& obj) { + DeviceVersion device_ver; size_t axis, inputs_num; - std::tie(axis, inputs_num) = obj.param; + std::tie(device_ver, axis, inputs_num) = obj.param; std::ostringstream result; + result << DeviceToString(device_ver) << "_"; result << "inputsNum=" << inputs_num << "_"; result << "axis=" << axis; @@ -43,6 +50,7 @@ public: public: std::shared_ptr m_func, m_ref_func; + DeviceVersion m_device_ver; size_t m_axis, m_inputs_num; }; @@ -54,8 +62,8 @@ void InsertCopyLayerTest::Validate() { } void InsertCopyLayerTest::SetUp() { - std::tie(m_axis, m_inputs_num) = this->GetParam(); - ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default); + std::tie(m_device_ver, m_axis, m_inputs_num) = this->GetParam(); + Limitations::init(m_device_ver); } void InsertCopyLayerTest::Run() { @@ -153,8 +161,12 @@ public: auto split = ngraph::builder::makeSplit(params, ngraph::element::i64, m_inputs_num, m_axis); ngraph::OutputVector concat_inputs; + int copy_layer_interval = + (Limitations::get_instance()->get_memory_alignment() / Limitations::kBytesPerSplitElement) * + m_inputs_num / input_shape[0]; + for (int i = 0; i < m_inputs_num; ++i) { - if (m_inputs_num == 1 || (i % (m_inputs_num / 8) == 0)) + if (m_inputs_num == 1 || (i % copy_layer_interval == 0)) concat_inputs.push_back(std::make_shared(split->output(i))); else concat_inputs.push_back(split->output(i)); @@ -177,10 +189,50 @@ public: } }; -void RunPasses(ngraph::pass::Manager& m, std::shared_ptr func) { - ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default); - m.run_passes(func); -} +class TransformationTestsBase : public CommonTestUtils::TestsCommon, + public ::testing::WithParamInterface> { +public: + static std::string getTestCaseName(const testing::TestParamInfo>& obj) { + DeviceVersion device_ver; + std::tie(device_ver) = obj.param; + + std::ostringstream result; + result << DeviceToString(device_ver); + + return result.str(); + } + + void SetUp() override { + std::tie(m_device_ver) = this->GetParam(); + Limitations::init(m_device_ver); + } + + void TearDown() override { + m_func.reset(); + } + + void RunPasses(ngraph::pass::Manager& m) { + m.run_passes(m_func); + } + + void Validate(const std::shared_ptr& f_ref) { + ASSERT_NO_THROW(check_rt_info(m_func)); + auto result1 = compare_functions(m_func, f_ref); + ASSERT_TRUE(result1.first); + } + + void Validate(const std::shared_ptr& f_ref1, const std::shared_ptr& f_ref2) { + ASSERT_NO_THROW(check_rt_info(m_func)); + + auto result1 = compare_functions(m_func, f_ref1); + auto result2 = compare_functions(m_func, f_ref2); + ASSERT_TRUE(result1.first || result2.first); + } + +public: + DeviceVersion m_device_ver; + std::shared_ptr m_func; +}; // [Parameter] [Parameter] // \ / => | @@ -189,8 +241,9 @@ void RunPasses(ngraph::pass::Manager& m, std::shared_ptr func) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamConcatTest, CompareWithRefs) { + std::shared_ptr ref_func; size_t axis = 0; ngraph::Shape in_shape{10}; @@ -199,7 +252,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) { ngraph::OutputVector concat_inputs{params, params}; auto concat = std::make_shared(concat_inputs, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, 
ngraph::ParameterVector{params}, "Concat"); } @@ -217,14 +270,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // / \ / \ // [Reshape][Reshape] [Reshape][Reshape] @@ -234,8 +289,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamNFLConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamNFLConcatTest, CompareWithRefs) { + std::shared_ptr ref_func; size_t axis = 0; ngraph::Shape shape = {1, 1, 2, 4}; ngraph::Shape in_shape = {1, 2, 4}; @@ -248,7 +304,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) { auto concat = std::make_shared(concat_inputs, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat"); } @@ -269,14 +325,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamNFLConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // / \ / \ // [Reshape][Reshape] [Reshape][Reshape] @@ -287,8 +345,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) { // [Result] [Result] [Concat] [Concat] // | | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamMultiNFLConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamMultiNFLConcatTest, CompareWithRefs) { + std::shared_ptr ref_func; size_t axis = 0; ngraph::Shape shape = {1, 1, 2, 4}; ngraph::Shape in_shape = {1, 2, 4}; @@ -304,9 +363,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) { auto result1 = std::make_shared(concat1); auto result2 = std::make_shared(concat2); auto result3 = std::make_shared(reshape1); - func = std::make_shared(ngraph::ResultVector{result1, result2, result3}, - ngraph::ParameterVector{params}, - "Concat"); + m_func = std::make_shared(ngraph::ResultVector{result1, result2, result3}, + ngraph::ParameterVector{params}, + "Concat"); } { @@ -330,14 +389,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamMultiNFLConcatTest, + 
::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter][Constant] [Parameter][Constant] // \ | / \ | / // [Concat] \ [Copy] / @@ -345,8 +406,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) { - std::shared_ptr func, ref_func1, ref_func2; +using InsertCopyLayerMultiConstConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiConstConcatTest, CompareWithRefs) { + std::shared_ptr ref_func1, ref_func2; size_t axis = 0; ngraph::Shape in_shape{10}; @@ -357,7 +419,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) { ngraph::OutputVector concat_inputs{params, constant, constant}; auto concat = std::make_shared(concat_inputs, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat"); } @@ -388,15 +450,16 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result1 = compare_functions(func, ref_func1); - auto result2 = compare_functions(func, ref_func2); - ASSERT_TRUE(result1.first || result2.first); + RunPasses(m); + Validate(ref_func1, ref_func2); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiConstConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // \ / \ / // [Add] [Add] @@ -406,8 +469,9 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) { - std::shared_ptr func, ref_func1, ref_func2; +using InsertCopyLayerMultiLayerConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiLayerConcatTest, CompareWithRefs) { + std::shared_ptr ref_func1, ref_func2; size_t axis = 0; ngraph::Shape in_shape{10}; @@ -417,7 +481,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) { ngraph::OutputVector concat_inputs{add, add}; auto concat = std::make_shared(concat_inputs, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat"); } @@ -448,18 +512,18 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - - ASSERT_NO_THROW(check_rt_info(func)); + RunPasses(m); // Transformation is based on outputs order and insert copy layer in one of the branches, // so this is right, that we have two different result graph based on output order. 
- auto result1 = compare_functions(func, ref_func1); - auto result2 = compare_functions(func, ref_func2); - - ASSERT_TRUE(result1.first || result2.first); + Validate(ref_func1, ref_func2); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiLayerConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | \ | | \ | // [Assign] \ [ReadValue] [Copy] [Copy] [ReadValue] // \ | | | / // [Add] => [Assign] [Add] // | | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) { - std::shared_ptr func, ref_func1, ref_func2; +using InsertCopyLayerMultiLayerNFLConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiLayerNFLConcatTest, CompareWithRefs) { + std::shared_ptr ref_func1, ref_func2; size_t axis = 0; ngraph::Shape shape = {1, 1, 2, 4}; ngraph::Shape in_shape = {1, 2, 4}; @@ -481,7 +546,7 @@ ngraph::OutputVector concat_inputs{reshape1, reshape2}; auto concat = std::make_shared(concat_inputs, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat"); } @@ -516,18 +581,18 @@ ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - - ASSERT_NO_THROW(check_rt_info(func)); + RunPasses(m); // Transformation is based on outputs order and insert copy layer in one of the branches, // so this is right, that we have two different result graph based on output order. 
- auto result1 = compare_functions(func, ref_func1); - auto result2 = compare_functions(func, ref_func2); - - ASSERT_TRUE(result1.first || result2.first); + Validate(ref_func1, ref_func2); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiLayerNFLConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | \ | | \ | // [Assign] \ [ReadValue] [Copy] [Copy] [ReadValue] @@ -535,8 +600,9 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) { // [Add] => [Assign] [Add] // | | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape{10}; const std::string variable_name("variable_id"); @@ -554,7 +620,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -579,14 +645,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | \ | | \ | // [Assign] \ [ReadValue] [Copy] [Copy] [ReadValue] @@ -594,8 +662,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) { // [Concat] => [Assign] [Concat] // | | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamConcatMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamConcatMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape{10}; size_t axis = 0; const std::string variable_name("variable_id"); @@ -614,7 +683,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -639,14 +708,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamConcatMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // / \ | / \ | // [Reshape][Reshape][ReadValue] [Reshape][Reshape][ReadValue] @@ -656,8 +727,9 @@ 
TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) { // [Result] [Assign] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiParamNFLConcatMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiParamNFLConcatMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape = {1, 2, 4}; ngraph::Shape shape1 = {1, 1, 2, 4}; ngraph::Shape shape2 = {2, 4}; @@ -682,7 +754,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -711,14 +783,16 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiParamNFLConcatMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | | | | // [Reshape] [ReadValue] [Reshape] [ReadValue] @@ -728,8 +802,9 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) { // [Assign] [Mul] [Copy] [Mul] // | | | // [Result] [Assign] [Result] -TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerMultiLayerConcatMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerMultiLayerConcatMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -755,7 +830,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -782,14 +857,16 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerMultiLayerConcatMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | | | | // [Reshape] [ReadValue] [Reshape] [ReadValue] @@ -801,8 +878,9 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) { // [Assign] [Add] [Copy] [Add] // | | | // [Result] [Assign] [Result] -TEST(TransformationTests, InsertCopyLayerCropMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerCropMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerCropMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 
0}; @@ -829,7 +907,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -857,14 +935,16 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerCropMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | | | | // [Reshape] [ReadValue] [Reshape] [ReadValue] @@ -876,8 +956,9 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) { // [Assign] [Result] [Сopy] [Result] // | // [Assign] -TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerCropNFLMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerCropNFLMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape{10}; size_t axis = 0; const std::string variable_name("variable_id"); @@ -898,7 +979,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -924,14 +1005,16 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerCropNFLMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter1][Parameter2][Constant] [Parameter1][Parameter2][Constant] // | / | | / | // [Reshape] / [ReadValue] [Reshape] / [ReadValue] @@ -942,8 +1025,9 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) { // [Add] | [Add] // | [Assign] | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerConcatMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerConcatMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape = {1, 2, 4}; ngraph::Shape out_shape = {2, 2, 4}; size_t axis = 0; @@ -966,7 +1050,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) { ngraph::ParameterVector params = {input1, input2}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -993,14 +1077,16 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + 
Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerConcatMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter1][Parameter2][Constant] [Parameter1][Parameter2][Constant] // | / | | / | // [Reshape] / [ReadValue] [Reshape] / [ReadValue] @@ -1013,8 +1099,9 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) { // [Add] | [Add] // | [Assign] | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerConcatNFLMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerConcatNFLMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape shape = {1, 2, 2, 4}; ngraph::Shape in_shape = {1, 2, 4}; size_t axis = 0; @@ -1038,7 +1125,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) { ngraph::ParameterVector params = {input1, input2}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -1066,14 +1153,16 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerConcatNFLMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | | | | // [Split] [ReadValue] [Split] [ReadValue] @@ -1081,8 +1170,9 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) { // [Assign][Concat] [Сopy] [Concat] // | | | // [Result] [Assign [Result] -TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerSplitMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerSplitMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape{10}; ngraph::Shape out_shape{5}; size_t axis = 0; @@ -1103,7 +1193,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -1128,14 +1218,16 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerSplitMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Constant] [Parameter] [Constant] // | | | | // [Split] [ReadValue] [Split] [ReadValue] @@ -1145,8 +1237,9 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) { // [Assign] [Concat] [Сopy] [Concat] // | | | // [Result] [Assign [Result] -TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) { - std::shared_ptr func, ref_func; +using 
InsertCopyLayerSplitNFLMemoryTest = TransformationTestsBase; +TEST_P(InsertCopyLayerSplitNFLMemoryTest, CompareWithRefs) { + std::shared_ptr ref_func; ngraph::Shape in_shape{10}; ngraph::Shape shape{1, 5}; ngraph::Shape out_shape{5}; @@ -1169,7 +1262,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) { ngraph::ParameterVector params = {input}; ngraph::ResultVector results = {result}; ngraph::SinkVector sinks = {assign}; - func = std::make_shared(results, sinks, params); + m_func = std::make_shared(results, sinks, params); } { @@ -1195,14 +1288,16 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerSplitNFLMemoryTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // | | // [Reshape] [Reshape] @@ -1214,8 +1309,9 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerCropConcatTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerCropConcatTest = TransformationTestsBase; +TEST_P(InsertCopyLayerCropConcatTest, CompareWithRefs) { + std::shared_ptr ref_func; size_t axis = 0; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; @@ -1231,7 +1327,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) { auto const_value = ngraph::builder::makeConstant(ngraph::element::i64, out_shape, std::vector{1}); auto concat = std::make_shared(ngraph::OutputVector{crop, const_value}, axis); auto result = std::make_shared(concat); - func = + m_func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat"); } @@ -1250,14 +1346,16 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerCropConcatTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // | | // [Reshape] => [Copy] @@ -1265,8 +1363,9 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) { // [Result] [Reshape] // | // [Result] -TEST(TransformationTests, InsertCopyLayerNonfuncTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerNonfuncTest = TransformationTestsBase; +TEST_P(InsertCopyLayerNonfuncTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -1277,9 +1376,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) { auto params = std::make_shared(ngraph::element::i64, in_shape); auto reshape = ov::op::util::reshapeTo(params, shape); auto result = std::make_shared(reshape); - func = std::make_shared(ngraph::ResultVector{result}, - ngraph::ParameterVector{params}, - "nonfunc"); + m_func = std::make_shared(ngraph::ResultVector{result}, + ngraph::ParameterVector{params}, + "nonfunc"); } { @@ -1295,14 +1394,16 
@@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerNonfuncTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // / \ | // [Reshape][Reshape] => [Copy] @@ -1310,8 +1411,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) { // [Result] [Result] [Reshape][Reshape] // | | // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerNonfuncTwoSubgraphsTest = TransformationTestsBase; +TEST_P(InsertCopyLayerNonfuncTwoSubgraphsTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -1324,9 +1426,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) { auto reshape2 = ov::op::util::reshapeTo(params, shape); auto result1 = std::make_shared(reshape1); auto result2 = std::make_shared(reshape2); - func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}, - "nonfunc"); + m_func = std::make_shared(ngraph::ResultVector{result1, result2}, + ngraph::ParameterVector{params}, + "nonfunc"); } { @@ -1344,14 +1446,16 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerNonfuncTwoSubgraphsTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // | | // [Reshape] [Copy] @@ -1359,8 +1463,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) { // [Result] [Result] [Reshape] // / \ // [Result] [Result] -TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerNonfuncTwoResultsTest = TransformationTestsBase; +TEST_P(InsertCopyLayerNonfuncTwoResultsTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -1372,9 +1477,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) { auto reshape = ov::op::util::reshapeTo(params, shape); auto result1 = std::make_shared(reshape); auto result2 = std::make_shared(reshape); - func = std::make_shared(ngraph::ResultVector{result1, result2}, - ngraph::ParameterVector{params}, - "nonfunc"); + m_func = std::make_shared(ngraph::ResultVector{result1, result2}, + ngraph::ParameterVector{params}, + "nonfunc"); } { @@ -1391,14 +1496,16 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + 
InsertCopyLayerNonfuncTwoResultsTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // | | // [Reshape] [Reshape] @@ -1408,8 +1515,9 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) { // [Result] [Result] [Result] [Reshape] // | // [Result] -TEST(TransformationTests, InsertCopyLayerNFLBranchTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerNFLBranchTest = TransformationTestsBase; +TEST_P(InsertCopyLayerNFLBranchTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -1425,9 +1533,9 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) { auto relu = std::make_shared(reshape); auto result_relu = std::make_shared(relu); - func = std::make_shared(ngraph::ResultVector{result, result_relu}, - ngraph::ParameterVector{params}, - "nonfunc"); + m_func = std::make_shared(ngraph::ResultVector{result, result_relu}, + ngraph::ParameterVector{params}, + "nonfunc"); } { @@ -1448,14 +1556,16 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerNFLBranchTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); + // [Parameter] [Parameter] // | | // [Reshape] [Reshape] @@ -1465,8 +1575,9 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) { // [Reshape] [Result] [Reshape] [Reshape] // | | | // [Result] [Result] [Result] -TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) { - std::shared_ptr func, ref_func; +using InsertCopyLayerNFLvsFLSubgraphTest = TransformationTestsBase; +TEST_P(InsertCopyLayerNFLvsFLSubgraphTest, CompareWithRefs) { + std::shared_ptr ref_func; std::vector axes = {0, 1, 2, 3}; std::vector dim = {1, 1, 2, 2}; std::vector offset = {0, 0, 0, 0}; @@ -1482,9 +1593,9 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) { auto reshape2 = ov::op::util::reshapeTo(relu, shape); auto result_relu = std::make_shared(reshape2); - func = std::make_shared(ngraph::ResultVector{result, result_relu}, - ngraph::ParameterVector{params}, - "nonfunc"); + m_func = std::make_shared(ngraph::ResultVector{result, result_relu}, + ngraph::ParameterVector{params}, + "nonfunc"); } { @@ -1505,13 +1616,14 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - RunPasses(m, func); - ASSERT_NO_THROW(check_rt_info(func)); - - auto result = compare_functions(func, ref_func); - ASSERT_TRUE(result.first); + RunPasses(m); + Validate(ref_func); } +INSTANTIATE_TEST_SUITE_P(TransformationTests, + InsertCopyLayerNFLvsFLSubgraphTest, + ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6), + TransformationTestsBase::getTestCaseName); // [Parameter] [Parameter] // | | @@ -1524,8 +1636,9 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) { // [Result] [Concat] // | // [Result] -TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) { - std::shared_ptr func, ref_func; +using InsertCopyLayerSplitNFLConcatTest = 
+using InsertCopyLayerSplitNFLConcatTest = TransformationTestsBase;
+TEST_P(InsertCopyLayerSplitNFLConcatTest, CompareWithRefs) {
+    std::shared_ptr<ngraph::Function> ref_func;
     ngraph::Shape input_shape{1, 2, 4};
     ngraph::Shape shape{1, 1, 2, 4};
     size_t axis = 0;
@@ -1537,7 +1650,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
         auto const_value = ngraph::builder::makeConstant(ngraph::element::i64, shape, std::vector{1});
         auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{reshape, const_value}, axis);
         auto result = std::make_shared<ngraph::opset8::Result>(concat);
-        func =
+        m_func =
             std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{params}, "Concat");
     }
 
     {
@@ -1556,16 +1669,15 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass();
     m.register_pass();
-    RunPasses(m, func);
-    ASSERT_NO_THROW(check_rt_info(func));
-
-    auto result = compare_functions(func, ref_func);
-    ASSERT_TRUE(result.first);
+    RunPasses(m);
+    Validate(ref_func);
 }
 
-const size_t axis = 0;
-const std::vector inputCounts = {1, 64, 128, 256};
+INSTANTIATE_TEST_SUITE_P(TransformationTests,
+                         InsertCopyLayerSplitNFLConcatTest,
+                         ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6),
+                         TransformationTestsBase::getTestCaseName);
 
 TEST_P(InsertCopyLayerConcatTest, CompareWithRefs) {
     Run();
@@ -1575,14 +1687,25 @@ TEST_P(InsertCopyLayerSplitConcatTest, CompareWithRefs) {
     Run();
 }
 
+const size_t axis = 0;
+const std::vector inputCounts = {1, 64, 128, 256};
+
 INSTANTIATE_TEST_SUITE_P(TransformationTests,
                          InsertCopyLayerConcatTest,
-                         ::testing::Combine(::testing::Values(axis), ::testing::ValuesIn(inputCounts)),
+                         ::testing::Combine(::testing::ValuesIn(std::vector{DeviceVersion::GNA3_0,
+                                                                            DeviceVersion::GNA3_5,
+                                                                            DeviceVersion::GNA3_6}),
+                                            ::testing::Values(axis),
+                                            ::testing::ValuesIn(inputCounts)),
                          InsertCopyLayerTest::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(TransformationTests,
                          InsertCopyLayerSplitConcatTest,
-                         ::testing::Combine(::testing::Values(axis), ::testing::ValuesIn(inputCounts)),
+                         ::testing::Combine(::testing::ValuesIn(std::vector{DeviceVersion::GNA3_0,
+                                                                            DeviceVersion::GNA3_5,
+                                                                            DeviceVersion::GNA3_6}),
+                                            ::testing::Values(axis),
+                                            ::testing::ValuesIn(inputCounts)),
                          InsertCopyLayerTest::getTestCaseName);
 } // namespace testing
 
diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_split_convolution_with_large_buffer_size.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_split_convolution_with_large_buffer_size.cpp
index bbbccf4a8b2..8b468c9daf9 100644
--- a/src/plugins/intel_gna/tests/unit/transformations/gna_split_convolution_with_large_buffer_size.cpp
+++ b/src/plugins/intel_gna/tests/unit/transformations/gna_split_convolution_with_large_buffer_size.cpp
@@ -9,9 +9,14 @@
 #include 
 #include 
 
+#include "backend/gna_limitations.hpp"
+#include "common/gna_target.hpp"
 #include "common_test_utils/ngraph_test_utils.hpp"
 #include "transformations/split_convolution_with_large_buffer_size.hpp"
 
+using namespace ov::intel_gna::limitations;
+using namespace ov::intel_gna::target;
+
 namespace testing {
 namespace {
 
@@ -126,43 +131,41 @@ ngraph::Output CreateConvolution::createOutputNode(const ngraph::O
 }
 
 // should be used only after CreateBaseDecorator
+template <ngraph::Shape& kernel_shape, ngraph::Shape& split_shape>
 class CreateSplittedConvolution : public CreateGraphDecorator {
 public:
-    CreateSplittedConvolution(CreateGraphDecoratorPtr prev,
-                              const ngraph::Shape& kernel_shape = ngraph::Shape{1, 64, 1, 1},
-                              const ngraph::Shape& split_shape = ngraph::Shape{960, 960, 960, 960, 256})
+    CreateSplittedConvolution(CreateGraphDecoratorPtr prev)
         : CreateGraphDecorator(std::move(prev)),
           kernel_shape_(kernel_shape),
           split_shape_(split_shape) {}
 
 protected:
-    void updateGraph(Graph& graph) override;
+    void updateGraph(Graph& graph) override {
+        auto split_node_c1 =
+            ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector{3});
+        auto split_node_c2 =
+            ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_shape_.size()}), split_shape_);
+        auto split_node =
+            std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
+
+        auto kernel = ngraph::opset7::Constant::create(ngraph::element::f32, kernel_shape_, {1});
+
+        for (int i = 0; i < split_shape_.size(); ++i) {
+            auto convolution_operation = std::make_shared<ngraph::opset7::Convolution>(split_node->output(i),
+                                                                                       kernel,
+                                                                                       ngraph::Strides{1, 1},
+                                                                                       ngraph::CoordinateDiff{0, 0},
+                                                                                       ngraph::CoordinateDiff{0, 0},
+                                                                                       ngraph::Strides{1, 1});
+            graph.output_nodes.push_back(convolution_operation);
+        }
+    }
 
 private:
     const ngraph::Shape kernel_shape_;
     const ngraph::Shape split_shape_;
 };
 
-void CreateSplittedConvolution::updateGraph(Graph& graph) {
-    auto split_node_c1 =
-        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector{3});
-    auto split_node_c2 =
-        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_shape_.size()}), split_shape_);
-    auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
-
-    auto kernel = ngraph::opset7::Constant::create(ngraph::element::f32, kernel_shape_, {1});
-
-    for (int i = 0; i < split_shape_.size(); ++i) {
-        auto convolution_operation = std::make_shared<ngraph::opset7::Convolution>(split_node->output(i),
-                                                                                   kernel,
-                                                                                   ngraph::Strides{1, 1},
-                                                                                   ngraph::CoordinateDiff{0, 0},
-                                                                                   ngraph::CoordinateDiff{0, 0},
-                                                                                   ngraph::Strides{1, 1});
-        graph.output_nodes.push_back(convolution_operation);
-    }
-}
-
 class CreateAdd : public CreateAppendableGraphDecorator {
 public:
     CreateAdd(CreateGraphDecoratorPtr prev) : CreateAppendableGraphDecorator(std::move(prev)) {}
@@ -261,9 +264,10 @@ Graph createSolidGraph(const ngraph::Shape& input_shape, const ngraph::Shape& ke
 
 // -------------------------------------------------------------------------------------------------------
 
+using TestParams = std::tuple<Graph, Graph, ngraph::pass::Manager>;
+
 class SplitConvolutionFixture : public CommonTestUtils::TestsCommon,
-                                public ::testing::WithParamInterface<
-                                    std::tuple<Graph, Graph, ngraph::pass::Manager>> {
+                                public ::testing::WithParamInterface<std::tuple<DeviceVersion, TestParams>> {
 public:
     void SetUp() override;
 
@@ -274,10 +278,14 @@ public:
 
 void SplitConvolutionFixture::SetUp() {
     // TODO: use auto & [transformed_graph, reference_graph] = this->GetParam() when C++17
+    DeviceVersion device_version;
+    TestParams params;
     Graph transformed_graph;
     Graph reference_graph;
-    std::tie(transformed_graph, reference_graph, pass_manager) = this->GetParam();
+    std::tie(device_version, params) = this->GetParam();
+    std::tie(transformed_graph, reference_graph, pass_manager) = params;
 
+    Limitations::init(device_version);
     function = transformed_graph.createFunction();
     reference_function = reference_graph.createFunction();
 }
@@ -305,34 +313,70 @@ TEST_P(SplitConvolutionFixture, CompareFunctions) {
 }
 
 INSTANTIATE_TEST_SUITE_P(
-    SplitConvolutionTestSuite,
+    SplitConvolution_GNA3_0_3_5_3_6_TestSuite,
     SplitConvolutionFixture,
-    ::testing::Values(
-        std::make_tuple(createGraph(),
-                        createGraph(),
-                        createPassManager()),
-        std::make_tuple(createGraph(),
-                        createGraph(),
-                        createPassManager()),
-        std::make_tuple(createGraph(),
-                        createGraph(),
-                        createPassManager()),
-        std::make_tuple(createGraph(),
-                        createGraph(),
-                        createPassManager()),
-        std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager()),
-        std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager()),
-        std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager()),
-        std::make_tuple(
-            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-            createPassManager())));
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6),
+        ::testing::Values(
+            std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager()),
+            std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager()),
+            std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager()),
+            std::make_tuple(
+                createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                createPassManager())))));
+
+ngraph::Shape kernel_shape_3_5 = {1, 64, 1, 1};
+ngraph::Shape split_shape_3_5 = {960, 960, 960, 960, 256};
+using CreateSplitedConvolution3_5 = CreateSplittedConvolution<kernel_shape_3_5, split_shape_3_5>;
+
+INSTANTIATE_TEST_SUITE_P(
+    SplitConvolution_GNA3_0_3_5_TestSuite,
+    SplitConvolutionFixture,
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5),
+        ::testing::Values(
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager())))));
+
+ngraph::Shape kernel_shape_3_6 = {1, 64, 1, 1};
+ngraph::Shape split_shape_3_6 = {1008, 1008, 1008, 1008, 64};
+using CreateSplitedConvolution3_6 = CreateSplittedConvolution<kernel_shape_3_6, split_shape_3_6>;
+
+INSTANTIATE_TEST_SUITE_P(
+    SplitConvolution_GNA3_6_TestSuite,
+    SplitConvolutionFixture,
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_6),
+        ::testing::Values(
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager()),
+            std::make_tuple(createGraph(),
+                            createGraph(),
+                            createPassManager())))));
 } // namespace
 } // namespace testing
 
diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_split_eltwise.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_split_eltwise.cpp
index 9a0d2a91b1d..f53c3d2ebd2 100644
--- a/src/plugins/intel_gna/tests/unit/transformations/gna_split_eltwise.cpp
+++ b/src/plugins/intel_gna/tests/unit/transformations/gna_split_eltwise.cpp
@@ -11,10 +11,15 @@
 #include 
 #include 
 
+#include "backend/gna_limitations.hpp"
"common/gna_target.hpp" #include "common_test_utils/common_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp" #include "transformations/split_eltwise.hpp" +using namespace ov::intel_gna::limitations; +using namespace ov::intel_gna::target; + namespace testing { namespace { @@ -87,21 +92,24 @@ static std::shared_ptr createFunction(const ngraph::Shape& inp } } -typedef std::tuple EltwiseSplitParams; static std::string getTestCaseName(testing::TestParamInfo obj) { + DeviceVersion device_ver; ngraph::Shape shape; bool with_const; bool with_fq; ELTWISE_TYPE type; - std::tie(shape, with_const, with_fq, type) = obj.param; + std::tie(device_ver, shape, with_const, with_fq, type) = obj.param; std::ostringstream result; + result << DeviceToString(device_ver) << "_"; result << "IS=" << CommonTestUtils::vec2str(shape) << "_"; result << "wConst=" << with_const << "_"; result << "wFQ=" << with_fq << "_"; @@ -132,11 +140,13 @@ public: }; void SplitEltwiseTestSuiteFixture::SetUp() { + DeviceVersion device_ver; ngraph::Shape shape; bool with_const; bool with_fq; ELTWISE_TYPE type; - std::tie(shape, with_const, with_fq, type) = this->GetParam(); + std::tie(device_ver, shape, with_const, with_fq, type) = this->GetParam(); + Limitations::init(device_ver); function = createFunction(shape, with_const, with_fq, type, false); reference_function = createFunction(shape, with_const, with_fq, type, true); } @@ -158,16 +168,19 @@ TEST_P(SplitEltwiseTestSuiteFixture, CompareFunctions) { const std::vector inputShape = {{1, 67000}, {1, 500000}, {1, 936, 513}, {1, 64, 64, 64}, {1, 256, 64, 64}}; -INSTANTIATE_TEST_SUITE_P(SplitEltwiseTestSuite, - SplitEltwiseTestSuiteFixture, - ::testing::Combine(::testing::ValuesIn(inputShape), - ::testing::ValuesIn(std::vector{true, false}), // with const - ::testing::ValuesIn(std::vector{true, false}), // with fq - ::testing::ValuesIn(std::vector{ - ELTWISE_TYPE::Sum, - ELTWISE_TYPE::Sub, - ELTWISE_TYPE::Prod})), // eltwise type - getTestCaseName); +INSTANTIATE_TEST_SUITE_P( + SplitEltwiseTestSuite, + SplitEltwiseTestSuiteFixture, + ::testing::Combine(::testing::ValuesIn(std::vector{DeviceVersion::GNA3_0, // device version + DeviceVersion::GNA3_5, + DeviceVersion::GNA3_6}), + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(std::vector{true, false}), // with const + ::testing::ValuesIn(std::vector{true, false}), // with fq + ::testing::ValuesIn(std::vector{ELTWISE_TYPE::Sum, + ELTWISE_TYPE::Sub, + ELTWISE_TYPE::Prod})), // eltwise type + getTestCaseName); } // namespace } // namespace testing