16 byte memory alignment and concat (#17712)

* use device specific alignment instead of ALIGN64 macro

* update for tests

* update after review
Tomasz Adamowicz 2023-06-16 13:30:59 +02:00 committed by GitHub
parent 0b708b5eff
commit a4519f0a2c
11 changed files with 570 additions and 340 deletions

View File

@ -661,7 +661,6 @@ constexpr uint32_t Limitations::kConvFiltersNumDivider;
constexpr uint32_t Limitations::kConvFilterSizeDivider;
constexpr uint32_t Limitations::kConvFilterMaxSize;
constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
constexpr uint32_t Limitations::kInputByteAlignment;
constexpr uint32_t Limitations::kNoOfInputsDivisor;
constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
constexpr uint32_t Limitations::kAffineMaxBatchSize;
@ -673,6 +672,7 @@ constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
constexpr uint32_t Limitations::kBytesPerSplitElement;
constexpr uint32_t Limitations::kBytesPerCropElement;
constexpr uint32_t Limitations::kBytesPerConcatElement;
constexpr uint32_t Limitations::kMemoryPageSize;
thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
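
Note on the hunks above: the fixed kInputByteAlignment = 64 constant gives way to a per-device query. A minimal sketch of what get_memory_alignment() could return, assuming (from the PR title and the updated tests further below) that GNA 3.6 uses 16-byte alignment while older targets keep 64 bytes; the member name m_device_version and the exact constants are illustrative, not taken from this commit:

// Sketch only - not the body introduced by this commit.
inline size_t Limitations::get_memory_alignment() const {
    constexpr size_t kMemoryAlignment16B = 16;  // assumed for GNA 3.6
    constexpr size_t kMemoryAlignment64B = 64;  // legacy value, former kInputByteAlignment
    return (m_device_version == target::DeviceVersion::GNA3_6) ? kMemoryAlignment16B
                                                                : kMemoryAlignment64B;
}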

View File

@ -248,6 +248,7 @@ public:
bool use_only_16bit_convolution_weights() const;
bool is_crop_affined_offset(size_t numberOfElements) const;
bool is_aligned(size_t addr) const;
size_t get_memory_alignment() const;
std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
@ -260,7 +261,6 @@ public:
constexpr static uint32_t kConvFilterSizeDivider = 8;
constexpr static uint32_t kConvFilterMaxSize = 768;
constexpr static uint32_t kConvEachKernelByteAlignment = 16;
constexpr static uint32_t kInputByteAlignment = 64;
constexpr static uint32_t kNoOfInputsDivisor = 8;
constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
constexpr static uint32_t kAffineMaxBatchSize = 8;
@ -274,10 +274,12 @@ public:
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerCropElement = 2;
// Currently concat layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerConcatElement = 2;
constexpr static uint32_t kMemoryPageSize = 4096;
private:
@ -306,7 +308,11 @@ inline std::shared_ptr<Limitations> Limitations::get_instance() {
inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
const auto cropOffset = numberOfElements * kBytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
return !is_aligned(cropOffset);
}
inline bool Limitations::is_aligned(size_t addr) const {
return (addr == ALIGN(addr, get_memory_alignment()));
}
inline size_t Limitations::get_memory_alignment() const {
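
The check itself is plain arithmetic: an address is aligned exactly when rounding it up to the next multiple of the device alignment leaves it unchanged. A small self-contained illustration, where align_up is a stand-in for the plugin's ALIGN macro:

#include <cassert>
#include <cstddef>

// Stand-in for ALIGN(number, significance): round up to the next multiple of 'significance'.
static inline size_t align_up(size_t number, size_t significance) {
    return ((number + significance - 1) / significance) * significance;
}

static inline bool is_aligned_to(size_t addr, size_t alignment) {
    return addr == align_up(addr, alignment);
}

int main() {
    assert(is_aligned_to(96, 16));    // 96 is already a multiple of 16
    assert(!is_aligned_to(100, 16));  // ALIGN(100, 16) == 112 != 100
    assert(!is_aligned_to(96, 64));   // the same offset needed an aligning filter under 64-byte alignment
    return 0;
}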

View File

@ -87,7 +87,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
}
}
return (offset == ALIGN64(offset));
return limitations::Limitations::get_instance()->is_aligned(offset);
}
inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {

View File

@ -47,12 +47,11 @@ public:
std::vector<SplitConnectedLayerInfo> splitOutputLayers;
};
// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
uint32_t maxSplitSize,
uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
// @brief Returns sizes of split outputs to split the input tensor into aligned parts that are not greater than the
// specified split size or alignment, depending on which one is larger
inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t splitSize, uint32_t alignment) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t maxAlignedSplitSize = std::max(splitSize - splitSize % alignment, alignment);
uint32_t usedSize = 0;
while (usedSize < totalSize) {
uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
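
To make the loop concrete, here is a standalone sketch of the same splitting rule together with one of the cases exercised by the updated unit tests further below (totalSize = 151, splitSize = 65, alignment = 32 yields {64, 64, 23}); it mirrors the function above rather than copying it verbatim:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Chunks are the largest multiple of 'alignment' not exceeding 'splitSize'
// (but at least 'alignment'); the last chunk takes whatever remains.
std::vector<uint32_t> aligned_split_sizes(uint32_t totalSize, uint32_t splitSize, uint32_t alignment) {
    std::vector<uint32_t> sizes;
    const uint32_t maxAlignedSplitSize = std::max(splitSize - splitSize % alignment, alignment);
    uint32_t usedSize = 0;
    while (usedSize < totalSize) {
        const uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
        sizes.push_back(partSize);
        usedSize += partSize;
    }
    return sizes;
}

int main() {
    for (uint32_t s : aligned_split_sizes(151, 65, 32))
        std::cout << s << ' ';  // prints: 64 64 23
    std::cout << std::endl;
    return 0;
}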
@ -73,22 +72,21 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = limitations::Limitations::kInputByteAlignment;
auto alignment = limitations::Limitations::get_instance()->get_memory_alignment();
// Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always
// Split output size should be a multiple of the device memory alignment to avoid align filter insertion,
// but we need to check if our input size to split exceeds the alignment; if not, we can always
// split if the remaining size is aligned
if (splittedElementsSize <= alignment) {
auto split_size = limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize;
if (splittedElementsSize <= alignment || split_size < alignment) {
if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
alignment = 1;
} else {
return {splittedDimIx, splitSizes};
}
}
splitSizes =
GetAlignedSplitSizes(splittedElementsSize,
limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
alignment);
splitSizes = GetAlignedSplitSizes(splittedElementsSize, split_size, alignment);
return {splittedDimIx, splitSizes};
}
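
The early-exit branch above can be read with concrete numbers. A hedged walk-through, assuming kBufferMaxSize is 65528 (the value the split-size test vectors below appear to use):

// Case 1: dims = {1, 67000}, alignment = 64 (GNA <= 3.5)
//   splittedElementsSize = 67000, totalElementsSize = 67000
//   split_size = 65528 * 67000 / 67000 = 65528  -> neither condition triggers, no fallback
//   GetAlignedSplitSizes(67000, 65528, 64) -> {65472, 1528}
//
// Case 2: dims = {1, 32}, alignment = 64
//   splittedElementsSize = 32 <= alignment, so the fallback check runs:
//   (totalElementsSize / splittedElementsSize) % alignment = (32 / 32) % 64 = 1 != 0
//   -> the function returns an empty split list (no aligned split is possible)
//
// Case 3: dims = {32, 64} (first non-trivial dim is 32), alignment = 64
//   (2048 / 32) % 64 == 0, so alignment falls back to 1: every chunk boundary of the
//   flattened buffer still lands on a 64-element multiple, and the split proceeds.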

View File

@ -1247,9 +1247,6 @@ void FlattenTrivialConcatPass::run() {
void InsertConcatAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
// currently concat layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull
// for testing
const int bytesPerConcatElement = 2;
int numOfFilterLayers = 0;
@ -1273,7 +1270,7 @@ void InsertConcatAligningFilterPass::run() {
auto concatInput = getLayerByIndex(input_idx);
auto dims = concatInput->getDims();
auto outputSize = details::product(++dims.begin(), dims.end()) * bytesPerConcatElement;
auto outputSize = details::product(++dims.begin(), dims.end()) * Limitations::kBytesPerConcatElement;
auto useAlignFilterIf = [&concatLayer, &getLayerByIndex](int concat_input_idx) {
if (concatLayer->insData.size() <= concat_input_idx)
@ -1290,7 +1287,8 @@ void InsertConcatAligningFilterPass::run() {
// correcting offset by copy layer insertion. This can be improved by collapsing copy and affine or diagonal
// later on: if the next concat input requires an align filter, then the current input also requires either a copy
// or an align filter
if (ALIGN64(offset) != offset || (ALIGN64(outputSize) != outputSize && useAlignFilterIf(input_idx + 1))) {
if ((!Limitations::get_instance()->is_aligned(offset)) ||
((!Limitations::get_instance()->is_aligned(outputSize)) && useAlignFilterIf(input_idx + 1))) {
auto prevLayer = getCreatorLayer(concatInput).lock();
// input layer parameters are copied without using GNA primitives - so nothing to align here.
if (!useAlignFilterIf(input_idx))
@ -1310,13 +1308,17 @@ void InsertConcatAligningFilterPass::run() {
}
auto num_rows_in = dims[1];
size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(offset) - 64));
size_t num_rows_padded = (offset - aligned64_offset) / bytesPerConcatElement;
size_t aligned_offset =
std::max(0,
static_cast<int>(ALIGN(offset, Limitations::get_instance()->get_memory_alignment()) -
Limitations::get_instance()->get_memory_alignment()));
size_t num_rows_padded = (offset - aligned_offset) / Limitations::kBytesPerConcatElement;
size_t num_rows_out = num_rows_padded + num_rows_in;
// encodes offset to beginning of split layer input
size_t bytesOffset =
(aligned64_offset / bytesPerConcatElement) * (quantized ? bytesPerConcatElement : 4);
(aligned_offset / Limitations::kBytesPerConcatElement) *
(quantized ? Limitations::kBytesPerConcatElement : Precision(Precision::FP32).size());
concatAligningFilter->params["output_offset"] = std::to_string(bytesOffset);
// for padded rows we cannot use copy layer - TBD how to implement
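
The offset correction above can be followed with concrete numbers; a hypothetical concat input at byte offset 100, a 16-byte device alignment and 2-byte concat elements give:

// aligned_offset  = max(0, ALIGN(100, 16) - 16) = max(0, 112 - 16) = 96
// num_rows_padded = (offset - aligned_offset) / kBytesPerConcatElement = (100 - 96) / 2 = 2
// num_rows_out    = num_rows_padded + num_rows_in
// bytesOffset     = (96 / 2) * 2 = 96   in the quantized path
//                 = (96 / 2) * 4 = 192  in the fp32 path (Precision(FP32).size() == 4)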
@ -1496,7 +1498,7 @@ void InsertSplitAligningFilterPass::run() {
for (auto&& splitOutput : l->outData) {
auto outputSize = product(begin(splitOutput->getDims()), end(splitOutput->getDims()));
if ((currentOffset != ALIGN64(currentOffset)) || (padding != 0)) {
if ((!Limitations::get_instance()->is_aligned(currentOffset)) || (padding != 0)) {
// check that this split output actually connected to further layers
if (getInputTo(splitOutput).empty()) {
log::debug() << "Output port: " << splitOutIndex << " of " << l->name << " unconnected, skipping\n";
@ -1507,7 +1509,7 @@ void InsertSplitAligningFilterPass::run() {
<< " Convolution Filter doesn't support batch=" << splitOutput->getDims().front();
}
// this split output not beginning from 64 bytes aligned boundary - need to correct by aligning
// this split output does not begin on an aligned byte boundary - need to correct it by inserting an
// aligning filter layer
auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
@ -1527,20 +1529,22 @@ void InsertSplitAligningFilterPass::run() {
auto inputData = splitOutput;
size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
size_t aligned_offset = std::max(
0,
static_cast<int>(ALIGN(currentOffset, Limitations::get_instance()->get_memory_alignment()) -
Limitations::get_instance()->get_memory_alignment()));
IE_ASSERT(filterLayer != nullptr);
// encodes offset to beginning of split layer input
filterLayer->params["offset"] =
std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
filterLayer->params["offset"] = std::to_string(aligned_offset / Limitations::kBytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
}
const auto offsetOfUnalignment =
(currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
(currentOffset - aligned_offset) / Limitations::kBytesPerSplitElement;
// TODO consider using a different number of filters to decrease the number of trailing zeros
// (additionalPaddingOfFilter)
const auto numberOfFilters = Limitations::kConvMinFiltersNum;
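
The split-aligning filter uses the same rounding; for a split output starting at byte 100 with 16-byte alignment and 2-byte split elements:

// aligned_offset          = max(0, ALIGN(100, 16) - 16) = 96
// filter params["offset"] = aligned_offset / kBytesPerSplitElement = 96 / 2 = 48
// offsetOfUnalignment     = (currentOffset - aligned_offset) / kBytesPerSplitElement = (100 - 96) / 2 = 2
// i.e. two leading elements of padding have to be skipped by the aligning filter.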

View File

@ -152,7 +152,7 @@ DECL_PASS(InsertSplitAligningFilter);
DECL_PASS(FlattenTrivialConcat);
/**
* @brief concat-aligning filter layer insertion required in cases when concat inputs size are not 64-aligned
* @brief concat-aligning filter layer insertion, required when concat input sizes are not aligned
*/
DECL_PASS(InsertConcatAligningFilter);

View File

@ -64,7 +64,9 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
auto& input = conv->get_input_shape(0);
uint32_t width = input.back();
uint32_t in_channels = input.at(1);
auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
auto split_sizes = GetAlignedSplitSizes(width,
Limitations::kBufferMaxSize / in_channels,
Limitations::get_instance()->get_memory_alignment());
IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
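
With the device alignment now passed explicitly, the resulting split widths differ per target. A hedged example, assuming kBufferMaxSize is 65528 and the 1 x 64 x 1 x 4096 input used by the transformation tests further below:

// budget per split  = Limitations::kBufferMaxSize / in_channels = 65528 / 64 = 1023
// 64-byte alignment : maxAlignedSplitSize = 1023 - 1023 % 64 = 960
//                     -> split widths {960, 960, 960, 960, 256}   (GNA 3.0 / 3.5)
// 16-byte alignment : maxAlignedSplitSize = 1023 - 1023 % 16 = 1008
//                     -> split widths {1008, 1008, 1008, 1008, 64} (GNA 3.6)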

View File

@ -7,9 +7,13 @@
#include <vector>
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "common/gna_target.hpp"
#include "layers/gna_split_layer.hpp"
#include "ngraph/opsets/opset9.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::target;
namespace {
using GetAlignedSplitSizesData = std::tuple<uint32_t, // total size
@ -19,10 +23,15 @@ using GetAlignedSplitSizesData = std::tuple<uint32_t, // total size
>;
const std::vector<GetAlignedSplitSizesData> data = {
GetAlignedSplitSizesData{10, 100, 64, std::vector<uint32_t>{10}},
GetAlignedSplitSizesData{1024, 100, 64, std::vector<uint32_t>(16, 64)},
GetAlignedSplitSizesData{151, 100, 64, std::vector<uint32_t>{64, 64, 23}},
GetAlignedSplitSizesData{151, 65, 32, std::vector<uint32_t>{64, 64, 23}},
GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}}};
GetAlignedSplitSizesData{151, 33, 32, std::vector<uint32_t>{32, 32, 32, 32, 23}},
GetAlignedSplitSizesData{151, 17, 16, std::vector<uint32_t>{16, 16, 16, 16, 16, 16, 16, 16, 16, 7}},
GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}},
GetAlignedSplitSizesData{67000, 65528, 64, std::vector<uint32_t>{65472, 1528}},
GetAlignedSplitSizesData{67000, 65528, 16, std::vector<uint32_t>{65520, 1480}}};
TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
for (const auto& dataItem : data) {
@ -38,55 +47,86 @@ using VariadicSplitParameters = std::tuple<ov::Shape, // input size
bool // supported
>;
const std::vector<VariadicSplitParameters> variadic_split_data = {
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false},
};
TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
void RunVariadicSplitSupportedTest(DeviceVersion device_version, std::vector<VariadicSplitParameters> test_vectors) {
ov::Shape input_shape;
uint32_t axis;
std::vector<int32_t> split_lengths;
bool result;
for (const auto& item : variadic_split_data) {
Limitations::init(device_version);
for (const auto& item : test_vectors) {
std::tie(input_shape, axis, split_lengths, result) = item;
auto split = std::make_shared<ngraph::opset9::VariadicSplit>(
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}),
ngraph::opset9::Constant::create(ngraph::element::i64,
ngraph::Shape({split_lengths.size()}),
split_lengths));
ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
ASSERT_TRUE(Limitations::is_split_supported(split, false) == result);
}
}
TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_5) {
RunVariadicSplitSupportedTest(
DeviceVersion::GNA3_5,
{VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{16, 1008}, false},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false}});
}
TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_6) {
RunVariadicSplitSupportedTest(
DeviceVersion::GNA3_6,
{VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{16, 1008}, true},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false}});
}
using SplitParameters = std::tuple<ov::Shape, // input size
uint32_t, // axis
uint32_t, // num_splits
bool // supported
>;
const std::vector<SplitParameters> split_data = {
SplitParameters{ov::Shape{1024}, 0, 4, true},
SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
SplitParameters{ov::Shape{1024}, 0, 64, false},
SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
};
TEST(CheckSplitSupported, CheckSplitSupported) {
void RunSplitSupportedTest(DeviceVersion device_version, std::vector<SplitParameters> test_vectors) {
ov::Shape input_shape;
uint32_t axis;
uint32_t num_splits;
bool result;
for (const auto& item : split_data) {
Limitations::init(device_version);
for (const auto& item : test_vectors) {
std::tie(input_shape, axis, num_splits, result) = item;
auto split = std::make_shared<ngraph::opset9::Split>(
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
num_splits);
ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
ASSERT_TRUE(Limitations::is_split_supported(split, false) == result);
}
}
TEST(CheckSplitSupported, CheckSplitSupported_GNA3_5) {
RunSplitSupportedTest(DeviceVersion::GNA3_5,
{
SplitParameters{ov::Shape{1024}, 0, 4, true},
SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
SplitParameters{ov::Shape{1024}, 0, 64, false},
SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
});
}
TEST(CheckSplitSupported, CheckSplitSupported_GNA3_6) {
RunSplitSupportedTest(DeviceVersion::GNA3_6,
{
SplitParameters{ov::Shape{1024}, 0, 4, true},
SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
SplitParameters{ov::Shape{1024}, 0, 64, true},
SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
});
}
} // namespace

View File

@ -9,9 +9,14 @@
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::target;
namespace testing {
namespace {
@ -126,29 +131,22 @@ ngraph::Output<ngraph::Node> CreateConvolution::createOutputNode(const ngraph::O
}
// should be used only after CreateBaseDecorator
template <const ngraph::Shape& kernel_shape, const ngraph::Shape& split_shape>
class CreateSplittedConvolution : public CreateGraphDecorator {
public:
CreateSplittedConvolution(CreateGraphDecoratorPtr prev,
const ngraph::Shape& kernel_shape = ngraph::Shape{1, 64, 1, 1},
const ngraph::Shape& split_shape = ngraph::Shape{960, 960, 960, 960, 256})
CreateSplittedConvolution(CreateGraphDecoratorPtr prev)
: CreateGraphDecorator(std::move(prev)),
kernel_shape_(kernel_shape),
split_shape_(split_shape) {}
protected:
void updateGraph(Graph& graph) override;
private:
const ngraph::Shape kernel_shape_;
const ngraph::Shape split_shape_;
};
void CreateSplittedConvolution::updateGraph(Graph& graph) {
void updateGraph(Graph& graph) override {
auto split_node_c1 =
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{3});
auto split_node_c2 =
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_shape_.size()}), split_shape_);
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
auto split_node =
std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
auto kernel = ngraph::opset7::Constant::create(ngraph::element::f32, kernel_shape_, {1});
@ -161,7 +159,12 @@ void CreateSplittedConvolution::updateGraph(Graph& graph) {
ngraph::Strides{1, 1});
graph.output_nodes.push_back(convolution_operation);
}
}
}
private:
const ngraph::Shape kernel_shape_;
const ngraph::Shape split_shape_;
};
class CreateAdd : public CreateAppendableGraphDecorator {
public:
@ -261,9 +264,10 @@ Graph createSolidGraph(const ngraph::Shape& input_shape, const ngraph::Shape& ke
// -------------------------------------------------------------------------------------------------------
using TestParams = std::tuple<Graph, Graph, ngraph::pass::Manager>;
class SplitConvolutionFixture : public CommonTestUtils::TestsCommon,
public ::testing::WithParamInterface<
std::tuple<Graph /* tranformed */, Graph /* reference */, ngraph::pass::Manager>> {
public ::testing::WithParamInterface<std::tuple<DeviceVersion, TestParams>> {
public:
void SetUp() override;
@ -274,10 +278,14 @@ public:
void SplitConvolutionFixture::SetUp() {
// TODO: use auto & [transformed_graph, reference_graph] = this->GetParam() when C++17
DeviceVersion device_version;
TestParams params;
Graph transformed_graph;
Graph reference_graph;
std::tie(transformed_graph, reference_graph, pass_manager) = this->GetParam();
std::tie(device_version, params) = this->GetParam();
std::tie(transformed_graph, reference_graph, pass_manager) = params;
Limitations::init(device_version);
function = transformed_graph.createFunction();
reference_function = reference_graph.createFunction();
}
@ -305,21 +313,11 @@ TEST_P(SplitConvolutionFixture, CompareFunctions) {
}
INSTANTIATE_TEST_SUITE_P(
SplitConvolutionTestSuite,
SplitConvolution_GNA3_0_3_5_3_6_TestSuite,
SplitConvolutionFixture,
::testing::Combine(
::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6),
::testing::Values(
std::make_tuple(createGraph<CreateConvolution>(),
createGraph<CreateConcat, CreateSplittedConvolution>(),
createPassManager<ov::intel_gna::pass::SplitConvolution>()),
std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateAdd, CreateSplittedConvolution>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateSplittedConvolution>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplittedConvolution>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
createPassManager<ov::intel_gna::pass::SplitConvolution>()),
@ -332,7 +330,53 @@ INSTANTIATE_TEST_SUITE_P(
std::make_tuple(
createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>())));
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
ngraph::Shape kernel_shape_3_5 = {1, 64, 1, 1};
ngraph::Shape split_shape_3_5 = {960, 960, 960, 960, 256};
using CreateSplitedConvolution3_5 = CreateSplittedConvolution<kernel_shape_3_5, split_shape_3_5>;
INSTANTIATE_TEST_SUITE_P(
SplitConvolution_GNA3_0_3_5_TestSuite,
SplitConvolutionFixture,
::testing::Combine(
::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5),
::testing::Values(
std::make_tuple(createGraph<CreateConvolution>(),
createGraph<CreateConcat, CreateSplitedConvolution3_5>(),
createPassManager<ov::intel_gna::pass::SplitConvolution>()),
std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateAdd, CreateSplitedConvolution3_5>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateSplitedConvolution3_5>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplitedConvolution3_5>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
ngraph::Shape kernel_shape_3_6 = {1, 64, 1, 1};
ngraph::Shape split_shape_3_6 = {1008, 1008, 1008, 1008, 64};
using CreateSplitedConvolution3_6 = CreateSplittedConvolution<kernel_shape_3_6, split_shape_3_6>;
INSTANTIATE_TEST_SUITE_P(
SplitConvolution_GNA3_6_TestSuite,
SplitConvolutionFixture,
::testing::Combine(
::testing::Values(DeviceVersion::GNA3_6),
::testing::Values(
std::make_tuple(createGraph<CreateConvolution>(),
createGraph<CreateConcat, CreateSplitedConvolution3_6>(),
createPassManager<ov::intel_gna::pass::SplitConvolution>()),
std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateAdd, CreateSplitedConvolution3_6>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateSplitedConvolution3_6>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplitedConvolution3_6>(),
createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
} // namespace
} // namespace testing

View File

@ -11,10 +11,15 @@
#include <ngraph/pass/manager.hpp>
#include <transformations/init_node_info.hpp>
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp"
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/split_eltwise.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::target;
namespace testing {
namespace {
@ -87,7 +92,8 @@ static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& inp
}
}
typedef std::tuple<ngraph::Shape,
typedef std::tuple<DeviceVersion, // device version
ngraph::Shape, // input shape
bool, // with const
bool, // with fq
ELTWISE_TYPE // eltwise type
@ -95,13 +101,15 @@ typedef std::tuple<ngraph::Shape,
EltwiseSplitParams;
static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitParams> obj) {
DeviceVersion device_ver;
ngraph::Shape shape;
bool with_const;
bool with_fq;
ELTWISE_TYPE type;
std::tie(shape, with_const, with_fq, type) = obj.param;
std::tie(device_ver, shape, with_const, with_fq, type) = obj.param;
std::ostringstream result;
result << DeviceToString(device_ver) << "_";
result << "IS=" << CommonTestUtils::vec2str(shape) << "_";
result << "wConst=" << with_const << "_";
result << "wFQ=" << with_fq << "_";
@ -132,11 +140,13 @@ public:
};
void SplitEltwiseTestSuiteFixture::SetUp() {
DeviceVersion device_ver;
ngraph::Shape shape;
bool with_const;
bool with_fq;
ELTWISE_TYPE type;
std::tie(shape, with_const, with_fq, type) = this->GetParam();
std::tie(device_ver, shape, with_const, with_fq, type) = this->GetParam();
Limitations::init(device_ver);
function = createFunction(shape, with_const, with_fq, type, false);
reference_function = createFunction(shape, with_const, with_fq, type, true);
}
@ -158,13 +168,16 @@ TEST_P(SplitEltwiseTestSuiteFixture, CompareFunctions) {
const std::vector<ov::Shape> inputShape = {{1, 67000}, {1, 500000}, {1, 936, 513}, {1, 64, 64, 64}, {1, 256, 64, 64}};
INSTANTIATE_TEST_SUITE_P(SplitEltwiseTestSuite,
INSTANTIATE_TEST_SUITE_P(
SplitEltwiseTestSuite,
SplitEltwiseTestSuiteFixture,
::testing::Combine(::testing::ValuesIn(inputShape),
::testing::Combine(::testing::ValuesIn(std::vector<DeviceVersion>{DeviceVersion::GNA3_0, // device version
DeviceVersion::GNA3_5,
DeviceVersion::GNA3_6}),
::testing::ValuesIn(inputShape),
::testing::ValuesIn(std::vector<bool>{true, false}), // with const
::testing::ValuesIn(std::vector<bool>{true, false}), // with fq
::testing::ValuesIn(std::vector<ELTWISE_TYPE>{
ELTWISE_TYPE::Sum,
::testing::ValuesIn(std::vector<ELTWISE_TYPE>{ELTWISE_TYPE::Sum,
ELTWISE_TYPE::Sub,
ELTWISE_TYPE::Prod})), // eltwise type
getTestCaseName);