16 byte memory alignment and concat (#17712)
* use device specific alignment instead of ALIGN64 macro
* update for tests
* update after review
Parent: 0b708b5eff
Commit: a4519f0a2c
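For context before the hunks: the substance of this commit is that alignment checks hard-coded to 64 bytes through the ALIGN64 macro now ask the Limitations singleton for the target device's alignment (64 bytes for GNA 3.0/3.5, 16 bytes for GNA 3.6, as the updated tests below suggest). A minimal standalone sketch of the before/after semantics; the ALIGN macro mirrors the plugin's round-up macro, while LimitationsSketch and the sample values are illustrative assumptions, not the plugin's actual types:

#include <cassert>
#include <cstddef>

// Round addr up to the next multiple of alignment (mirrors the plugin's ALIGN macro).
#define ALIGN(addr, alignment) ((((addr) + (alignment)-1) / (alignment)) * (alignment))
#define ALIGN64(addr) ALIGN(addr, 64)

// Stand-in for Limitations::is_aligned(): the alignment is a per-device runtime value.
struct LimitationsSketch {
    size_t memory_alignment;  // assumed: 64 for GNA 3.0/3.5, 16 for GNA 3.6
    bool is_aligned(size_t addr) const {
        return addr == ALIGN(addr, memory_alignment);
    }
};

int main() {
    const size_t offset = 48;
    assert(ALIGN64(offset) != offset);                 // old check: 48 is not 64-aligned
    assert(LimitationsSketch{16}.is_aligned(offset));  // new check passes on a 16-byte device
    assert(!LimitationsSketch{64}.is_aligned(offset));
    return 0;
}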
@@ -661,7 +661,6 @@ constexpr uint32_t Limitations::kConvFiltersNumDivider;
 constexpr uint32_t Limitations::kConvFilterSizeDivider;
 constexpr uint32_t Limitations::kConvFilterMaxSize;
 constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
-constexpr uint32_t Limitations::kInputByteAlignment;
 constexpr uint32_t Limitations::kNoOfInputsDivisor;
 constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
 constexpr uint32_t Limitations::kAffineMaxBatchSize;
@@ -673,6 +672,7 @@ constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
 constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
 constexpr uint32_t Limitations::kBytesPerSplitElement;
 constexpr uint32_t Limitations::kBytesPerCropElement;
+constexpr uint32_t Limitations::kBytesPerConcatElement;
 constexpr uint32_t Limitations::kMemoryPageSize;
 
 thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
@@ -248,6 +248,7 @@ public:
 
     bool use_only_16bit_convolution_weights() const;
     bool is_crop_affined_offset(size_t numberOfElements) const;
+    bool is_aligned(size_t addr) const;
     size_t get_memory_alignment() const;
     std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
 
@@ -260,7 +261,6 @@ public:
     constexpr static uint32_t kConvFilterSizeDivider = 8;
     constexpr static uint32_t kConvFilterMaxSize = 768;
     constexpr static uint32_t kConvEachKernelByteAlignment = 16;
-    constexpr static uint32_t kInputByteAlignment = 64;
     constexpr static uint32_t kNoOfInputsDivisor = 8;
     constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
     constexpr static uint32_t kAffineMaxBatchSize = 8;
@@ -274,10 +274,12 @@ public:
     // Currently split layer only supports 2 bytes in int16 and int8 mode.
     // In fp32 mode this is not necessary but is useful for testing
     constexpr static uint32_t kBytesPerSplitElement = 2;
 
     // Currently crop layer only supports 2 bytes in int16 and int8 mode.
     // In fp32 mode this is not necessary but is useful for testing
     constexpr static uint32_t kBytesPerCropElement = 2;
 
+    // Currently concat layer only supports 2 bytes in int16 and int8 mode.
+    // In fp32 mode this is not necessary but is useful for testing
+    constexpr static uint32_t kBytesPerConcatElement = 2;
     constexpr static uint32_t kMemoryPageSize = 4096;
 
 private:
@@ -306,7 +308,11 @@ inline std::shared_ptr<Limitations> Limitations::get_instance() {
 
 inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
     const auto cropOffset = numberOfElements * kBytesPerCropElement;
-    return (ALIGN64(cropOffset) != cropOffset);
+    return !is_aligned(cropOffset);
+}
+
+inline bool Limitations::is_aligned(size_t addr) const {
+    return (addr == ALIGN(addr, get_memory_alignment()));
 }
 
 inline size_t Limitations::get_memory_alignment() const {
@@ -87,7 +87,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
            offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
         }
     }
-    return (offset == ALIGN64(offset));
+    return limitations::Limitations::get_instance()->is_aligned(offset);
 }
 
 inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
@@ -47,12 +47,11 @@ public:
     std::vector<SplitConnectedLayerInfo> splitOutputLayers;
 };
 
-// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
-inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
-                                                  uint32_t maxSplitSize,
-                                                  uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
+// @brief Returns sizes of split outputs to split the input tensor into aligned parts that are not greater than the
+// specified split size or alignment, depending on which one is larger
+inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t splitSize, uint32_t alignment) {
     std::vector<uint32_t> splitSizes;
-    uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
+    uint32_t maxAlignedSplitSize = std::max(splitSize - splitSize % alignment, alignment);
     uint32_t usedSize = 0;
     while (usedSize < totalSize) {
         uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
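To make the hunk above easier to follow, here is the splitting loop as a self-contained program; the loop tail (push and accumulate) is reconstructed from context rather than shown in the diff, and the expected outputs are taken from the unit-test vectors this commit adds further down:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Greedy split: every part except possibly the last is the largest multiple of
// alignment that does not exceed splitSize; the remainder becomes the final part.
std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t splitSize, uint32_t alignment) {
    std::vector<uint32_t> splitSizes;
    uint32_t maxAlignedSplitSize = std::max(splitSize - splitSize % alignment, alignment);
    uint32_t usedSize = 0;
    while (usedSize < totalSize) {
        uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
        splitSizes.push_back(partSize);  // assumed loop tail
        usedSize += partSize;
    }
    return splitSizes;
}

int main() {
    // Expected values come from the test data added in this commit:
    assert(GetAlignedSplitSizes(151, 65, 32) == (std::vector<uint32_t>{64, 64, 23}));
    assert(GetAlignedSplitSizes(151, 33, 32) == (std::vector<uint32_t>{32, 32, 32, 32, 23}));
    assert(GetAlignedSplitSizes(67000, 65528, 64) == (std::vector<uint32_t>{65472, 1528}));
    assert(GetAlignedSplitSizes(67000, 65528, 16) == (std::vector<uint32_t>{65520, 1480}));
    return 0;
}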
@@ -73,22 +72,21 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
     IE_ASSERT(firstValuableDim != std::end(dims));
     auto splittedElementsSize = *firstValuableDim;
     auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
-    auto alignment = limitations::Limitations::kInputByteAlignment;
+    auto alignment = limitations::Limitations::get_instance()->get_memory_alignment();
 
-    // Split output size should be multiple by 64 to avoid align filters insertion,
-    // but we need to check if our input size to split exceeds 64; if not we can always
+    // Split output size should be multiple of device memory alignment to avoid align filters insertion,
+    // but we need to check if our input size to split exceeds alignment; if not we can always
     // split if the remaining size is aligned
-    if (splittedElementsSize <= alignment) {
+    auto split_size = limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize;
+
+    if (splittedElementsSize <= alignment || split_size < alignment) {
         if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
             alignment = 1;
         } else {
             return {splittedDimIx, splitSizes};
         }
     }
-    splitSizes =
-        GetAlignedSplitSizes(splittedElementsSize,
-                             limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
-                             alignment);
+    splitSizes = GetAlignedSplitSizes(splittedElementsSize, split_size, alignment);
     return {splittedDimIx, splitSizes};
 }
 
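The new split_size < alignment guard in this hunk covers the case where the per-axis buffer budget (kBufferMaxSize * splittedElementsSize / totalElementsSize) is smaller than the device alignment: the code either drops to an alignment of 1 (when the stride between parts keeps offsets aligned anyway) or refuses to split. A sketch of just that decision; effective_alignment is a hypothetical helper name, and kBufferMaxSize = 65528 is assumed from the test vectors in this commit:

#include <cstdint>
#include <iostream>

constexpr uint32_t kBufferMaxSize = 65528;  // assumed from the GetAlignedSplitSizes tests

uint32_t effective_alignment(uint32_t splittedElementsSize,
                             uint32_t totalElementsSize,
                             uint32_t alignment,
                             bool& splittable) {
    splittable = true;
    // 64-bit intermediate avoids the uint32 overflow this sketch would otherwise hit.
    uint32_t split_size =
        static_cast<uint64_t>(kBufferMaxSize) * splittedElementsSize / totalElementsSize;
    if (splittedElementsSize <= alignment || split_size < alignment) {
        if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
            return 1;  // axis too small for aligned parts, but offsets stay aligned
        }
        splittable = false;  // no aligned split is possible; caller returns empty sizes
    }
    return alignment;
}

int main() {
    bool ok;
    std::cout << effective_alignment(67000, 67000, 64, ok) << ' ' << ok << '\n';  // 64 1
    std::cout << effective_alignment(64, 64 * 128, 64, ok) << ' ' << ok << '\n';  // 1 1
    return 0;
}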
@@ -1247,9 +1247,6 @@ void FlattenTrivialConcatPass::run() {
 void InsertConcatAligningFilterPass::run() {
     OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
     auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-    // currently concat layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull
-    // for testing
-    const int bytesPerConcatElement = 2;
 
     int numOfFilterLayers = 0;
 
@@ -1273,7 +1270,7 @@ void InsertConcatAligningFilterPass::run() {
 
             auto concatInput = getLayerByIndex(input_idx);
             auto dims = concatInput->getDims();
-            auto outputSize = details::product(++dims.begin(), dims.end()) * bytesPerConcatElement;
+            auto outputSize = details::product(++dims.begin(), dims.end()) * Limitations::kBytesPerConcatElement;
 
             auto useAlignFilterIf = [&concatLayer, &getLayerByIndex](int concat_input_idx) {
                 if (concatLayer->insData.size() <= concat_input_idx)
@@ -1290,7 +1287,8 @@ void InsertConcatAligningFilterPass::run() {
             // correcting offset by copy layer insertion. This can be improved by collapsing copy and affine or diagonal
             // later-on if next concat inputs requires align filter - then current input also requires either copy or
             // align filter
-            if (ALIGN64(offset) != offset || (ALIGN64(outputSize) != outputSize && useAlignFilterIf(input_idx + 1))) {
+            if ((!Limitations::get_instance()->is_aligned(offset)) ||
+                ((!Limitations::get_instance()->is_aligned(outputSize)) && useAlignFilterIf(input_idx + 1))) {
                 auto prevLayer = getCreatorLayer(concatInput).lock();
                 // input layer parameters are copied not using GNA-primitives - so nothing to allign here.
                 if (!useAlignFilterIf(input_idx))
@@ -1310,13 +1308,17 @@ void InsertConcatAligningFilterPass::run() {
                 }
 
                 auto num_rows_in = dims[1];
-                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(offset) - 64));
-                size_t num_rows_padded = (offset - aligned64_offset) / bytesPerConcatElement;
+                size_t aligned_offset =
+                    std::max(0,
+                             static_cast<int>(ALIGN(offset, Limitations::get_instance()->get_memory_alignment()) -
+                                              Limitations::get_instance()->get_memory_alignment()));
+                size_t num_rows_padded = (offset - aligned_offset) / Limitations::kBytesPerConcatElement;
                 size_t num_rows_out = num_rows_padded + num_rows_in;
 
                 // encodes offset to beginning of split layer input
                 size_t bytesOffset =
-                    (aligned64_offset / bytesPerConcatElement) * (quantized ? bytesPerConcatElement : 4);
+                    (aligned_offset / Limitations::kBytesPerConcatElement) *
+                    (quantized ? Limitations::kBytesPerConcatElement : Precision(Precision::FP32).size());
                 concatAligningFilter->params["output_offset"] = std::to_string(bytesOffset);
 
                 // for padded rows we cannot use copy layer - TBD how to implement
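A worked example of the offset arithmetic in the hunk above: the pass rounds the concat input offset down to the previous alignment boundary and turns the gap into padded filter rows. The concrete numbers (64-byte alignment, 100-byte offset, 2-byte elements) are chosen for illustration:

#include <algorithm>
#include <cassert>
#include <cstddef>

#define ALIGN(addr, alignment) ((((addr) + (alignment)-1) / (alignment)) * (alignment))

int main() {
    const size_t alignment = 64, bytesPerElement = 2, offset = 100;
    // ALIGN rounds up, so subtracting one alignment step rounds down to the previous boundary.
    const size_t aligned_offset = std::max(0, static_cast<int>(ALIGN(offset, alignment) - alignment));
    assert(aligned_offset == 64);  // ALIGN(100, 64) = 128; 128 - 64 = 64
    // The 36 bytes between the boundary and the real offset become padded rows.
    const size_t num_rows_padded = (offset - aligned_offset) / bytesPerElement;
    assert(num_rows_padded == 18);
    return 0;
}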
@@ -1496,7 +1498,7 @@ void InsertSplitAligningFilterPass::run() {
         for (auto&& splitOutput : l->outData) {
             auto outputSize = product(begin(splitOutput->getDims()), end(splitOutput->getDims()));
 
-            if ((currentOffset != ALIGN64(currentOffset)) || (padding != 0)) {
+            if ((!Limitations::get_instance()->is_aligned(currentOffset)) || (padding != 0)) {
                 // check that this split output actually connected to further layers
                 if (getInputTo(splitOutput).empty()) {
                     log::debug() << "Output port: " << splitOutIndex << " of " << l->name << " unconnected, skipping\n";
@@ -1507,7 +1509,7 @@ void InsertSplitAligningFilterPass::run() {
                         << " Convolution Filter doesn't support batch=" << splitOutput->getDims().front();
                 }
 
-                // this split output not beginning from 64 bytes aligned boundary - need to correct by aligning
+                // this split output not beginning from aligned bytes boundary - need to correct by aligning
                 // filter layer insert the filter
                 auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
 
@@ -1527,20 +1529,22 @@ void InsertSplitAligningFilterPass::run() {
 
                 auto inputData = splitOutput;
 
-                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
+                size_t aligned_offset = std::max(
+                    0,
+                    static_cast<int>(ALIGN(currentOffset, Limitations::get_instance()->get_memory_alignment()) -
+                                     Limitations::get_instance()->get_memory_alignment()));
 
                 IE_ASSERT(filterLayer != nullptr);
 
                 // encodes offset to beginning of split layer input
-                filterLayer->params["offset"] =
-                    std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
+                filterLayer->params["offset"] = std::to_string(aligned_offset / Limitations::kBytesPerSplitElement);
                 auto dims = splitOutput->getTensorDesc().getDims();
                 if (dims.size() > 3) {
                     THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
                 }
 
                 const auto offsetOfUnalignment =
-                    (currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
+                    (currentOffset - aligned_offset) / Limitations::kBytesPerSplitElement;
                 // TODO consider to use a different number of filters do decrese the number of trailing zeros
                 // (additionalPaddingOfFilter)
                 const auto numberOfFilters = Limitations::kConvMinFiltersNum;
@@ -152,7 +152,7 @@ DECL_PASS(InsertSplitAligningFilter);
 DECL_PASS(FlattenTrivialConcat);
 
 /**
- * @brief concat-aligning filter layer insertion required in cases when concat inputs size are not 64-aligned
+ * @brief concat-aligning filter layer insertion required in cases when concat inputs size are not aligned
 */
 DECL_PASS(InsertConcatAligningFilter);
 
@@ -64,7 +64,9 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
     auto& input = conv->get_input_shape(0);
     uint32_t width = input.back();
     uint32_t in_channels = input.at(1);
-    auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
+    auto split_sizes = GetAlignedSplitSizes(width,
+                                            Limitations::kBufferMaxSize / in_channels,
+                                            Limitations::get_instance()->get_memory_alignment());
     IE_ASSERT(split_sizes.size() > 1);
     std::vector<int64_t> split_sizes_casted(split_sizes.size());
     std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
@@ -7,9 +7,13 @@
 #include <vector>
 // to suppress deprecated definition errors
 #define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
+#include "common/gna_target.hpp"
 #include "layers/gna_split_layer.hpp"
 #include "ngraph/opsets/opset9.hpp"
 
+using namespace ov::intel_gna::limitations;
+using namespace ov::intel_gna::target;
+
 namespace {
 
 using GetAlignedSplitSizesData = std::tuple<uint32_t,  // total size
@@ -19,10 +23,15 @@ using GetAlignedSplitSizesData = std::tuple<uint32_t, // total size
                                             >;
 
 const std::vector<GetAlignedSplitSizesData> data = {
+    GetAlignedSplitSizesData{10, 100, 64, std::vector<uint32_t>{10}},
     GetAlignedSplitSizesData{1024, 100, 64, std::vector<uint32_t>(16, 64)},
     GetAlignedSplitSizesData{151, 100, 64, std::vector<uint32_t>{64, 64, 23}},
     GetAlignedSplitSizesData{151, 65, 32, std::vector<uint32_t>{64, 64, 23}},
-    GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}}};
+    GetAlignedSplitSizesData{151, 33, 32, std::vector<uint32_t>{32, 32, 32, 32, 23}},
+    GetAlignedSplitSizesData{151, 17, 16, std::vector<uint32_t>{16, 16, 16, 16, 16, 16, 16, 16, 16, 7}},
+    GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}},
+    GetAlignedSplitSizesData{67000, 65528, 64, std::vector<uint32_t>{65472, 1528}},
+    GetAlignedSplitSizesData{67000, 65528, 16, std::vector<uint32_t>{65520, 1480}}};
 
 TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
     for (const auto& dataItem : data) {
@@ -38,55 +47,86 @@ using VariadicSplitParameters = std::tuple<ov::Shape, // input size
                                            bool // supported
                                            >;
 
-const std::vector<VariadicSplitParameters> variadic_split_data = {
-    VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
-    VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
-    VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
-    VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false},
-};
-
-TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
+void RunVariadicSplitSupportedTest(DeviceVersion device_version, std::vector<VariadicSplitParameters> test_vectors) {
     ov::Shape input_shape;
     uint32_t axis;
     std::vector<int32_t> split_lengths;
     bool result;
-    for (const auto& item : variadic_split_data) {
+    Limitations::init(device_version);
+    for (const auto& item : test_vectors) {
         std::tie(input_shape, axis, split_lengths, result) = item;
 
         auto split = std::make_shared<ngraph::opset9::VariadicSplit>(
             std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
             ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}),
             ngraph::opset9::Constant::create(ngraph::element::i64,
                                              ngraph::Shape({split_lengths.size()}),
                                              split_lengths));
-        ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
+        ASSERT_TRUE(Limitations::is_split_supported(split, false) == result);
     }
 }
 
+TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_5) {
+    RunVariadicSplitSupportedTest(
+        DeviceVersion::GNA3_5,
+        {VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
+         VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
+         VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{16, 1008}, false},
+         VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
+         VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false}});
+}
+
+TEST(CheckSplitSupported, CheckVariadicSplitSupported_GNA3_6) {
+    RunVariadicSplitSupportedTest(
+        DeviceVersion::GNA3_6,
+        {VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
+         VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
+         VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{16, 1008}, true},
+         VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
+         VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false}});
+}
+
 using SplitParameters = std::tuple<ov::Shape, // input size
                                    uint32_t, // axis
                                    uint32_t, // num_splits
                                    bool // supported
                                    >;
 
-const std::vector<SplitParameters> split_data = {
-    SplitParameters{ov::Shape{1024}, 0, 4, true},
-    SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
-    SplitParameters{ov::Shape{1024}, 0, 64, false},
-    SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
-};
-
-TEST(CheckSplitSupported, CheckSplitSupported) {
+void RunSplitSupportedTest(DeviceVersion device_version, std::vector<SplitParameters> test_vectors) {
     ov::Shape input_shape;
     uint32_t axis;
     uint32_t num_splits;
     bool result;
-    for (const auto& item : split_data) {
+    Limitations::init(device_version);
+    for (const auto& item : test_vectors) {
         std::tie(input_shape, axis, num_splits, result) = item;
         auto split = std::make_shared<ngraph::opset9::Split>(
             std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
             ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
             num_splits);
-        ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
+        ASSERT_TRUE(Limitations::is_split_supported(split, false) == result);
     }
 }
 
+TEST(CheckSplitSupported, CheckSplitSupported_GNA3_5) {
+    RunSplitSupportedTest(DeviceVersion::GNA3_5,
+                          {
+                              SplitParameters{ov::Shape{1024}, 0, 4, true},
+                              SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
+                              SplitParameters{ov::Shape{1024}, 0, 64, false},
+                              SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
+                          });
+}
+
+TEST(CheckSplitSupported, CheckSplitSupported_GNA3_6) {
+    RunSplitSupportedTest(DeviceVersion::GNA3_6,
+                          {
+                              SplitParameters{ov::Shape{1024}, 0, 4, true},
+                              SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
+                              SplitParameters{ov::Shape{1024}, 0, 64, true},
+                              SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
+                          });
+}
 }  // namespace
(File diff suppressed because it is too large.)
@@ -9,9 +9,14 @@
 #include <ngraph/pass/manager.hpp>
 #include <transformations/init_node_info.hpp>
 
+#include "backend/gna_limitations.hpp"
+#include "common/gna_target.hpp"
 #include "common_test_utils/ngraph_test_utils.hpp"
 #include "transformations/split_convolution_with_large_buffer_size.hpp"
 
+using namespace ov::intel_gna::limitations;
+using namespace ov::intel_gna::target;
+
 namespace testing {
 namespace {
 
@@ -126,43 +131,41 @@ ngraph::Output<ngraph::Node> CreateConvolution::createOutputNode(const ngraph::O
 }
 
 // should be used only after CreateBaseDecorator
+template <const ngraph::Shape& kernel_shape, const ngraph::Shape& split_shape>
 class CreateSplittedConvolution : public CreateGraphDecorator {
 public:
-    CreateSplittedConvolution(CreateGraphDecoratorPtr prev,
-                              const ngraph::Shape& kernel_shape = ngraph::Shape{1, 64, 1, 1},
-                              const ngraph::Shape& split_shape = ngraph::Shape{960, 960, 960, 960, 256})
+    CreateSplittedConvolution(CreateGraphDecoratorPtr prev)
        : CreateGraphDecorator(std::move(prev)),
          kernel_shape_(kernel_shape),
          split_shape_(split_shape) {}
 
 protected:
-    void updateGraph(Graph& graph) override;
+    void updateGraph(Graph& graph) override {
+        auto split_node_c1 =
+            ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{3});
+        auto split_node_c2 =
+            ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_shape_.size()}), split_shape_);
+        auto split_node =
+            std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
+
+        auto kernel = ngraph::opset7::Constant::create(ngraph::element::f32, kernel_shape_, {1});
+
+        for (int i = 0; i < split_shape_.size(); ++i) {
+            auto convolution_operation = std::make_shared<ngraph::opset7::Convolution>(split_node->output(i),
+                                                                                       kernel,
+                                                                                       ngraph::Strides{1, 1},
+                                                                                       ngraph::CoordinateDiff{0, 0},
+                                                                                       ngraph::CoordinateDiff{0, 0},
+                                                                                       ngraph::Strides{1, 1});
+            graph.output_nodes.push_back(convolution_operation);
+        }
+    }
 
 private:
     const ngraph::Shape kernel_shape_;
     const ngraph::Shape split_shape_;
 };
 
-void CreateSplittedConvolution::updateGraph(Graph& graph) {
-    auto split_node_c1 =
-        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{3});
-    auto split_node_c2 =
-        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_shape_.size()}), split_shape_);
-    auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(graph.input_params, split_node_c1, split_node_c2);
-
-    auto kernel = ngraph::opset7::Constant::create(ngraph::element::f32, kernel_shape_, {1});
-
-    for (int i = 0; i < split_shape_.size(); ++i) {
-        auto convolution_operation = std::make_shared<ngraph::opset7::Convolution>(split_node->output(i),
-                                                                                   kernel,
-                                                                                   ngraph::Strides{1, 1},
-                                                                                   ngraph::CoordinateDiff{0, 0},
-                                                                                   ngraph::CoordinateDiff{0, 0},
-                                                                                   ngraph::Strides{1, 1});
-        graph.output_nodes.push_back(convolution_operation);
-    }
-}
-
 class CreateAdd : public CreateAppendableGraphDecorator {
 public:
     CreateAdd(CreateGraphDecoratorPtr prev) : CreateAppendableGraphDecorator(std::move(prev)) {}
@@ -261,9 +264,10 @@ Graph createSolidGraph(const ngraph::Shape& input_shape, const ngraph::Shape& ke
 
 // -------------------------------------------------------------------------------------------------------
 
+using TestParams = std::tuple<Graph, Graph, ngraph::pass::Manager>;
+
 class SplitConvolutionFixture : public CommonTestUtils::TestsCommon,
-                                public ::testing::WithParamInterface<
-                                    std::tuple<Graph /* tranformed */, Graph /* reference */, ngraph::pass::Manager>> {
+                                public ::testing::WithParamInterface<std::tuple<DeviceVersion, TestParams>> {
 public:
     void SetUp() override;
 
@@ -274,10 +278,14 @@ public:
 
 void SplitConvolutionFixture::SetUp() {
     // TODO: use auto & [transformed_graph, reference_graph] = this->GetParam() when C++17
+    DeviceVersion device_version;
+    TestParams params;
     Graph transformed_graph;
     Graph reference_graph;
-    std::tie(transformed_graph, reference_graph, pass_manager) = this->GetParam();
+    std::tie(device_version, params) = this->GetParam();
+    std::tie(transformed_graph, reference_graph, pass_manager) = params;
 
+    Limitations::init(device_version);
     function = transformed_graph.createFunction();
     reference_function = reference_graph.createFunction();
 }
@@ -305,34 +313,70 @@ TEST_P(SplitConvolutionFixture, CompareFunctions) {
 }
 
 INSTANTIATE_TEST_SUITE_P(
-    SplitConvolutionTestSuite,
+    SplitConvolution_GNA3_0_3_5_3_6_TestSuite,
     SplitConvolutionFixture,
-    ::testing::Values(
-        std::make_tuple(createGraph<CreateConvolution>(),
-                        createGraph<CreateConcat, CreateSplittedConvolution>(),
-                        createPassManager<ov::intel_gna::pass::SplitConvolution>()),
-        std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
-                        createGraph<CreateConcat, CreateAdd, CreateSplittedConvolution>(),
-                        createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
-        std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
-                        createGraph<CreateConcat, CreateFakeQuantize, CreateSplittedConvolution>(),
-                        createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
-        std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
-                        createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplittedConvolution>(),
-                        createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
-        std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager<ov::intel_gna::pass::SplitConvolution>()),
-        std::make_tuple(createSolidGraph<CreateAdd>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph<CreateAdd>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
-        std::make_tuple(createSolidGraph<CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createSolidGraph<CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-                        createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
-        std::make_tuple(
-            createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-            createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
-            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>())));
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5, DeviceVersion::GNA3_6),
+        ::testing::Values(
+            std::make_tuple(createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager<ov::intel_gna::pass::SplitConvolution>()),
+            std::make_tuple(createSolidGraph<CreateAdd>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph<CreateAdd>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
+            std::make_tuple(createSolidGraph<CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createSolidGraph<CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
+            std::make_tuple(
+                createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                createSolidGraph<CreateAdd, CreateFakeQuantize>(ngraph::Shape{1, 1, 1, 1}, ngraph::Shape{1, 1, 1, 1}),
+                createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
+
+ngraph::Shape kernel_shape_3_5 = {1, 64, 1, 1};
+ngraph::Shape split_shape_3_5 = {960, 960, 960, 960, 256};
+using CreateSplitedConvolution3_5 = CreateSplittedConvolution<kernel_shape_3_5, split_shape_3_5>;
+
+INSTANTIATE_TEST_SUITE_P(
+    SplitConvolution_GNA3_0_3_5_TestSuite,
+    SplitConvolutionFixture,
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_0, DeviceVersion::GNA3_5),
+        ::testing::Values(
+            std::make_tuple(createGraph<CreateConvolution>(),
+                            createGraph<CreateConcat, CreateSplitedConvolution3_5>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolution>()),
+            std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateAdd, CreateSplitedConvolution3_5>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
+            std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateFakeQuantize, CreateSplitedConvolution3_5>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
+            std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplitedConvolution3_5>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
+
+ngraph::Shape kernel_shape_3_6 = {1, 64, 1, 1};
+ngraph::Shape split_shape_3_6 = {1008, 1008, 1008, 1008, 64};
+using CreateSplitedConvolution3_6 = CreateSplittedConvolution<kernel_shape_3_6, split_shape_3_6>;
+
+INSTANTIATE_TEST_SUITE_P(
+    SplitConvolution_GNA3_6_TestSuite,
+    SplitConvolutionFixture,
+    ::testing::Combine(
+        ::testing::Values(DeviceVersion::GNA3_6),
+        ::testing::Values(
+            std::make_tuple(createGraph<CreateConvolution>(),
+                            createGraph<CreateConcat, CreateSplitedConvolution3_6>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolution>()),
+            std::make_tuple(createGraph<CreateAdd, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateAdd, CreateSplitedConvolution3_6>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithBias>()),
+            std::make_tuple(createGraph<CreateFakeQuantize, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateFakeQuantize, CreateSplitedConvolution3_6>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()),
+            std::make_tuple(createGraph<CreateFakeQuantize, CreateAdd, CreateConvolution>(),
+                            createGraph<CreateConcat, CreateFakeQuantize, CreateAdd, CreateSplitedConvolution3_6>(),
+                            createPassManager<ov::intel_gna::pass::SplitConvolutionWithFq>()))));
+
 }  // namespace
 }  // namespace testing
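A quick consistency check on the reference split shapes above (the arithmetic is mine, not code from the commit): both variants cover the same 4096 elements along the split axis, but the GNA 3.0/3.5 parts are 64-aligned while the GNA 3.6 parts are only 16-aligned, matching the smaller device alignment:

#include <cassert>

int main() {
    // GNA 3.0/3.5 reference shape {960, 960, 960, 960, 256}: 64-aligned parts.
    assert(960 * 4 + 256 == 4096 && 960 % 64 == 0 && 256 % 64 == 0);
    // GNA 3.6 reference shape {1008, 1008, 1008, 1008, 64}: 16-aligned, not 64-aligned.
    assert(1008 * 4 + 64 == 4096 && 1008 % 16 == 0 && 1008 % 64 != 0);
    return 0;
}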
@@ -11,10 +11,15 @@
 #include <ngraph/pass/manager.hpp>
 #include <transformations/init_node_info.hpp>
 
+#include "backend/gna_limitations.hpp"
+#include "common/gna_target.hpp"
 #include "common_test_utils/common_utils.hpp"
 #include "common_test_utils/ngraph_test_utils.hpp"
 #include "transformations/split_eltwise.hpp"
 
+using namespace ov::intel_gna::limitations;
+using namespace ov::intel_gna::target;
+
 namespace testing {
 namespace {
 
@@ -87,21 +92,24 @@ static std::shared_ptr<ngraph::Function> createFunction(const ngraph::Shape& inp
     }
 }
 
-typedef std::tuple<ngraph::Shape,
-                   bool,         // with const
-                   bool,         // with fq
-                   ELTWISE_TYPE  // eltwise type
+typedef std::tuple<DeviceVersion,  // device version
+                   ngraph::Shape,  // input shape
+                   bool,           // with const
+                   bool,           // with fq
+                   ELTWISE_TYPE    // eltwise type
                    >
     EltwiseSplitParams;
 
 static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitParams> obj) {
+    DeviceVersion device_ver;
     ngraph::Shape shape;
     bool with_const;
     bool with_fq;
     ELTWISE_TYPE type;
-    std::tie(shape, with_const, with_fq, type) = obj.param;
+    std::tie(device_ver, shape, with_const, with_fq, type) = obj.param;
 
     std::ostringstream result;
+    result << DeviceToString(device_ver) << "_";
     result << "IS=" << CommonTestUtils::vec2str(shape) << "_";
     result << "wConst=" << with_const << "_";
     result << "wFQ=" << with_fq << "_";
@@ -132,11 +140,13 @@ public:
 };
 
 void SplitEltwiseTestSuiteFixture::SetUp() {
+    DeviceVersion device_ver;
     ngraph::Shape shape;
     bool with_const;
     bool with_fq;
     ELTWISE_TYPE type;
-    std::tie(shape, with_const, with_fq, type) = this->GetParam();
+    std::tie(device_ver, shape, with_const, with_fq, type) = this->GetParam();
+    Limitations::init(device_ver);
     function = createFunction(shape, with_const, with_fq, type, false);
     reference_function = createFunction(shape, with_const, with_fq, type, true);
 }
@@ -158,16 +168,19 @@ TEST_P(SplitEltwiseTestSuiteFixture, CompareFunctions) {
 
 const std::vector<ov::Shape> inputShape = {{1, 67000}, {1, 500000}, {1, 936, 513}, {1, 64, 64, 64}, {1, 256, 64, 64}};
 
-INSTANTIATE_TEST_SUITE_P(SplitEltwiseTestSuite,
-                         SplitEltwiseTestSuiteFixture,
-                         ::testing::Combine(::testing::ValuesIn(inputShape),
-                                            ::testing::ValuesIn(std::vector<bool>{true, false}),  // with const
-                                            ::testing::ValuesIn(std::vector<bool>{true, false}),  // with fq
-                                            ::testing::ValuesIn(std::vector<ELTWISE_TYPE>{
-                                                ELTWISE_TYPE::Sum,
-                                                ELTWISE_TYPE::Sub,
-                                                ELTWISE_TYPE::Prod})),  // eltwise type
-                         getTestCaseName);
+INSTANTIATE_TEST_SUITE_P(
+    SplitEltwiseTestSuite,
+    SplitEltwiseTestSuiteFixture,
+    ::testing::Combine(::testing::ValuesIn(std::vector<DeviceVersion>{DeviceVersion::GNA3_0,  // device version
+                                                                      DeviceVersion::GNA3_5,
+                                                                      DeviceVersion::GNA3_6}),
+                       ::testing::ValuesIn(inputShape),
+                       ::testing::ValuesIn(std::vector<bool>{true, false}),  // with const
+                       ::testing::ValuesIn(std::vector<bool>{true, false}),  // with fq
+                       ::testing::ValuesIn(std::vector<ELTWISE_TYPE>{ELTWISE_TYPE::Sum,
+                                                                     ELTWISE_TYPE::Sub,
+                                                                     ELTWISE_TYPE::Prod})),  // eltwise type
+    getTestCaseName);
 
 }  // namespace
 }  // namespace testing
|
Loading…
Reference in New Issue
Block a user