[GNA] Fix large eltwise split in case split dimension is less than alignment (#9933)
* [GNA] Fix large eltwise split in case split dimension is less than alignment
* Update src/plugins/intel_gna/optimizer/gna_pass_manager.cpp

Co-authored-by: Krzysztof Bruniecki <krzysztof.bruniecki@intel.com>
Co-authored-by: Krzysztof Bruniecki <krzysztof.bruniecki@intel.com>
This commit is contained in:
parent
d4779bb351
commit
069b3839ce
@ -23,6 +23,7 @@ constexpr uint32_t convFiltersNumDivider = 4;
|
||||
constexpr uint32_t convFilterSizeDivider = 8;
|
||||
constexpr uint32_t convFilterMaxSize = 768;
|
||||
constexpr uint32_t convEachKernelByteAlignment = 16;
|
||||
constexpr uint32_t inputByteAlignment = 64;
|
||||
constexpr uint32_t noOfInputsDivisor = 8;
|
||||
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "backend/gna_limitations.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
// Split, Slice
|
||||
@ -47,7 +48,7 @@ public:
|
||||
};
|
||||
|
||||
// @brief Returns the sizes of the split outputs that divide the input tensor into aligned parts, each not greater than the specified size
|
||||
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
|
||||
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = GNALimitations::inputByteAlignment) {
|
||||
std::vector<uint32_t> splitSizes;
|
||||
uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
|
||||
uint32_t usedSize = 0;
|
||||
|
@ -1451,10 +1451,21 @@ void EltwiseSplitOverChannelsPass::run() {
|
||||
IE_ASSERT(firstValuableDim != std::end(oDims));
|
||||
auto splittedElementsSize = *firstValuableDim;
|
||||
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
|
||||
auto alignment = GNALimitations::inputByteAlignment;
|
||||
|
||||
// Split output size should be a multiple of 64 to avoid align filters insertion,
|
||||
// but we need to check if our input size to split exceeds 64; if not we can always
|
||||
// split if the remaining size is aligned
|
||||
if (splittedElementsSize <= 64) {
|
||||
if ((totalElementsSize / splittedElementsSize) % alignment == 0) {
|
||||
alignment = 1;
|
||||
} else {
|
||||
THROW_GNA_LAYER_EXCEPTION(l) << "splitting didn't succeed\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Split output size should be a multiple of 64 to avoid align filters insertion
|
||||
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
|
||||
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
|
||||
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
|
||||
|
||||
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
|
||||
|
@ -78,7 +78,8 @@ const std::vector<std::map<std::string, std::string>> configs = {
|
||||
const std::vector<std::vector<size_t>> inputShape = {
|
||||
{1, 67000},
|
||||
{1, 500000},
|
||||
{1, 936, 513}
|
||||
{1, 936, 513},
|
||||
{1, 64, 64, 64}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest,
|
||||
|
Loading…
Reference in New Issue
Block a user