[GNA] Introduce 16Byte memory alignment for LNL (GNA3.6) (#16363)

* [GNA] Introduce 16Byte memory alignment for LNL (GNA3.6)

* update after review
This commit is contained in:
Tomasz Adamowicz 2023-03-27 11:42:34 +02:00 committed by GitHub
parent 5e835e327b
commit 4936d4bb1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 190 additions and 9 deletions

View File

@ -41,6 +41,20 @@ const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_typ
ov::element::i16,
ov::element::f32};
// Returns the required memory alignment in bytes for the given device
// generation. Generations up to GNA3.5 need 64-byte alignment; GNA3.6 and
// GNA4.0 need only 16 bytes. Lookups for targets absent from the table
// (e.g. DeviceVersion::NotSet) propagate the error from GetValueForKey
// (the unit tests expect a throw in that case).
size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
    static const std::unordered_map<target::DeviceVersion, size_t> alignment_by_device{
        {target::DeviceVersion::GNA1_0, 64},
        {target::DeviceVersion::GNA2_0, 64},
        {target::DeviceVersion::GNA3_0, 64},
        {target::DeviceVersion::GNA3_1, 64},
        {target::DeviceVersion::GNA3_5, 64},
        {target::DeviceVersion::GNAEmbedded3_5, 64},
        {target::DeviceVersion::GNA3_6, 16},
        {target::DeviceVersion::GNA4_0, 16}};

    return common::GetValueForKey<target::DeviceVersion, size_t>(target, alignment_by_device);
}
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {

View File

@ -11,6 +11,7 @@
#include <ie_algorithm.hpp>
#include "common/gna_target.hpp"
#include "common/misc_utils.hpp"
#include "dnn_types.hpp"
#include "gna_lib_ver_selector.hpp"
#include "legacy/ngraph_ops/convolution_ie.hpp"
@ -55,7 +56,7 @@ constexpr uint32_t bytesPerSplitElement = 2;
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerCropElement = 2;
constexpr uint32_t kMemoryAlignmentBytes = 64;
constexpr uint32_t kMemoryPageSize = 4096;
inline bool isCropAffinedOffset(size_t numberOfElements) {
const auto cropOffset = numberOfElements * bytesPerCropElement;
@ -78,6 +79,8 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
class SupportedElementTypes {
public:
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);

View File

@ -38,7 +38,8 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
: target(targetIn),
nGnaDeviceIndex{selectGnaDevice()},
useDeviceEmbeddedExport(deviceEmbedded),
isPerformanceMeasuring(isPerformanceMeasuring) {
isPerformanceMeasuring(isPerformanceMeasuring),
m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
open();
@ -48,8 +49,6 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
GetGnaLibraryVersion();
maxLayersCount_ = retrieveMaxLayersCount();
m_mem_alignment = limitations::kMemoryAlignmentBytes;
}
GNADeviceHelper::~GNADeviceHelper() {

View File

@ -47,7 +47,6 @@ class GNADeviceHelper : public GNADevice {
uint32_t nGnaDeviceIndex = 0;
bool useDeviceEmbeddedExport = false;
uint32_t maxLayersCount_ = 0;
size_t m_mem_alignment = 0;
static const uint32_t TotalGna2InstrumentationPoints = 2;
Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
@ -68,6 +67,7 @@ class GNADeviceHelper : public GNADevice {
uint64_t debugLogIndexRequestWait = 0;
static constexpr const char* kDumpExt = ".bin";
static constexpr const char* kDumpDelimiter = ".";
const size_t m_mem_alignment;
public:
explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
@ -128,7 +128,7 @@ public:
return allAllocations;
}
const size_t getMemAlignment() const {
size_t getMemAlignment() const {
return m_mem_alignment;
}

View File

@ -375,9 +375,10 @@ void GNAPlugin::InitGNADevice() {
gnadevice = std::make_shared<GNADeviceHelper>(config.target,
gnaFlags->performance_counting,
!config.embedded_export_path.empty());
size_t page_size_bytes = 4096;
size_t mem_alignment = gnadevice->getMemAlignment();
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), mem_alignment, page_size_bytes);
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
gnadevice->getMemAlignment(),
limitations::kMemoryPageSize);
}
graphCompiler.setGNAMemoryPtr(gnamem);
}

View File

@ -0,0 +1,164 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "any_copy.hpp"
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp"
#include "gna_data_types.hpp"
#include "gna_plugin.hpp"
#include "memory/gna_memory.hpp"
#include "ngraph_functions/builders.hpp"
using namespace InferenceEngine;
using namespace ov::intel_gna::target;
using namespace ov::intel_gna::limitations;
namespace testing {
// Tuple of (execution mode, compile target, {input shape, expected size in
// bytes of the REGION_INPUTS memory region after LoadNetwork}).
using MemAlignmentTestParams =
    std::tuple<ExecutionMode,                     // execution mode
               HWGeneration,                      // compile target
               std::pair<ngraph::Shape, size_t>   // input shape vs expected memory size of the input region in bytes.
                                                  // For this specific model and when the value of input_shape_H = 1,
                                                  // the memory input region size can be calculated using below formula:
                                                  // mem_input_region_size = ALIGN8(input_shape_W)*inputPrecInBytes.
                                                  // Refer to GNAGraphCompiler::AffinePrimitive for more details.
               >;

// fp32 input (4 bytes/element): e.g. {1,9} -> ALIGN8(9)*4 = 64.
const std::vector<std::pair<ngraph::Shape, size_t>> param_16B_alignment_prec_fp32{{{1, 2}, 32},
                                                                                  {{1, 8}, 32},
                                                                                  {{1, 9}, 64}};

// int16 input (2 bytes/element), 64-byte-aligned targets (<= GNA3.5): sizes
// from the formula above are rounded up to a 64-byte multiple
// (e.g. {1,33} -> ALIGN8(33)*2 = 80 -> 128).
const std::vector<std::pair<ngraph::Shape, size_t>> param_64B_alignment_prec_int16{{{1, 2}, 64},
                                                                                   {{1, 32}, 64},
                                                                                   {{1, 33}, 128}};

// int16 input (2 bytes/element), 16-byte-aligned targets (GNA3.6/GNA4.0):
// the same sizes rounded up only to a 16-byte multiple ({1,33} -> 80).
const std::vector<std::pair<ngraph::Shape, size_t>> param_16B_alignment_prec_int16{{{1, 2}, 16},
                                                                                   {{1, 8}, 16},
                                                                                   {{1, 9}, 32},
                                                                                   {{1, 33}, 80}};
// GNAPlugin subclass for the alignment tests: replaces the device-backed
// memory object with a float-allocator one that keeps the alignment reported
// by the device for the configured compile target, then drops the device so
// compilation proceeds without hardware. Also exposes the size of the
// REGION_INPUTS memory queue for assertions.
class GNAPluginForMemoryAlignmentTest : public GNAPlugin {
public:
    GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
        if (gnadevice) {
            // Recreate the memory object with the device's alignment before
            // detaching the device; the graph compiler must see the new
            // memory pointer.
            gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
                                              gnadevice->getMemAlignment(),
                                              limitations::kMemoryPageSize));
            graphCompiler.setGNAMemoryPtr(gnamem);
            gnadevice.reset();
        }
    }

    // Bytes reserved in the REGION_INPUTS region after LoadNetwork.
    const size_t get_memory_REGION_INPUTS_size() const {
        return this->gnamem->getQueue(ov::intel_gna::memory::REGION_INPUTS)->calcSize();
    }
};
// Parameterized fixture: loads a minimal MatMul network through the plugin
// and checks that the reserved REGION_INPUTS size matches the expectation for
// the chosen execution mode / compile target.
class GNAPluginLoadNetworkTests : public ::testing::TestWithParam<MemAlignmentTestParams> {
public:
    static std::string GetTestCaseName(const testing::TestParamInfo<MemAlignmentTestParams>& obj) {
        const auto& shape_vs_size = std::get<2>(obj.param);
        std::ostringstream name;
        name << "inp=" << shape_vs_size.first.to_string() << "_";
        name << "mem_region_size=" << shape_vs_size.second;
        return name.str();
    }

protected:
    void Run() {
        const auto& params = this->GetParam();
        const ExecutionMode exe_mode = std::get<0>(params);
        const HWGeneration hw_gen = std::get<1>(params);
        const ngraph::Shape inp_shape = std::get<2>(params).first;
        const size_t expected_region_size = std::get<2>(params).second;

        const ov::AnyMap gna_config = {ov::intel_gna::execution_mode(exe_mode), ov::intel_gna::compile_target(hw_gen)};

        auto plugin = GNAPluginForMemoryAlignmentTest(any_copy(gna_config));
        CNNNetwork cnnNetwork(getMulFunction(inp_shape));
        plugin.LoadNetwork(cnnNetwork);
        EXPECT_EQ(plugin.get_memory_REGION_INPUTS_size(), expected_region_size);
    }

    void SetUp() override {
        test_params = GetParam();
    }

private:
    // Builds Parameter -> MatMul(transpose_b = true) -> Result in fp32.
    std::shared_ptr<ov::Model> getMulFunction(const ngraph::Shape input_shape) {
        const ngraph::element::Type net_precision = ngraph::element::f32;
        auto param = std::make_shared<ngraph::opset8::Parameter>(net_precision, input_shape);
        auto weights = std::make_shared<ngraph::opset8::Constant>(net_precision, input_shape);
        auto matmul = std::make_shared<ngraph::opset8::MatMul>(param, weights, false, true);
        auto result = std::make_shared<ngraph::opset8::Result>(matmul);
        return std::make_shared<ov::Model>(ov::ResultVector({result}), ov::ParameterVector({param}), "MatMul");
    }

    MemAlignmentTestParams test_params;
};
// One test body shared by all instantiations below.
TEST_P(GNAPluginLoadNetworkTests, CompareInpShapeVsReservedMemRegion) {
    Run();
}

// SW_FP32 mode: fp32 expectations (16-byte-aligned sizes).
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_FP32,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_FP32),
                                            ::testing::Values(HWGeneration::UNDEFINED),
                                            ::testing::ValuesIn(param_16B_alignment_prec_fp32)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// GNA 3.0 compile target: 64-byte alignment expected.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_0,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_0),
                                            ::testing::ValuesIn(param_64B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// GNA 3.5 compile target: 64-byte alignment expected.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_5,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_5),
                                            ::testing::ValuesIn(param_64B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// GNA 3.6 compile target: the new 16-byte alignment expected.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_6,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_6),
                                            ::testing::ValuesIn(param_16B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// GNA 4.0 compile target: 16-byte alignment expected.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_4_0),
                                            ::testing::ValuesIn(param_16B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);
// NOTE(review): this fixture class is unused — the tests below use TEST (not
// TEST_F), so gtest registers its own suite named "MemoryAlignmentTest".
// Consider removing the class or switching the tests to TEST_F.
class MemoryAlignmentTest : public ::testing::Test {};

// NotSet is absent from the alignment table, so the lookup must throw.
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) {
    EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet));
}

// Pre-3.6 generations keep the legacy 64-byte alignment.
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
    EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
}

// GNA3.6 (LNL) switches to 16-byte alignment — the change under test.
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
    EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16);
}
} // namespace testing