[GNA] Introduce 16-byte memory alignment for LNL (GNA 3.6) (#16363)
* [GNA] Introduce 16Byte memory alignment for LNL (GNA3.6) * update after review
This commit is contained in:
parent
5e835e327b
commit
4936d4bb1d
@ -41,6 +41,20 @@ const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_typ
|
||||
ov::element::i16,
|
||||
ov::element::f32};
|
||||
|
||||
// Returns the required byte alignment of GNA memory regions for the given
// device generation. GNA 3.6 (LNL) and GNA 4.0 relax the historical 64-byte
// requirement down to 16 bytes.
// Throws (via common::GetValueForKey) when the target has no entry in the
// table — e.g. DeviceVersion::NotSet (see the unit test
// getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset).
size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
    static const std::unordered_map<target::DeviceVersion, size_t> kAlignmentByDevice{
        {target::DeviceVersion::GNA1_0, 64},
        {target::DeviceVersion::GNA2_0, 64},
        {target::DeviceVersion::GNA3_0, 64},
        {target::DeviceVersion::GNA3_1, 64},
        {target::DeviceVersion::GNA3_5, 64},
        {target::DeviceVersion::GNAEmbedded3_5, 64},
        {target::DeviceVersion::GNA3_6, 16},
        {target::DeviceVersion::GNA4_0, 16}};

    return common::GetValueForKey<target::DeviceVersion, size_t>(target, kAlignmentByDevice);
}
|
||||
|
||||
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
|
||||
if (supported_parameter_types.count(elem_type) == 0) {
|
||||
if (is_exception_allowed) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <ie_algorithm.hpp>
|
||||
|
||||
#include "common/gna_target.hpp"
|
||||
#include "common/misc_utils.hpp"
|
||||
#include "dnn_types.hpp"
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include "legacy/ngraph_ops/convolution_ie.hpp"
|
||||
@ -55,7 +56,7 @@ constexpr uint32_t bytesPerSplitElement = 2;
|
||||
// In fp32 mode this is not necessary but is useful for testing
|
||||
constexpr uint32_t bytesPerCropElement = 2;
|
||||
|
||||
constexpr uint32_t kMemoryAlignmentBytes = 64;
|
||||
constexpr uint32_t kMemoryPageSize = 4096;
|
||||
|
||||
inline bool isCropAffinedOffset(size_t numberOfElements) {
|
||||
const auto cropOffset = numberOfElements * bytesPerCropElement;
|
||||
@ -78,6 +79,8 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
|
||||
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
|
||||
}
|
||||
|
||||
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
|
||||
|
||||
class SupportedElementTypes {
|
||||
public:
|
||||
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
|
||||
|
@ -38,7 +38,8 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
|
||||
: target(targetIn),
|
||||
nGnaDeviceIndex{selectGnaDevice()},
|
||||
useDeviceEmbeddedExport(deviceEmbedded),
|
||||
isPerformanceMeasuring(isPerformanceMeasuring) {
|
||||
isPerformanceMeasuring(isPerformanceMeasuring),
|
||||
m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
|
||||
per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
|
||||
per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
|
||||
open();
|
||||
@ -48,8 +49,6 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
|
||||
GetGnaLibraryVersion();
|
||||
|
||||
maxLayersCount_ = retrieveMaxLayersCount();
|
||||
|
||||
m_mem_alignment = limitations::kMemoryAlignmentBytes;
|
||||
}
|
||||
|
||||
GNADeviceHelper::~GNADeviceHelper() {
|
||||
|
@ -47,7 +47,6 @@ class GNADeviceHelper : public GNADevice {
|
||||
uint32_t nGnaDeviceIndex = 0;
|
||||
bool useDeviceEmbeddedExport = false;
|
||||
uint32_t maxLayersCount_ = 0;
|
||||
size_t m_mem_alignment = 0;
|
||||
|
||||
static const uint32_t TotalGna2InstrumentationPoints = 2;
|
||||
Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
|
||||
@ -68,6 +67,7 @@ class GNADeviceHelper : public GNADevice {
|
||||
uint64_t debugLogIndexRequestWait = 0;
|
||||
static constexpr const char* kDumpExt = ".bin";
|
||||
static constexpr const char* kDumpDelimiter = ".";
|
||||
const size_t m_mem_alignment;
|
||||
|
||||
public:
|
||||
explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
|
||||
@ -128,7 +128,7 @@ public:
|
||||
return allAllocations;
|
||||
}
|
||||
|
||||
const size_t getMemAlignment() const {
|
||||
size_t getMemAlignment() const {
|
||||
return m_mem_alignment;
|
||||
}
|
||||
|
||||
|
@ -375,9 +375,10 @@ void GNAPlugin::InitGNADevice() {
|
||||
gnadevice = std::make_shared<GNADeviceHelper>(config.target,
|
||||
gnaFlags->performance_counting,
|
||||
!config.embedded_export_path.empty());
|
||||
size_t page_size_bytes = 4096;
|
||||
size_t mem_alignment = gnadevice->getMemAlignment();
|
||||
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), mem_alignment, page_size_bytes);
|
||||
|
||||
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
|
||||
gnadevice->getMemAlignment(),
|
||||
limitations::kMemoryPageSize);
|
||||
}
|
||||
graphCompiler.setGNAMemoryPtr(gnamem);
|
||||
}
|
||||
|
164
src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp
Normal file
164
src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp
Normal file
@ -0,0 +1,164 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "any_copy.hpp"
|
||||
#include "backend/gna_limitations.hpp"
|
||||
#include "common/gna_target.hpp"
|
||||
#include "gna_data_types.hpp"
|
||||
#include "gna_plugin.hpp"
|
||||
#include "memory/gna_memory.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace ov::intel_gna::target;
|
||||
using namespace ov::intel_gna::limitations;
|
||||
namespace testing {
|
||||
|
||||
using MemAlignmentTestParams =
|
||||
std::tuple<ExecutionMode, // execution mode
|
||||
HWGeneration, // compile target
|
||||
std::pair<ngraph::Shape, size_t> // input shape vs expected memory size of the input region in bytes.
|
||||
// For this specific model and when the value of input_shape_H = 1,
|
||||
// the memory input region size can be calculated using below formula:
|
||||
// mem_input_region_size = ALIGN8(input_shape_W)*inputPrecInBytes.
|
||||
// Refer to GNAGraphCompiler::AffinePrimitive for more details.
|
||||
>;
|
||||
|
||||
const std::vector<std::pair<ngraph::Shape, size_t>> param_16B_alignment_prec_fp32{{{1, 2}, 32},
|
||||
{{1, 8}, 32},
|
||||
{{1, 9}, 64}};
|
||||
|
||||
const std::vector<std::pair<ngraph::Shape, size_t>> param_64B_alignment_prec_int16{{{1, 2}, 64},
|
||||
{{1, 32}, 64},
|
||||
{{1, 33}, 128}};
|
||||
|
||||
const std::vector<std::pair<ngraph::Shape, size_t>> param_16B_alignment_prec_int16{{{1, 2}, 16},
|
||||
{{1, 8}, 16},
|
||||
{{1, 9}, 32},
|
||||
{{1, 33}, 80}};
|
||||
|
||||
class GNAPluginForMemoryAlignmentTest : public GNAPlugin {
|
||||
public:
|
||||
GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
|
||||
if (gnadevice) {
|
||||
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
|
||||
gnadevice->getMemAlignment(),
|
||||
limitations::kMemoryPageSize));
|
||||
graphCompiler.setGNAMemoryPtr(gnamem);
|
||||
gnadevice.reset();
|
||||
}
|
||||
}
|
||||
|
||||
const size_t get_memory_REGION_INPUTS_size() const {
|
||||
return this->gnamem->getQueue(ov::intel_gna::memory::REGION_INPUTS)->calcSize();
|
||||
}
|
||||
};
|
||||
|
||||
// Parameterized fixture: compiles a tiny MatMul model under a given execution
// mode and compile target, then compares the size reserved for the inputs
// memory region against the expected, alignment-dependent value.
class GNAPluginLoadNetworkTests : public ::testing::TestWithParam<MemAlignmentTestParams> {
public:
    static std::string GetTestCaseName(const testing::TestParamInfo<MemAlignmentTestParams>& obj) {
        // Only the shape/size pair goes into the name; the execution mode and
        // HW generation are already distinguished by the test-suite prefix.
        const auto& inp_shape_vs_mem = std::get<2>(obj.param);

        std::ostringstream result;
        result << "inp=" << inp_shape_vs_mem.first.to_string() << "_";
        result << "mem_region_size=" << inp_shape_vs_mem.second;
        return result.str();
    }

protected:
    void Run() {
        ExecutionMode exe_mode;
        HWGeneration hw_gen;
        std::pair<ngraph::Shape, size_t> inp_shape_vs_mem;
        std::tie(exe_mode, hw_gen, inp_shape_vs_mem) = this->GetParam();
        const ngraph::Shape inp_shape = inp_shape_vs_mem.first;
        const size_t mem_region_size = inp_shape_vs_mem.second;

        const ov::AnyMap gna_config = {ov::intel_gna::execution_mode(exe_mode), ov::intel_gna::compile_target(hw_gen)};

        auto plugin = GNAPluginForMemoryAlignmentTest(any_copy(gna_config));
        auto function = getMulFunction(inp_shape);
        CNNNetwork cnnNetwork(function);
        plugin.LoadNetwork(cnnNetwork);
        EXPECT_EQ(plugin.get_memory_REGION_INPUTS_size(), mem_region_size);
    }

private:
    // Builds a model that multiplies the input parameter with a same-shaped
    // constant (MatMul with the second input transposed).
    // Takes the shape by const reference: the previous by-value const
    // parameter forced a needless copy.
    static std::shared_ptr<ov::Model> getMulFunction(const ngraph::Shape& input_shape) {
        const ngraph::element::Type net_precision = ngraph::element::f32;

        auto input = std::make_shared<ngraph::opset8::Parameter>(net_precision, input_shape);
        auto multiplier = std::make_shared<ngraph::opset8::Constant>(net_precision, input_shape);
        auto matmul = std::make_shared<ngraph::opset8::MatMul>(input, multiplier, false, true);
        auto result = std::make_shared<ngraph::opset8::Result>(matmul);
        auto function = std::make_shared<ov::Model>(ov::ResultVector({result}), ov::ParameterVector({input}), "MatMul");
        return function;
    }
};
|
||||
|
||||
// Single parameterized test body: every instantiation below feeds it a
// (mode, target, shape-vs-size) tuple and Run() performs the comparison.
TEST_P(GNAPluginLoadNetworkTests, CompareInpShapeVsReservedMemRegion) {
    Run();
}

// Software FP32 mode, no HW target.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_FP32,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_FP32),
                                            ::testing::Values(HWGeneration::UNDEFINED),
                                            ::testing::ValuesIn(param_16B_alignment_prec_fp32)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// Legacy targets keep the 64-byte alignment.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_0,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_0),
                                            ::testing::ValuesIn(param_64B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_5,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_5),
                                            ::testing::ValuesIn(param_64B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

// GNA 3.6 and 4.0 use the new 16-byte alignment.
INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_3_6,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_3_6),
                                            ::testing::ValuesIn(param_16B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);

INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
                         GNAPluginLoadNetworkTests,
                         ::testing::Combine(::testing::Values(ExecutionMode::SW_EXACT),
                                            ::testing::Values(HWGeneration::GNA_4_0),
                                            ::testing::ValuesIn(param_16B_alignment_prec_int16)),
                         GNAPluginLoadNetworkTests::GetTestCaseName);
|
||||
|
||||
class MemoryAlignmentTest : public ::testing::Test {};
|
||||
|
||||
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) {
|
||||
EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet));
|
||||
}
|
||||
|
||||
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
|
||||
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
|
||||
}
|
||||
|
||||
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
|
||||
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16);
|
||||
}
|
||||
|
||||
} // namespace testing
|
Loading…
Reference in New Issue
Block a user