[GNA] Refactor memory alignment macro usage (#15749)

* Add possibility to use memory alignment different from 64B

* Update tests for new memory API

* Remove ineffective code

* [FIX] Fix memory alignment issues for graph compiler primitives

* Update after review
This commit is contained in:
Tomasz Adamowicz 2023-03-13 16:22:58 +01:00 committed by GitHub
parent 625890c666
commit 5311ab0938
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 185 additions and 190 deletions

View File

@ -55,6 +55,8 @@ constexpr uint32_t bytesPerSplitElement = 2;
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerCropElement = 2;
constexpr uint32_t kMemoryAlignmentBytes = 64;
inline bool isCropAffinedOffset(size_t numberOfElements) {
const auto cropOffset = numberOfElements * bytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);

View File

@ -48,6 +48,8 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
GetGnaLibraryVersion();
maxLayersCount_ = retrieveMaxLayersCount();
m_mem_alignment = limitations::kMemoryAlignmentBytes;
}
GNADeviceHelper::~GNADeviceHelper() {
@ -583,6 +585,5 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
return kMaxLayersCountGNA3_X;
}
}
} // namespace intel_gna
} // namespace ov

View File

@ -47,6 +47,7 @@ class GNADeviceHelper : public GNADevice {
uint32_t nGnaDeviceIndex = 0;
bool useDeviceEmbeddedExport = false;
uint32_t maxLayersCount_ = 0;
size_t m_mem_alignment = 0;
static const uint32_t TotalGna2InstrumentationPoints = 2;
Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
@ -127,6 +128,10 @@ public:
return allAllocations;
}
const size_t getMemAlignment() const {
return m_mem_alignment;
}
/**
* @see GNADevice::createModel()
*/

View File

@ -619,8 +619,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
ptr_weights,
transposedWeights.data(),
convolution._weights->byteSize(),
64);
convolution._weights->byteSize());
} else {
auto paddedWeights = num_filter_coefficients * num_filters;
auto paddedWeightsSize = paddedWeights * convolution.precision.size();
@ -651,17 +650,16 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
};
gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer, 64);
gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer);
}
if (convolution._biases) {
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
convolution._biases->byteSize());
} else {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels);
}
}
@ -803,20 +801,15 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
transposedWeights.resize(transposedWeights.size() + kernelPad);
}
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
ptr_weights,
transposedWeights.data(),
transposedWeights.size(),
64);
gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), transposedWeights.size());
if (convolution._biases) {
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
convolution._biases->byteSize());
} else {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels);
}
}
@ -835,13 +828,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) *
outputs->getPrecision().size();
size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->getDims()), end(input->getDims())) *
input->getPrecision().size();
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
if (power.power == 1.0f) {
void* ptr_inputs = nullptr;
@ -875,8 +866,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
if (gna_config.gnaFlags.sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out + num_padding);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out + num_padding);
} else {
IE_ASSERT(quantized != nullptr);
if (!gna_config.gnaFlags.input_low_precision) {
@ -884,15 +875,27 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast<float>(INT16_MAX)));
auto quantizedOffset = FloatToInt32(
std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast<float>(INT32_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
ptr_weights,
quantizedScale,
num_rows_out + num_padding);
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer,
ptr_biases,
quantizedOffset,
num_rows_out + num_padding);
} else {
auto quantizedScale = FloatToInt8(
std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast<float>(INT8_MAX)));
auto quantizedOffset = FloatToInt8(
std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
ptr_weights,
quantizedScale,
num_rows_out + num_padding);
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
ptr_biases,
quantizedOffset,
num_rows_out + num_padding);
}
}
} else {
@ -954,8 +957,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t));
}
}
}
@ -1284,9 +1286,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
FillWeightOfAligningFilter(layer, ptr_weights, crop_params.start_offset, (quantized == nullptr) ? false : true);
(quantized == nullptr)
? gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64)
: gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
(quantized == nullptr) ? gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out)
: gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out);
}
}
@ -1383,6 +1384,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_rows_in = in_4b_total_size;
uint32_t num_columns_in = 1;
uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
void* ptr_inputs = nullptr;
@ -1408,9 +1410,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
true);
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) *
outputs->getPrecision().size();
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->getPrecision().size();
connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1419,7 +1419,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
switch (eltwise._operation) {
case EltwiseLayer::Sub:
if (quantized == nullptr) {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out + num_padding);
} else {
auto scaledIdentity = -quantized->_weights_quant.GetScale();
@ -1428,23 +1428,21 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
ptr_weights,
quantizedIdentity,
num_rows_out,
64);
num_rows_out + num_padding);
} else {
auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
ptr_weights,
quantizedIdentity,
num_rows_out,
64);
num_rows_out + num_padding);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out + num_padding);
} else {
auto scaledIdentity = quantized->_weights_quant.GetScale();
@ -1454,16 +1452,14 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
ptr_weights,
quantizedIdentity,
num_rows_out,
64);
num_rows_out + num_padding);
} else {
auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
ptr_weights,
quantizedIdentity,
num_rows_out,
64);
num_rows_out + num_padding);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@ -1471,12 +1467,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
case EltwiseLayer::Prod:
if (quantized == nullptr) {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding);
} else {
if (gna_config.gnaFlags.input_low_precision == false) {
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out + num_padding);
} else {
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, 0, num_rows_out + num_padding);
}
}
connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
@ -1496,7 +1492,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input_1 = layer->insData[0].lock();
auto input_2 = layer->insData[1].lock(); // the second input corresponds to ptr_weights in component
auto outputs = *layer->outData.begin();
auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims();
@ -1505,7 +1502,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = batch_size;
const auto out_dims = outputs->getDims();
const auto out_dims_size = ngraph::shape_size(out_dims);
const auto bytes_per_output = outputs->getPrecision().size();
uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
@ -1521,9 +1517,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_rows_in + num_padding,
num_columns_in,
num_rows_out,
inputPrecision.size(),
bytes_per_output,
quantized == nullptr ? input_2->getPrecision().size() : 2,
input1_precision.size(),
outputs->getPrecision().size(),
input2_precision.size(),
quantized == nullptr ? input_2->getPrecision().size() : 4,
GetScaleFactor(layer, QuantizedDataType::weights),
GetScaleFactor(layer, QuantizedDataType::output),
@ -1533,21 +1529,18 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_biases,
false);
const auto num_data_bytes_out = out_dims_size * bytes_per_output;
size_t num_data_bytes_in_1 = InferenceEngine::details::product(begin(input_1->getDims()), end(input_1->getDims())) *
input_1->getPrecision().size();
size_t num_data_bytes_in_2 = InferenceEngine::details::product(begin(input_2->getDims()), end(input_2->getDims())) *
input_2->getPrecision().size();
size_t num_data_bytes_out = out_dims_size * outputs->getPrecision().size();
size_t num_data_bytes_in_1 = (num_rows_in + num_padding) * num_columns_in * input1_precision.size();
size_t num_data_bytes_in_2 = (num_rows_in + num_padding) * num_columns_in * num_rows_out * input2_precision.size();
connectOutput(layer, ptr_outputs, num_data_bytes_out);
connectInput(layer, ptr_input_1, num_data_bytes_in_1);
connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
if (gna_config.gnaFlags.sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out);
} else {
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out);
}
}
@ -1580,6 +1573,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
uint32_t num_columns_in = batch_size;
uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0;
@ -1624,7 +1618,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
ptr_biases,
isDiag);
size_t num_data_bytes_out = num_columns_in * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
@ -1667,8 +1661,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_weights,
weightable._weights->cbuffer().as<const void*>(),
weightable._weights->byteSize(),
64);
weightable._weights->byteSize());
} else {
gnamem->getQueue(REGION_RO)->push_initializer(
layer,
@ -1695,8 +1688,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
}
}
}
},
64);
});
}
} else {
if (transpose) {
@ -1715,22 +1707,20 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
ie_memcpy(data, size, weightsBuffer + num_rows_in * i * wpSize, num_rows_in * wpSize);
data = reinterpret_cast<uint8_t*>(data) + (num_rows_in + num_padding) * wpSize;
}
},
64);
});
}
if (weightable._biases) {
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_biases,
weightable._biases->cbuffer().as<const void*>(),
weightable._biases->byteSize(),
64);
weightable._biases->byteSize());
} else {
// in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
if (useBiasConnection) {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding);
} else {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out);
}
}
}
@ -1769,8 +1759,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
}
++out;
}
},
64);
});
}
void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
@ -1880,11 +1869,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
gnamem->getQueue(REGION_RO)->push_initializer(
layer,
ptr_weights,
paddedWeightsSize,
[=](void* data, size_t size) {
gnamem->getQueue(REGION_RO)
->push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
size_t roffset = weights_offset;
size_t woffset = 0;
for (int i = 0; i < num_rows_out && size >= woffset; i++) {
@ -1895,18 +1881,16 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
roffset += weights_stride;
woffset += elementsIn * weightsElementSize;
}
},
64);
});
}
if (filterLayer->_biases) {
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
filterLayer->_biases->byteSize());
} else {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out);
}
}
@ -1986,17 +1970,15 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
filterLayer->_weights->byteSize());
if (filterLayer->_biases) {
gnamem->getQueue(REGION_RO)->push_ptr(layer,
ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
filterLayer->_biases->byteSize());
} else {
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters);
}
}
@ -2232,8 +2214,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t));
}
}
@ -2409,9 +2390,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
auto& nextMemoryLayer = nextMemoryLayerIt->second;
// memory layer not yet initialized
if (nextMemoryLayer.reserved_size == 0) {
nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
nextMemoryLayer.reserved_size = ALIGN(nextMemoryLayer.getByteSize(), gnamem->getDataMemAlignment());
gnamem->getQueue(REGION_STATES)
->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size);
gnamem->getQueue(REGION_AUTO)
->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
} else {
@ -2422,7 +2403,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
ptr,
&nextMemoryLayer.gna_ptr,
getOffsetForBinding(layer),
ALIGN64(num_data_bytes_out));
num_data_bytes_out);
}
return;
}
@ -2515,8 +2496,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
});
if (included == concat_connection.end()) {
auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2);
gnamem->getQueue(REGION_SCRATCH)
->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64);
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, outputSize);
std::function<void(GNAConcatLayer, GnaInputs&, ConcatConnection&)> allocate_input_recursively =
[&allocate_input_recursively](GNAConcatLayer clayer,
@ -2576,7 +2556,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
if (LayerInfo(layer).isConst()) {
mem_region = REGION_RO;
}
gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64);
gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, num_data_bytes_out);
}
ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
@ -2616,18 +2596,18 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// real allocation pointer will be kept in ptr not in ptr_inputs_global
if (!connectTo) {
gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast<uint8_t>(0), num_data_bytes_in, 64);
gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast<uint8_t>(0), num_data_bytes_in);
} else {
gnamem->getQueue(REGION_INPUTS)
->push_value(layer,
&inputs_ptr_->at(prevLayer->name).ptrs.front(),
static_cast<uint8_t>(0),
num_data_bytes_in,
64);
num_data_bytes_in);
}
inputs_ptr_->at(prevLayer->name).allocated_size = num_data_bytes_in;
}
if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputs_ptr_->at(prevLayer->name).get_allocated_size(), 64)) {
if (ALIGN(num_data_bytes_in, gnamem->getDataMemAlignment()) >
ALIGN(inputs_ptr_->at(prevLayer->name).get_allocated_size(), gnamem->getDataMemAlignment())) {
THROW_GNA_EXCEPTION << "Layer: " << layer->name << " Cannot bind pointer to already allocated input("
<< prevLayer->name
<< "), due to size_allocated=" << inputs_ptr_->at(prevLayer->name).get_allocated_size()
@ -2636,18 +2616,10 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (connectTo) {
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer,
ptr,
&inputs_ptr_->at(prevLayer->name).ptrs.front(),
offset,
ALIGN(num_data_bytes_in, 64));
->bind_ptr(layer, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, num_data_bytes_in);
} else {
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer,
&inputs_ptr_->at(prevLayer->name).ptrs.front(),
ptr,
offset,
ALIGN(num_data_bytes_in, 64));
->bind_ptr(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, num_data_bytes_in);
}
return prevLayer;
@ -2655,9 +2627,11 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// const input
if (LayerInfo(prevLayer).isConst()) {
if (connectTo) {
gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset, num_data_bytes_in);
} else {
gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset, num_data_bytes_in);
}
return prevLayer;
@ -2702,7 +2676,7 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
auto& concatLayerInfoItem = concatLayerInfo->second;
// dnnLayer that is input for concat layer
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in);
->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in, false);
// return layer over concat
return CNNNetPrevLayer(prevLayer);
}
@ -2718,7 +2692,8 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// check for generic prev layer
if (prevDnnLayer != nullptr) {
gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in);
gnamem->getQueue(REGION_AUTO)
->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in, false);
return prevLayer;
}
@ -2731,26 +2706,25 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// TODO: this is duplicate with connect output
auto& memoryLayer = prevMemoryLayer->second;
if (memoryLayer.reserved_size == 0) {
memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
memoryLayer.reserved_size = ALIGN(memoryLayer.getByteSize(), gnamem->getDataMemAlignment());
// connectTo used for indicate that memory layer should be bound to given buffer
if (connectTo) {
memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
gnamem->getQueue(REGION_STATES)
->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
memoryLayer.reserved_size =
ALIGN(std::max(memoryLayer.reserved_size, num_data_bytes_in), gnamem->getDataMemAlignment());
gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size);
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
} else {
if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
if (ALIGN(num_data_bytes_in, gnamem->getDataMemAlignment()) <
ALIGN(memoryLayer.reserved_size + offset, gnamem->getDataMemAlignment())) {
THROW_GNA_LAYER_EXCEPTION(layer)
<< " invalid allocation request of " << num_data_bytes_in
<< " is more then state tensor size of: " << memoryLayer.reserved_size + offset;
}
gnamem->getQueue(REGION_AUTO)
->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, num_data_bytes_in);
}
} else {
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->getQueue(REGION_AUTO)
->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, num_data_bytes_in);
}
return prevLayer;
}

View File

@ -376,7 +376,8 @@ void GNAPlugin::InitGNADevice() {
gnaFlags->performance_counting,
!config.embedded_export_path.empty());
size_t page_size_bytes = 4096;
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), page_size_bytes);
size_t mem_alignment = gnadevice->getMemAlignment();
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), mem_alignment, page_size_bytes);
}
graphCompiler.setGNAMemoryPtr(gnamem);
}
@ -852,10 +853,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
}
portId++;
}
// TODO: how active list will work in multioutput case
// make room for active list
gnamem->getQueue(REGION_OUTPUTS)
->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64);
void* pParallelExecutionData = nullptr;
@ -866,7 +863,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
rwSegmentSize += gnamem->getRegionBytes(REGION_OUTPUTS);
if (gnaFlags->num_requests > 1) {
gnamem->getQueue(REGION_SCRATCH)
->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1), 64);
->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1));
}
gnamem->commit(gnaFlags->compact_mode);
@ -1447,7 +1444,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
void* basePtr = nullptr;
std::string modelLibVersion; //!< OpenVINO and GNA Library versions read from GNA model file
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize, false);
gnamem->commit();

View File

@ -38,7 +38,9 @@ inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
*/
class GNAMemRequestsQueue {
public:
explicit GNAMemRequestsQueue(rRegion region) : _region_type(region) {}
explicit GNAMemRequestsQueue(rRegion region, size_t mem_alignment)
: _region_type(region),
m_alignment(mem_alignment) {}
virtual ~GNAMemRequestsQueue() {}
rRegion _region_type;
@ -46,31 +48,27 @@ public:
std::vector<MemRequest> _mem_requests;
std::list<std::vector<char>> _local_storage;
std::shared_ptr<uint8_t> _basePtr = nullptr;
size_t m_alignment;
/**
* @brief register initialiser to access memory once it is actually allocated
* @param ptr_out
* @param ptr_in
* @param num_bytes
* @param alignment
* @param initializer
*/
void push_initializer(InferenceEngine::CNNLayerPtr layer,
void* ptr_out,
size_t num_bytes,
std::function<void(void* data, size_t size)> initializer,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
std::function<void(void* data, size_t size)> initializer) {
futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, m_alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
void push_ptr(InferenceEngine::CNNLayerPtr layer,
void* ptr_out,
const void* ptr_in,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
void push_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, const void* ptr_in, size_t num_bytes) {
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, m_alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
@ -82,15 +80,11 @@ public:
* @param ptr_in
* @param num_bytes
*/
void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
void* ptr_out,
const void* ptr_in,
size_t num_bytes,
size_t alignment = 1) {
void push_local_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, const void* ptr_in, size_t num_bytes) {
localStorage().emplace_back(reinterpret_cast<const uint8_t*>(ptr_in),
reinterpret_cast<const uint8_t*>(ptr_in) + num_bytes);
futureHeap().push_back(
{regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
{regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, m_alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
@ -100,9 +94,16 @@ public:
*
* @param ptr_out
* @param num_bytes
* @param align_num_bytes
*/
void reserve_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, size_t num_bytes, size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
void reserve_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, size_t num_bytes, bool align_num_bytes = true) {
futureHeap().push_back({regionType(),
REQUEST_ALLOCATE,
ptr_out,
nullptr,
1,
align_num_bytes ? ALIGN(num_bytes, m_alignment) : num_bytes,
m_alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
@ -114,14 +115,22 @@ public:
* @param dest - source is binded to dest pointer after allocation
* @param offset - offset in bytes in source that will be set in dest
* @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(),
* if that happens - reserved request parameters will be updated before committing memory
* @param align_num_bytes - when true then align memory size to the alignment predefined for the queue
*/
void bind_ptr(InferenceEngine::CNNLayerPtr layer,
void* source,
const void* dest,
size_t offset = 0,
size_t num_bytes = 0) {
futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
size_t num_bytes = 0,
bool align_num_bytes = true) {
futureHeap().push_back({regionType(),
REQUEST_BIND,
source,
dest,
1,
align_num_bytes ? ALIGN(num_bytes, m_alignment) : num_bytes,
1,
offset});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
@ -145,12 +154,8 @@ public:
* @brief allocates buffer and set all its values to T value
*/
template <class T>
void push_value(InferenceEngine::CNNLayerPtr layer,
void* ptr_out,
T value,
size_t num_elements,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
void push_value(InferenceEngine::CNNLayerPtr layer, void* ptr_out, T value, size_t num_elements) {
futureHeap().push_back({regionType(), ptr_out, value, num_elements, m_alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
@ -217,17 +222,17 @@ public:
class GNAMemRequestsInputsQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsInputsQueue() : GNAMemRequestsQueue(REGION_INPUTS) {}
explicit GNAMemRequestsInputsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_INPUTS, alignment) {}
};
class GNAMemRequestsOutputsQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsOutputsQueue() : GNAMemRequestsQueue(REGION_OUTPUTS) {}
explicit GNAMemRequestsOutputsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_OUTPUTS, alignment) {}
};
class GNAMemRequestsScratchQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsScratchQueue() : GNAMemRequestsQueue(REGION_SCRATCH) {}
explicit GNAMemRequestsScratchQueue(size_t alignment) : GNAMemRequestsQueue(REGION_SCRATCH, alignment) {}
/**
* @brief optimize memory region by reusing buffers
*/
@ -266,17 +271,17 @@ public:
class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsReadOnlyQueue() : GNAMemRequestsQueue(REGION_RO) {}
explicit GNAMemRequestsReadOnlyQueue(size_t alignment) : GNAMemRequestsQueue(REGION_RO, alignment) {}
};
class GNAMemRequestsStatesQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsStatesQueue() : GNAMemRequestsQueue(REGION_STATES) {}
explicit GNAMemRequestsStatesQueue(size_t alignment) : GNAMemRequestsQueue(REGION_STATES, alignment) {}
};
class GNAMemRequestsBindingsQueue : public GNAMemRequestsQueue {
public:
explicit GNAMemRequestsBindingsQueue() : GNAMemRequestsQueue(REGION_AUTO) {}
explicit GNAMemRequestsBindingsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_AUTO, alignment) {}
};
} // namespace memory

View File

@ -44,6 +44,7 @@ public:
virtual void commit(bool isCompact = false) = 0;
virtual std::pair<bool, uint32_t> getOffsetForMerged(void* ptr) = 0;
virtual size_t getRegionBytes(rRegion region) = 0;
virtual size_t getDataMemAlignment() const = 0;
virtual ~GNAMemoryInterface() = default;
};
@ -58,25 +59,31 @@ protected:
std::map<rRegion, std::unique_ptr<GNAMemRequestsQueue>> _mem_queues;
size_t _total = 0;
Allocator _allocator;
size_t _page_alignment = 1;
size_t _data_alignment;
size_t _page_alignment;
bool _is_compact_mode = false;
private:
void initMemQueses() {
_mem_queues[REGION_RO] = tools::make_unique<GNAMemRequestsReadOnlyQueue>();
_mem_queues[REGION_INPUTS] = tools::make_unique<GNAMemRequestsInputsQueue>();
_mem_queues[REGION_OUTPUTS] = tools::make_unique<GNAMemRequestsOutputsQueue>();
_mem_queues[REGION_SCRATCH] = tools::make_unique<GNAMemRequestsScratchQueue>();
_mem_queues[REGION_STATES] = tools::make_unique<GNAMemRequestsStatesQueue>();
_mem_queues[REGION_AUTO] = tools::make_unique<GNAMemRequestsBindingsQueue>();
_mem_queues[REGION_RO] = tools::make_unique<GNAMemRequestsReadOnlyQueue>(_data_alignment);
_mem_queues[REGION_INPUTS] = tools::make_unique<GNAMemRequestsInputsQueue>(_data_alignment);
_mem_queues[REGION_OUTPUTS] = tools::make_unique<GNAMemRequestsOutputsQueue>(_data_alignment);
_mem_queues[REGION_SCRATCH] = tools::make_unique<GNAMemRequestsScratchQueue>(_data_alignment);
_mem_queues[REGION_STATES] = tools::make_unique<GNAMemRequestsStatesQueue>(_data_alignment);
_mem_queues[REGION_AUTO] = tools::make_unique<GNAMemRequestsBindingsQueue>(_data_alignment);
}
public:
explicit GNAMemory(size_t pageAlignment = 1) : _page_alignment(pageAlignment) {
explicit GNAMemory(size_t dataAlignment = 1, size_t pageAlignment = 1)
: _data_alignment(dataAlignment),
_page_alignment(pageAlignment) {
initMemQueses();
}
explicit GNAMemory(const Allocator& a, size_t pageAlignment = 1) : _allocator(a), _page_alignment(pageAlignment) {
explicit GNAMemory(const Allocator& a, size_t dataAlignment = 1, size_t pageAlignment = 1)
: _allocator(a),
_data_alignment(dataAlignment),
_page_alignment(pageAlignment) {
initMemQueses();
}
@ -146,7 +153,7 @@ public:
return {true, curOffset};
}
const auto size = queuePair.second->getSize();
curOffset += ALIGN64(size);
curOffset += ALIGN(size, _data_alignment);
}
return {false, 0};
}
@ -173,6 +180,10 @@ public:
#endif
}
size_t getDataMemAlignment() const override {
return _data_alignment;
}
protected:
std::shared_ptr<uint8_t> allocate(size_t bytes) {
Allocator nA = _allocator;

View File

@ -47,8 +47,7 @@ void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
IE_ASSERT(data_ptr != nullptr);
auto data_size = newState->byteSize();
auto data_elements = data_size / newState->element_size();
if (ALIGN64(state->reserved_size) !=
ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
if (state->reserved_size > (data_size / (newState->element_size() / state->elementSizeBytes()))) {
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" << state->reserved_size
<< " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
}

View File

@ -19,7 +19,8 @@ const std::vector<ShapeRelatedParams> shapeRelatedParams = {{{{5, 1}, true}, {{5
std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {ngraph::helpers::InputLayerType::CONSTANT};
std::map<std::string, std::string> additional_config = {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}};
std::vector<std::map<std::string, std::string>> additional_config = {{{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}},
{{"GNA_DEVICE_MODE", "GNA_SW_FP32"}}};
INSTANTIATE_TEST_SUITE_P(smoke_MatMul,
MatMulTest,
@ -30,7 +31,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMul,
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(secondaryInputTypes),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::Values(additional_config)),
::testing::ValuesIn(additional_config)),
MatMulTest::getTestCaseName);
} // namespace

View File

@ -54,11 +54,12 @@ TEST_F(GNAMemoryTest, canStore2Blobs) {
}
TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
GNAMemory<memory::GNAFloatAllocator> dataAlignedMem(16);
float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
float* pFuture = nullptr;
auto queue = mem.getQueue(REGION_SCRATCH);
queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8);
mem.commit();
auto queue = dataAlignedMem.getQueue(REGION_SCRATCH);
queue->push_ptr(nullptr, &pFuture, input, 3 * 4);
dataAlignedMem.commit();
ASSERT_EQ(16, queue->getSize());
@ -73,15 +74,16 @@ TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
}
TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
GNAMemory<memory::GNAFloatAllocator> dataAlignedMem(8);
float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
float* pFuture = nullptr;
float* pFuture2 = nullptr;
auto queue = mem.getQueue(REGION_SCRATCH);
queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8);
queue->push_ptr(nullptr, &pFuture2, input, 3 * 4, 16);
mem.commit();
auto queue = dataAlignedMem.getQueue(REGION_SCRATCH);
queue->push_ptr(nullptr, &pFuture, input, 3 * 4);
queue->push_ptr(nullptr, &pFuture2, input, 1 * 4);
dataAlignedMem.commit();
ASSERT_EQ(32, queue->getSize());
ASSERT_EQ(24, queue->getSize());
ASSERT_NE(pFuture, nullptr);
@ -90,8 +92,6 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
ASSERT_EQ(pFuture[2], 3);
// least probability for next element to be equal if not copied
ASSERT_EQ(pFuture[4], 1);
ASSERT_EQ(pFuture[5], 2);
ASSERT_EQ(pFuture[6], 3);
}
TEST_F(GNAMemoryTest, canReserveData) {
@ -232,7 +232,7 @@ TEST_F(GNAMemoryTest, canPushLocal) {
{
std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1);
mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4);
}
// poison stack
@ -316,7 +316,7 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
}
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
GNAMemory<memory::GNAFloatAllocator> memAligned(64);
GNAMemory<memory::GNAFloatAllocator> memAligned(1, 64);
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);