GNA Input/Output buffer reuse (#7332)

* Init implementation

# Conflicts:
#	thirdparty/ade

* Switched to shared class

* Refactoring memory commit()

* Added unit tests

* Fixed output order

* Fixed input order

* Fixed split case

* Fixed compilation issue in debug mode

* Enabled compact mode by default

* Fixed default order for inputs and outputs

* Changed unit test

* Enabled compact mode by default

* Reverted compact_mode flag order
Mikhail Ryzhov 2021-11-30 10:36:54 +03:00 committed by GitHub
parent caa7d853b3
commit cccec6942e
11 changed files with 741 additions and 271 deletions

View File

@ -10,7 +10,7 @@ namespace GNAPluginNS {
struct GNAFlags {
uint8_t gna_lib_async_threads_num = 1;
bool compact_mode = false;
bool compact_mode = true;
bool exclusive_async_requests = false;
bool uniformPwlDesign = false;
float pwlMaxErrorPercent = 1.0f;
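
Context for the change above: compact mode becomes the default allocation strategy, so intermediate buffers are reused whenever their lifetimes allow it. A minimal opt-out sketch from the application side, assuming the 2021.x Inference Engine API and the GNA_COMPACT_MODE config key (YES/NO) that this flag is normally populated from:

    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path
    // revert to dedicated per-layer buffers if the old layout is required
    auto execNet = core.LoadNetwork(network, "GNA", {{"GNA_COMPACT_MODE", "NO"}});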

View File

@ -208,7 +208,7 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind; bind initializer not used - needs refactoring to separate bind and allocation requests
// we don't see a practical use case where the bind storage type needs to differ from the allocation type
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) {
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
@ -475,7 +475,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
if (num_conv_kernel_padding == 0) {
gnamem->readonly().push_local_ptr(ptr_weights,
gnamem->readonly().push_local_ptr(layer, ptr_weights,
transposedWeights.data(),
convolution._weights->byteSize(),
64);
@ -502,19 +502,19 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
offset += padding_zeros.size();
}
};
gnamem->readonly().push_initializer(ptr_weights,
gnamem->readonly().push_initializer(layer, ptr_weights,
paddedWeightsSize,
initializer,
64);
}
if (convolution._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
}
}
@ -600,7 +600,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
ptr_outputs,
ptr_weights,
ptr_biases);
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
@ -647,18 +646,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
transposedWeights.resize(transposedWeights.size() + kernelPad);
}
gnamem->readonly().push_local_ptr(ptr_weights,
gnamem->readonly().push_local_ptr(layer, ptr_weights,
transposedWeights.data(),
transposedWeights.size(),
64);
if (convolution._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
convolution._biases->cbuffer().as<const void*>(),
convolution._biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
}
}
#endif
@ -712,14 +711,13 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
true);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
if (gnaFlags->sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
} else {
IE_ASSERT(quantized != nullptr);
if (!gnaFlags->input_low_precision) {
@ -727,15 +725,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
static_cast<float>(INT16_MAX)));
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT32_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
} else {
auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale,
static_cast<float>(INT8_MAX)));
auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
}
}
} else {
@ -799,12 +797,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_pwl_input,
ptr_pwl_outputs,
ptr_pwl_segments_target);
connectOutput(layer, ptr_pwl_outputs, num_data_bytes_out);
connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0);
if (ptr_pwl_segments_target != nullptr) {
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
@ -876,7 +873,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
* outputs->getPrecision().size();
@ -921,7 +917,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_columns_out,
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())), 8)
* outputs->getPrecision().size();
@ -933,7 +928,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
if (concatLayer == nullptr) {
return;
}
@ -996,13 +990,10 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto layerInfo = LayerInfo(concatParent);
// auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
if (layerInfo.isInput()) {
connectInput(layer, &concatLayerInfo.gna_ptr,
inputLayer.tensorSize, inputLayer.offset, idx, false);
connectInput(layer, &concatLayerInfo.gna_ptr, inputLayer.tensorSize, inputLayer.offset, idx, false);
concatLayerInfo.input_allocated = true;
} else if (layerInfo.isMemory()) {
connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
concatLayerInfo.input_allocated = true;
}
++idx;
@ -1114,7 +1105,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
false);
size_t num_data_bytes_out =
InferenceEngine::details::product(
begin(outputs->getDims()), end(outputs->getDims())) * 4;
@ -1128,8 +1118,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
(quantized == nullptr) ?
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) :
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
}
}
@ -1249,7 +1239,6 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
ptr_weights,
ptr_biases,
true);
size_t num_data_bytes_out =
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();
@ -1262,36 +1251,36 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
switch (eltwise._operation) {
case EltwiseLayer::Sub:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = -quantized->_weights_quant.GetScale();
if (gnaFlags->input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
} else {
auto scaledIdentity = quantized->_weights_quant.GetScale();
if (gnaFlags->input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}
}
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@ -1299,12 +1288,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
case EltwiseLayer::Prod:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
} else {
if (gnaFlags->input_low_precision == false) {
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
} else {
gnamem->readonly().push_value<int8_t>(ptr_biases, 0, num_rows_out, 64);
gnamem->readonly().push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
}
}
connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
@ -1372,9 +1361,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
if (gnaFlags->sw_fp32) {
IE_ASSERT(quantized == nullptr);
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
} else {
gnamem->readonly().push_value<int32_t>(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
}
}
@ -1485,12 +1474,12 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
if (num_padding == 0) {
if (!transpose) {
gnamem->readonly().push_ptr(ptr_weights,
gnamem->readonly().push_ptr(layer, ptr_weights,
weightable._weights->cbuffer().as<const void*>(),
weightable._weights->byteSize(),
64);
} else {
gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
auto cbuffer = weightable._weights->cbuffer().as<const uint8_t*>() + rowOffset;
@ -1519,7 +1508,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
auto paddedWeightsSize = paddedWeights * weightable.precision.size();
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
for (uint32_t i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
ie_memcpy(data, size,
weightable._weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * weightable.precision.size(),
@ -1530,16 +1519,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
}
if (weightable._biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
weightable._biases->cbuffer().as<const void*>(),
weightable._biases->byteSize(),
64);
} else {
// in that case the input from the previous layer goes into biases, so we have to zero-initialize the input pointer
if (useBiasConnection) {
gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
}
}
}
@ -1557,7 +1546,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
}
gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
int out = 0;
for (int input = offset; input < num_rows_out + offset; ++input) {
auto mem_ptr = reinterpret_cast<uint8_t*>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
@ -1624,7 +1613,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
ptr_inputs,
ptr_outputs);
size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
* inputs->getPrecision().size();
// need to reserve the full tensor, so using the original size, assuming an identity activation is attached to the filter later on
@ -1681,7 +1669,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
size_t roffset = weights_offset;
size_t woffset = 0;
for (int i = 0; i < num_rows_out && size >= woffset; i++) {
@ -1696,12 +1684,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
}
if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
}
}
@ -1774,18 +1762,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
gnamem->readonly().push_ptr(ptr_weights,
gnamem->readonly().push_ptr(layer, ptr_weights,
filterLayer->_weights->cbuffer().as<const void*>(),
filterLayer->_weights->byteSize(),
64);
if (filterLayer->_biases) {
gnamem->readonly().push_ptr(ptr_biases,
gnamem->readonly().push_ptr(layer, ptr_biases,
filterLayer->_biases->cbuffer().as<const void*>(),
filterLayer->_biases->byteSize(),
64);
} else {
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
}
}
@ -2016,7 +2004,7 @@ case name:\
connectOutput(layer, ptr_outputs, num_data_bytes_out);
if (ptr_pwl_segments_target != nullptr) {
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
&ptr_pwl_segments.front(),
ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
64);
@ -2152,8 +2140,9 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
}
}
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
size_t num_data_bytes_out) {
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
void *ptr,
size_t num_data_bytes_out) {
auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
int32_t output_offset = 0;
if (layer->params.find("output_offset") != layer->params.end()) {
@ -2162,7 +2151,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
return output_offset;
};
gnalog() << "Connecting output " << layer->name << " ...\n";
// in case of a Memory Layer, its input is allocated in the meminput layer
if (layer->outData.size() == 1) {
@ -2179,7 +2167,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
if (!nextLayer.first) {
gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j;
}
auto nextMemoryLayerIt =
std::find_if(begin(memory_connection), end(memory_connection),
[&](MemoryConnection::value_type &comp) {
@ -2190,14 +2177,13 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
// memory layer not yet initialized
if (nextMemoryLayer.reserved_size == 0) {
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
} else {
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
}
return;
}
@ -2288,7 +2274,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
return it != concatItem.second.concatInputLayers.end();
});
if (included == concat_connection.end()) {
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
std::function<void(GNAConcatLayer, GNAPluginNS::InputDesc&, ConcatConnection&)> allocate_input_recursively =
[&allocate_input_recursively](GNAConcatLayer clayer, GNAPluginNS::InputDesc& inputDesc, ConcatConnection& concat_connection) {
@ -2321,26 +2307,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
if (layer->params.find("output_offset") != layer->params.end()) {
output_offset = layer->GetParamAsInt("output_offset");
}
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset);
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
}
return;
}
}
intel_dnn_component_t * unused_input = nullptr;
if (gnaFlags->compact_mode) {
unused_input = find_first_unused_input(layer);
if (unused_input != nullptr) {
gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
}
}
// cannot reuse suitable input
if (unused_input == nullptr) {
gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64);
}
auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true,
[](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first;
// Check that layer will be an output
gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64);
}
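
The rewritten tail of connectOutput above carries the core reuse idea: in compact mode, try to alias this layer's output onto the input buffer of an already-executed component that nobody will read again; only fall back to a fresh reserve_ptr when no such buffer exists, and pass nullptr (no lifetime) for real network outputs so their buffers are never reclaimed. A self-contained toy of the selection step, under the simplifying assumption that each component records the execution order of its last reader (not the plugin's actual find_first_unused_input):

    #include <cstdint>
    #include <vector>

    struct Component {
        void*    ptr_inputs;        // candidate buffer to overwrite
        uint16_t last_read_order;   // execution order of its last consumer
    };

    // Return the first input buffer that is dead by the time 'current_order' runs.
    Component* find_first_unused_input(std::vector<Component>& comps,
                                       uint16_t current_order) {
        for (auto& c : comps) {
            if (c.last_read_order < current_order)
                return &c;          // safe to reuse: no future reads
        }
        return nullptr;             // caller falls back to reserve_ptr()
    }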
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx, bool connectTo) {
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
void *ptr,
size_t num_data_bytes_in,
int32_t offset,
int idx,
bool connectTo) {
// selecting particular input layers
// auto prevLayer = CNNNetPrevLayer(layer, idx);
auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) {
@ -2363,12 +2347,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// real allocation pointer will be kept in ptr not in ptr_inputs_global
if (!connectTo) {
gnamem->push_value(ptr,
gnamem->push_value(nullptr, ptr,
static_cast<uint8_t>(0),
num_data_bytes_in,
64);
} else {
gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
gnamem->push_value(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
static_cast<uint8_t>(0),
num_data_bytes_in,
64);
@ -2384,9 +2368,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
}
if (connectTo) {
gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
gnamem->bind_ptr(nullptr, ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
} else {
gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
gnamem->bind_ptr(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
}
return prevLayer;
@ -2394,9 +2378,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// const input
if (LayerInfo(prevLayer).isConst()) {
if (connectTo) {
gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset);
gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
} else {
gnamem->bind_ptr(const_connections[prevLayer->name], ptr, offset);
gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
}
return prevLayer;
@ -2423,6 +2407,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (it != splitLayerInfoItem.splitOutputLayers.end()) {
gnalog() << "Connecting " << splitName << " input \n";
// splitting layer should take the execution order from the connected layer
splittingLayer->userValue = layer->userValue;
auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
gnalog() << "Connected \n";
return res;
@ -2435,7 +2421,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
if (concatLayerInfo != concat_connection.end()) {
auto & concatLayerInfoItem = concatLayerInfo->second;
// dnnLayer that is input for concat layer
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset);
// return layer over concat
return CNNNetPrevLayer(prevLayer);
}
@ -2444,7 +2430,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
prevLayer->name);
if (cropLayerInfo != crop_connection.end()) {
auto & cropLayerInfoItem = cropLayerInfo->second;
gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset);
return CNNNetPrevLayer(prevLayer);
}
}
@ -2452,7 +2438,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// check for generic prev layer
if (prevDnnLayer != nullptr) {
gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset);
return prevLayer;
}
@ -2470,20 +2456,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// connectTo is used to indicate that the memory layer should be bound to the given buffer
if (connectTo) {
memorySize = std::max(memorySize, num_data_bytes_in);
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
} else {
if (num_data_bytes_in < memorySize + offset) {
THROW_GNA_LAYER_EXCEPTION(layer) << " invalid allocation request of "
<< num_data_bytes_in << " is more than the state tensor size of: " << memorySize + offset;
}
gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, offset);
gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset);
}
memoryLayer.reserved_size = ALIGN64(memorySize);
} else {
// We may need to extend memory buffer if connected input size is bigger, for example for concat connection
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
}
return prevLayer;

View File

@ -17,6 +17,7 @@
#include <utility>
#include <limits>
#include <ie_common.h>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include <debug.h>
@ -524,7 +525,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
desc.num_elements = numElem;
// binding ptr for first infer request - then others will be setup during relocation
gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
gnamem->bind_ptr(layer, &desc.ptrs.front(), outputPtr);
};
// probing gna_primitives
@ -927,7 +928,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
}
// Creating Layer primitives
uint16_t id = 0;
for (auto & layer : sortedNoMem) {
IE_SUPPRESS_DEPRECATED_START
layer->userValue.v_int = id++;
IE_SUPPRESS_DEPRECATED_END
graphCompiler.CreateLayerPrimitive(layer);
}
@ -981,7 +986,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// TODO: how active list will work in multioutput case
// make room for active list
gnamem->reserve_ptr(nullptr,
gnamem->reserve_ptr(nullptr, nullptr,
ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64);
void *pParallelExecutionData = nullptr;
@ -989,10 +994,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at least
rwSegmentSize = gnamem->getRWBytes();
if (gnaFlags->gna_lib_async_threads_num > 1) {
gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
}
gnamem->commit();
gnamem->commit(gnaFlags->compact_mode);
dnn->Init(gnamem->getBasePtr(),
gnamem->getTotalBytes(),
@ -1569,7 +1574,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
graphCompiler.setGNAMemoryPtr(gnamem);
void *basePtr = nullptr;
gnamem->reserve_ptr(&basePtr, header.gnaMemSize);
gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
gnamem->commit();
#if GNA_LIB_VER == 2
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>(header.layersCount)));

View File

@ -14,6 +14,8 @@
* @brief used for creating graphviz charts, and layers dump
*/
# define PLOT
# define MODEL_DUMP
# define GNA_HEAP_PROFILER
# define gnalog() std::cout
# define gnawarn() std::cerr
#else

View File

@ -8,6 +8,8 @@
#include <vector>
#include <algorithm>
#include "gna_plugin_log.hpp"
namespace GNAPluginNS {
namespace memory {
@ -26,6 +28,45 @@ enum rRegion {
REGION_AUTO,
};
#ifdef GNA_HEAP_PROFILER
inline const char* rRegionToStr(uint8_t region) {
const char* strRegion = "UNKNOWN";
switch (region) {
case REGION_RO:
strRegion = "REGION_RO";
break;
case REGION_RW:
strRegion = "REGION_RW";
break;
case REGION_AUTO:
strRegion = "REGION_AUTO";
break;
}
return strRegion;
}
inline const char* rTypeToStr(uint8_t type) {
const char* strType = "UNKNOWN";
switch (type) {
case REQUEST_STORE:
strType = "REQUEST_STORE";
break;
case REQUEST_ALLOCATE:
strType = "REQUEST_ALLOCATE";
break;
case REQUEST_BIND:
strType = "REQUEST_BIND";
break;
case REQUEST_INITIALIZER | REQUEST_STORE:
case REQUEST_INITIALIZER | REQUEST_ALLOCATE:
case REQUEST_INITIALIZER | REQUEST_BIND:
strType = "INITIALIZER";
break;
}
return strType;
}
#endif
struct MemRequest {
rRegion _region;
uint8_t _type;
@ -40,6 +81,10 @@ struct MemRequest {
size_t _offset = 0;
// expansion in bytes due to large dependent layers
size_t _padding = 0;
// fields to sort regions by execution availability
std::pair<uint16_t, uint16_t> _life_limits{0, UINT16_MAX};
MemRequest(rRegion region,
rType req,
void *ptr_out,
@ -79,7 +124,8 @@ struct MemRequest {
_data.resize(sizeof(T));
std::copy(reinterpret_cast<uint8_t *>(&element), reinterpret_cast<uint8_t *>(&element) + sizeof(T), _data.begin());
}
/**
/**
* Store initializer request
* @param req
* @param ptr_out

View File

@ -8,10 +8,23 @@
#include <vector>
#include <algorithm>
#include <functional>
#include <ie_api.h>
#include <legacy/ie_layers.h>
#include "gna_mem_requests.hpp"
namespace GNAPluginNS {
namespace memory {
/**
* @brief get layer id from legacy CNNLayer
*/
inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
IE_SUPPRESS_DEPRECATED_START
return layer->userValue.v_int;
IE_SUPPRESS_DEPRECATED_END
}
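
This helper reads back the execution-order id that GNAPlugin::LoadNetwork writes into the deprecated userValue field (see the gna_plugin.cpp hunk above). A hypothetical round-trip, mirroring the style of the new unit tests:

    CNNLayerPtr layer = std::make_shared<CNNLayer>(LayerParams("fc1", "Test", Precision::FP32));
    IE_SUPPRESS_DEPRECATED_START
    layer->userValue.v_int = 42;          // assigned in topological execution order
    IE_SUPPRESS_DEPRECATED_END
    uint16_t id = getCNNLayerId(layer);   // 42 (note: v_int is truncated to uint16_t)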
/**
* Adapter for requests submission and actual request queue
*/
@ -26,12 +39,26 @@ public:
* @param num_bytes
* @param alignment
*/
void push_initializer(void *ptr_out, size_t num_bytes, std::function<void(void * data, size_t size)> initializer, size_t alignment = 1) {
void push_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
std::function<void(void * data, size_t size)> initializer,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
void push_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
@ -40,10 +67,17 @@ public:
* @param ptr_in
* @param num_bytes
*/
void push_local_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
const void *ptr_in,
size_t num_bytes,
size_t alignment = 1) {
localStorage().emplace_back(reinterpret_cast<const uint8_t *>(ptr_in),
reinterpret_cast<const uint8_t *>(ptr_in) + num_bytes);
futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
@ -51,8 +85,14 @@ public:
* @param ptr_out
* @param num_bytes
*/
void reserve_ptr(void *ptr_out, size_t num_bytes, size_t alignment = 1) {
void reserve_ptr(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
size_t num_bytes,
size_t alignment = 1) {
futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}
/**
@ -63,8 +103,15 @@ public:
* @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(),
* if that happens - reserved request parameters will be updated before committing memory
*/
void bind_ptr(void *source, const void *dest, size_t offset = 0, size_t num_bytes = 0) {
void bind_ptr(InferenceEngine::CNNLayerPtr layer,
void *source,
const void *dest,
size_t offset = 0,
size_t num_bytes = 0) {
futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
if (layer != nullptr) {
futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
}
}
/**
@ -72,16 +119,28 @@ public:
* @param ptr_out - previously requested buffer
* @param initializer - initialisation routine to be called on allocated memory
*/
void bind_initializer(void *ptr_out, std::function<void(void * data, size_t size)> initializer) {
void bind_initializer(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
std::function<void(void * data, size_t size)> initializer) {
futureHeap().push_back({regionType(), ptr_out, 0, initializer, REQUEST_BIND, 1});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
* @brief allocates buffer and set all its values to T value
*/
template<class T>
void push_value(void *ptr_out, T value, size_t num_elements, size_t alignment = 1) {
void push_value(InferenceEngine::CNNLayerPtr layer,
void *ptr_out,
T value,
size_t num_elements,
size_t alignment = 1) {
futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
if (layer != nullptr) {
futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
}
}
/**
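
Taken together, the queue changes above attach an execution-order lifetime to every request: store and initializer requests (weights, constants) get {0, consumerId}, since their payload must survive from model load until the consuming layer runs, while reserve and bind requests get {layerId, layerId}, live only while that layer executes. Two requests whose intervals never overlap may later share the same bytes. A minimal sketch of that invariant (simplified; the actual packing is delegated to MemorySolver in gna_memory.hpp):

    #include <cstddef>
    #include <cstdint>
    #include <utility>

    struct Request {
        std::pair<uint16_t, uint16_t> life_limits;  // {first_use, last_use}
        size_t size;
    };

    // Two requests may occupy the same address range iff their live
    // intervals are disjoint.
    bool can_share(const Request& a, const Request& b) {
        return a.life_limits.second < b.life_limits.first ||
               b.life_limits.second < a.life_limits.first;
    }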

View File

@ -13,7 +13,15 @@
#include <list>
#include <algorithm>
#include <functional>
#include <iostream>
#include "gna_lib_ver_selector.hpp"
#include "memory_solver.hpp"
#include "gna_plugin_log.hpp"
#ifdef GNA_HEAP_PROFILER
#include <iomanip>
#include <fstream>
#endif
namespace GNAPluginNS {
namespace memory {
@ -32,6 +40,7 @@ class GNAMemory : public GNAMemRequestsQueue {
Allocator _allocator;
std::shared_ptr<uint8_t> heap = nullptr;
size_t _page_alignment = 1;
bool _is_compact_mode = false;
class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
std::reference_wrapper<GNAMemRequestsQueue> _that;
@ -62,93 +71,32 @@ class GNAMemory : public GNAMemRequestsQueue {
return readOnlyFrontEnd;
}
/**
* @brief enables memory optimization (compact mode). This mode can be enabled in the plugin configuration (COMPACT_MODE = Yes)
*/
void setCompactMode(bool isCompact) {
_is_compact_mode = isCompact;
}
/**
* @brief calculates size required for all requests, allocates memory and updates pointers
*/
void commit() {
void commit(bool isCompact = false) {
setCompactMode(isCompact);
// 1st stage -- looking for expandable bind requests:
for (auto &originated : _future_heap) {
if (originated._type & REQUEST_BIND) continue;
size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
if (&originated == &reference) {
offset = 0;
}
offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
expandBindings();
originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
});
}
// 2nd stage -- setup offsets:
setRegionOffsets(REGION_RO);
setRegionOffsets(REGION_RW);
updateSectionsSizes();
// 3rd stage -- allocation total memory setting to 0 internally
heap = allocate(getTotalBytes());
_total = _rw_section_size + _ro_section_size;
// allocation with memory setting to 0 internally
heap = allocate(_total);
auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
for (auto &re : _future_heap) {
if (re._type == REQUEST_BIND) continue;
if (filter(re)) continue;
auto sz = re._element_size * re._num_elements;
if (re._ptr_out != nullptr) {
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
// std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});
// std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;
switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
if (!(re._type & REQUEST_BIND)) {
offset += ALIGN(sz + re._padding, re._alignment);
}
}
};
setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
// TODO: consume bind requests separately from storage type
return !(request._type & REQUEST_BIND) && (request._region != REGION_RW);
}, 0);
setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
return (request._type & REQUEST_BIND) || request._region != REGION_RO;
}, _rw_section_size);
// 4th stage -- store data and updates pointers
allocateRegion(REGION_RW, 0);
allocateRegion(REGION_RO, _rw_section_size);
}
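
commit() is now staged explicitly: expandBindings() propagates sizes and lifetimes through bind chains, setRegionOffsets() lays out each region, a single allocation follows, and allocateRegion() patches pointers and stores payloads. A usage sketch mirroring the new unit tests further below (layer1 and layer2 stand for CNNLayer handles carrying execution ids 1 and 2, as in those tests):

    GNAMemory<std::allocator<uint8_t>> mem;
    float* a = nullptr;
    float* b = nullptr;
    mem.reserve_ptr(layer1, &a, 3 * sizeof(float));  // live only while layer1 runs
    mem.reserve_ptr(layer2, &b, 2 * sizeof(float));  // live only while layer2 runs
    mem.commit(/*isCompact=*/true);  // RW section is max(12, 8) = 12 bytes, not 20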
void *getBasePtr() {
@ -180,7 +128,7 @@ class GNAMemory : public GNAMemRequestsQueue {
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
for (auto &re : _future_heap) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
// std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
// std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
visitor(reference, re);
// primitive loop check
if (re._ptr_in == re._ptr_out) continue;
@ -190,7 +138,6 @@ class GNAMemory : public GNAMemRequestsQueue {
}
}
std::shared_ptr<uint8_t> allocate(size_t bytes) {
std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
_allocator.deallocate(p, bytes);
@ -200,31 +147,191 @@ class GNAMemory : public GNAMemRequestsQueue {
}
protected:
/**
* @brief expand BIND and compound (BIND | *) requests: align sizes (_padding) and set execution order
*/
void expandBindings() {
for (auto &originated : _future_heap) {
// skipping bind requests to avoid duplications
if (originated._type & REQUEST_BIND) continue;
size_t offset = 0;
iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
// aligning sizes
if (&originated == &reference) offset = 0;
offset += binded._offset;
auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
// set execution order
originated._life_limits.first = std::min(originated._life_limits.first, binded._life_limits.first);
originated._life_limits.second = std::max(originated._life_limits.second, binded._life_limits.second);
});
}
}
/**
* @brief set offsets for specific region
*/
size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) {
size_t region_offset = 0;
for (auto &re : _future_heap) {
if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
re._offset = region_offset;
region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
}
return region_offset;
}
/**
* @brief allocates memory and updates pointers
*/
void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) {
for (auto &re : _future_heap) {
// skipping Bind, cross-region and empty requests
if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue;
size_t offset = baseOffset + re._offset;
auto cptr = heap.get() + offset;
size_t cptr_avail_size = _total - offset;
auto sz = re._element_size * re._num_elements;
if (re._type & REQUEST_BIND) {
cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
cptr_avail_size = sz;
} else {
*reinterpret_cast<void **>(re._ptr_out) = cptr;
}
iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
*reinterpret_cast<void **>(binded._ptr_out) =
binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
binded._num_elements = reference._num_elements;
binded._element_size = reference._element_size;
});
switch (re._type & ~REQUEST_BIND) {
case REQUEST_ALLOCATE :
break;
case REQUEST_STORE : {
if (re._ptr_in != nullptr) {
ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
} else {
size_t of = 0;
for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
std::copy(std::begin(re._data), std::end(re._data), cptr + of);
}
}
break;
}
case REQUEST_INITIALIZER : {
re._initializer(cptr, sz);
break;
}
}
}
}
/**
* @brief optimize memory region by reusing buffers
*/
size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) {
size_t memSize = 0;
switch (regType) {
case REGION_AUTO:
case REGION_RW:
case REGION_RO: {
std::vector<MemorySolver::Box> boxes;
for (size_t i = 0; i < _future_heap.size(); ++i) {
// skipping BIND, cross-region and empty requests
if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) {
continue;
}
auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding,
_future_heap[i]._alignment);
int start = _future_heap[i]._life_limits.first;
int stop = _future_heap[i]._life_limits.second;
boxes.push_back({start, stop, static_cast<int64_t>(original_with_pad), static_cast<int64_t>(i)});
}
MemorySolver memSolver(boxes);
memSize = memSolver.solve();
// setting offsets
for (auto const & box : boxes) {
_future_heap[box.id]._offset = memSolver.getOffset(box.id);
}
}
break;
default:
break;
}
return memSize;
}
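
getSectionSizeOptimized() reuses the interval-packing MemorySolver shared with other plugins: each non-bind request becomes a Box whose start/stop come from _life_limits, solve() returns the minimal section size, and getOffset(id) yields the placement written back into _offset. An isolated illustration of that contract (field order {start, finish, size, id} as used above):

    std::vector<MemorySolver::Box> boxes = {
        {0, 1, 64, 0},  // e.g. weights: live from load until layer 1
        {2, 2, 64, 1},  // e.g. scratch buffer of layer 2
    };
    MemorySolver solver(boxes);
    int64_t totalBytes = solver.solve();    // 64: the intervals are disjoint
    int64_t offset0 = solver.getOffset(0);  // both boxes may land at offset 0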
#ifdef GNA_HEAP_PROFILER
void memoryDump(std::function<bool(MemRequest & re)> filter) {
std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out);
for (auto &re : _future_heap) {
if (filter(re)) continue;
dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", "
<< "type: " << std::setw(17) << rTypeToStr(re._type) << " "
<< "ptr_in: " << std::setw(15) << re._ptr_in << " "
<< "ptr_out: " << std::setw(15) << re._ptr_out << " "
<< std::setw(8) << re._num_elements << ", "
<< static_cast<int>(re._element_size) << ", "
<< re._padding << ", "
<< std::setw(3) << re._alignment << ", "
<< std::setw(8) << re._offset << ", "
<< "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", "
<< std::endl;
}
}
#endif
void updateSectionsSizes() {
// count total size and size of read/write regions
_rw_section_size = 0;
_ro_section_size = 0;
for (auto &re : _future_heap) {
auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
#ifdef GNA_HEAP_PROFILER
std::cout << "chunk: " << " region: " << re._region << ", " <<
"type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
std::setw(10) << re._num_elements << ", " <<
static_cast<int>(re._element_size) << ", " <<
re._padding << ", " <<
re._offset << ", " <<
re._alignment << std::endl;
memoryDump([](GNAPluginNS::memory::MemRequest & request) {
return false;
});
#endif
if (re._type == REQUEST_BIND) continue;
for (auto &re : _future_heap) {
if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
if (re._region == REGION_RW) {
_rw_section_size += current;
} else {
_ro_section_size += current;
}
}
if (_is_compact_mode) {
_rw_section_size = getSectionSizeOptimized(REGION_RW);
}
gnalog() << "ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "rw_section_size: " << _rw_section_size << std::endl;
gnalog() << "total: " << _total << std::endl;
_rw_section_size = ALIGN(_rw_section_size, _page_alignment);
_ro_section_size = ALIGN(_ro_section_size, _page_alignment);
_total = _rw_section_size + _ro_section_size;
gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl;
gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl;
}
};
} // namespace memory

View File

@ -0,0 +1,250 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gtest/gtest.h>
#include <legacy/ie_layers.h>
#include "memory/gna_memory.hpp"
using namespace InferenceEngine;
using namespace GNAPluginNS::memory;
class GNAMemoryCompactTest : public ::testing::Test {
protected:
GNAMemory<std::allocator<uint8_t>> mem;
bool isCompact = true;
void SetUp() override {
}
};
TEST_F(GNAMemoryCompactTest, canOptimizeReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float));
mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float));
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 3 * sizeof(float));
}
TEST_F(GNAMemoryCompactTest, canOptimizePushValue) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
mem.push_value(layer1, pFuture1, 1.f, 2);
mem.push_value(layer2, pFuture2, 2.f, 3);
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float));
}
TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
mem.push_value(layer1, pFuture1, 3.f, 2);
mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2);
mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float));
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 2 * sizeof(float));
}
TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
CNNLayerPtr layer4 = std::make_shared<CNNLayer>(LayerParams("layer4", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
layer4->userValue.v_int = 4;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
mem.push_value(layer1, pFuture1, 1.f, 2);
mem.push_value(layer2, pFuture2, 2.f, 3);
mem.reserve_ptr(layer3, pFuture3, 5 * sizeof(float));
mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2);
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float));
}
TEST_F(GNAMemoryCompactTest, canOptimizePushPtrAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
IE_SUPPRESS_DEPRECATED_END
float input[] = {1, 2, 3};
size_t input_size = sizeof(input);
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
mem.push_ptr(layer1, pFuture1, input, input_size);
mem.reserve_ptr(layer2, pFuture2, input_size);
mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), input_size);
ASSERT_EQ(mem.getTotalBytes(), input_size);
}
TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
size_t input_size;
{
std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
input_size = input.size() * sizeof(float);
mem.push_local_ptr(layer1, pFuture1, &*input.begin(), input_size);
}
mem.reserve_ptr(layer2, pFuture2, input_size);
mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), input_size);
ASSERT_EQ(mem.getTotalBytes(), input_size);
}
TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
size_t input_size;
{
std::vector<float> input = {1.0f, 2.0f, 3.0f};
input_size = input.size() * sizeof(float);
mem.push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size){
ie_memcpy(data, size, &input[0], input_size);  // copy bytes, not element count
});
}
mem.reserve_ptr(layer2, pFuture2, 2 * input_size);
mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 2 * input_size);
ASSERT_EQ(mem.getTotalBytes(), 2 * input_size);
}
TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
CNNLayerPtr layer4 = std::make_shared<CNNLayer>(LayerParams("layer4", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
layer4->userValue.v_int = 4;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
float* pFuture4 = reinterpret_cast<float*>(&pFuture4);
{
std::vector<float> input = {1.0f, 2.0f, 3.0f};
mem.bind_initializer(layer2, pFuture1, [=](void* data, size_t size){
ie_memcpy(data, size, &input[0], input.size() * sizeof(float));  // copy bytes, not element count
});
}
mem.reserve_ptr(layer1, pFuture1, 4 * sizeof(float));
mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float));
mem.bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float));
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
}
TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) {
IE_SUPPRESS_DEPRECATED_START
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
layer1->userValue.v_int = 1;
layer2->userValue.v_int = 2;
layer3->userValue.v_int = 3;
IE_SUPPRESS_DEPRECATED_END
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
mem.reserve_ptr(layer1, pFuture1, 2 * sizeof(float));
mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float));
mem.bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float));
mem.commit(isCompact);
ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float));
ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
}

View File

@ -9,7 +9,6 @@
using namespace GNAPluginNS::memory;
class GNAMemoryTest : public ::testing::Test {
protected:
GNAMemory<std::allocator<uint8_t>> mem;
@ -17,12 +16,12 @@ class GNAMemoryTest : public ::testing::Test {
}
};
TEST_F(GNAMemoryTest, canStoreActualBlob){
float input [] = {1,2,3};
TEST_F(GNAMemoryTest, canStoreActualBlob) {
float input[] = {1, 2, 3};
float* pFuture = nullptr;
size_t len = sizeof(input);
mem.push_ptr(&pFuture, input, len);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.commit();
ASSERT_NE(pFuture, nullptr);
@ -33,12 +32,12 @@ TEST_F(GNAMemoryTest, canStoreActualBlob){
}
TEST_F(GNAMemoryTest, canStore2Blobs) {
float input [] = {1,2,3,4};
float input[] = {1, 2, 3, 4};
float* pFuture = nullptr;
float* pFuture2 = nullptr;
mem.push_ptr(&pFuture, input, 3*4);
mem.push_ptr(&pFuture2, input+1, 3*4);
mem.push_ptr(nullptr, &pFuture, input, 3*4);
mem.push_ptr(nullptr, &pFuture2, input+1, 3*4);
mem.commit();
ASSERT_NE(pFuture, input);
@ -54,10 +53,10 @@ TEST_F(GNAMemoryTest, canStore2Blobs) {
}
TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
float input [] = {1,2,3,4,5,6,7,8};
float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
float* pFuture = nullptr;
mem.push_ptr(&pFuture, input, 3*4, 8);
mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
mem.commit();
ASSERT_EQ(16 , mem.getTotalBytes());
@ -73,12 +72,12 @@ TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
}
TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
float input [] = {1,2,3,4,5,6,7,8};
float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
float* pFuture = nullptr;
float* pFuture2 = nullptr;
mem.push_ptr(&pFuture, input, 3*4, 8);
mem.push_ptr(&pFuture2, input, 3*4, 16);
mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
mem.push_ptr(nullptr, &pFuture2, input, 3*4, 16);
mem.commit();
ASSERT_EQ(32 , mem.getTotalBytes());
@ -92,33 +91,30 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
ASSERT_EQ(pFuture[4], 1);
ASSERT_EQ(pFuture[5], 2);
ASSERT_EQ(pFuture[6], 3);
}
TEST_F(GNAMemoryTest, canReserveData) {
float* pFuture = nullptr;
mem.reserve_ptr(&pFuture, 3*4);
mem.reserve_ptr(nullptr, &pFuture, 3*4);
mem.commit();
ASSERT_NE(pFuture, nullptr);
}
TEST_F(GNAMemoryTest, canReserveDataByVoid) {
mem.reserve_ptr(nullptr, 3*4);
mem.reserve_ptr(nullptr, nullptr, 3*4);
ASSERT_NO_THROW(mem.commit());
}
TEST_F(GNAMemoryTest, canReserveAndPushData) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float* pFuture2 = nullptr;
size_t len = sizeof(input) ;
size_t len = sizeof(input);
mem.push_ptr(&pFuture, input, len);
mem.reserve_ptr(&pFuture2, 3*4);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.reserve_ptr(nullptr, &pFuture2, 3*4);
mem.commit();
ASSERT_NE(pFuture, nullptr);
@ -136,16 +132,15 @@ TEST_F(GNAMemoryTest, canReserveAndPushData) {
}
TEST_F(GNAMemoryTest, canBindAndResolve) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float *pFuture2 = nullptr;
float *pFuture3 = nullptr;
size_t len = sizeof(input);
mem.bind_ptr(&pFuture3, &pFuture);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(&pFuture2, &pFuture);
mem.bind_ptr(nullptr, &pFuture3, &pFuture);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture2, &pFuture);
mem.commit();
@ -160,16 +155,15 @@ TEST_F(GNAMemoryTest, canBindAndResolve) {
}
TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float *pFuture3 = nullptr;
float *pFuture4 = nullptr;
size_t len = sizeof(input);
mem.bind_ptr(&pFuture4, &pFuture3);
mem.bind_ptr(&pFuture3, &pFuture);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture4, &pFuture3);
mem.bind_ptr(nullptr, &pFuture3, &pFuture);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.commit();
@ -185,16 +179,15 @@ TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) {
}
TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float *pFuture3 = nullptr;
float *pFuture4 = nullptr;
size_t len = sizeof(input);
mem.bind_ptr(&pFuture4, &pFuture3, 4);
mem.bind_ptr(&pFuture3, &pFuture, 4);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture4, &pFuture3, 4);
mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.commit();
@ -210,16 +203,15 @@ TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) {
}
TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float *pFuture2 = nullptr;
float *pFuture3 = nullptr;
size_t len = sizeof(input);
mem.bind_ptr(&pFuture3, &pFuture, 4);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(&pFuture2, &pFuture);
mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture2, &pFuture);
mem.commit();
@ -237,12 +229,11 @@ TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) {
TEST_F(GNAMemoryTest, canPushLocal) {
float* pFuture = (float*)&pFuture;
float* pFuture = reinterpret_cast<float*>(&pFuture);
{
std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
mem.push_local_ptr(pFuture, &*input.begin(), 4 * 4, 1);
mem.push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1);
}
//poison stack
@ -255,13 +246,12 @@ TEST_F(GNAMemoryTest, canPushLocal) {
}
TEST_F(GNAMemoryTest, canPushValue) {
float* pFuture = (float*)&pFuture;
float* pFuture2 = (float*)&pFuture2;
float* pFuture = reinterpret_cast<float*>(&pFuture);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
{
mem.push_value(pFuture, 3.f, 2);
mem.push_value(pFuture2, 13.f, 2);
mem.push_value(nullptr, pFuture, 3.f, 2);
mem.push_value(nullptr, pFuture2, 13.f, 2);
}
mem.commit();
@ -273,13 +263,12 @@ TEST_F(GNAMemoryTest, canPushValue) {
}
TEST_F(GNAMemoryTest, canPushReadOnlyValue) {
float* pFuture = (float*)&pFuture;
float* pFuture2 = (float*)&pFuture2;
float* pFuture = reinterpret_cast<float*>(&pFuture);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
{
mem.push_value(pFuture, 3.f, 2);
mem.readonly().push_value(pFuture2, 13.f, 2);
mem.push_value(nullptr, pFuture, 3.f, 2);
mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
}
mem.commit();
@ -290,10 +279,37 @@ TEST_F(GNAMemoryTest, canPushReadOnlyValue) {
ASSERT_FLOAT_EQ(pFuture[3], 13);
}
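
The assertion on pFuture[3] encodes the pool layout: commit() places the read-write section first and the read-only section directly behind it, so RO values land right after the RW floats. In miniature:

#include <vector>

int main() {
    // mem.push_value(nullptr, pFuture, 3.f, 2)              -> RW section
    // mem.readonly().push_value(nullptr, pFuture2, 13.f, 2) -> RO section
    const std::vector<float> rw = {3.f, 3.f};
    const std::vector<float> ro = {13.f, 13.f};
    std::vector<float> pool;
    pool.insert(pool.end(), rw.begin(), rw.end());
    pool.insert(pool.end(), ro.begin(), ro.end());
    // Matches ASSERT_FLOAT_EQ(pFuture[3], 13): index 3 is the second RO value.
    return pool[3] == 13.f ? 0 : 1;
}
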
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeEmptyReqs) {
mem.push_value(nullptr, nullptr, 3.f, 2);
mem.readonly().push_value(nullptr, nullptr, 13.f, 2);
mem.commit();
mem.push_value(nullptr, 3.f, 2);
mem.readonly().push_value(nullptr, 13.f, 2);
ASSERT_EQ(mem.getTotalBytes(), 0);
ASSERT_EQ(mem.getRWBytes(), 0);
}
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithEmptyReqs) {
// empty request before
mem.push_value(nullptr, nullptr, 3.f, 2);
// not empty requests
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
mem.push_value(nullptr, pFuture1, 3.f, 2);
mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
// empty request after
mem.readonly().push_value(nullptr, nullptr, 13.f, 2);
mem.commit();
ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float));
}
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
mem.push_value(nullptr, pFuture1, 3.f, 2);
mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
mem.commit();
ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
@ -301,11 +317,12 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
}
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
GNAMemory<std::allocator<uint8_t>> memAligned(64);
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
memAligned.push_value(nullptr, 3.f, 2);
memAligned.readonly().push_value(nullptr, 13.f, 2);
memAligned.push_value(nullptr, pFuture1, 3.f, 2);
memAligned.readonly().push_value(nullptr, pFuture2, 13.f, 2);
memAligned.commit();
ASSERT_EQ(memAligned.getTotalBytes(), 128);
@ -313,15 +330,13 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
}
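
The 128-byte expectation follows from per-section rounding: with a 64-byte pool alignment, each two-float section (8 bytes) is padded up to 64 bytes. A one-line helper, assumed to mirror whatever alignment macro the plugin uses internally:

#include <cstddef>

// Round a section size up to the pool alignment.
constexpr size_t alignUp(size_t bytes, size_t alignment) {
    return ((bytes + alignment - 1) / alignment) * alignment;
}

static_assert(alignUp(2 * sizeof(float), 64) == 64, "one padded section");
static_assert(alignUp(2 * sizeof(float), 64) * 2 == 128, "RW + RO total");

int main() { return 0; }
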
TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) {
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
float* pFuture2 = (float*)&pFuture2;
float* pFuture1 = (float*)&pFuture1;
float* pFuture3 = (float*)&pFuture3;
mem.readonly().push_value(pFuture1, 3.f, 2);
mem.push_value(pFuture2, 13.f, 3);
mem.readonly().push_value(pFuture3, 32.f, 4);
mem.readonly().push_value(nullptr, pFuture1, 3.f, 2);
mem.push_value(nullptr, pFuture2, 13.f, 3);
mem.readonly().push_value(nullptr, pFuture3, 32.f, 4);
mem.commit();
ASSERT_EQ(mem.getTotalBytes(), (2+3+4) * sizeof(float));
@ -346,16 +361,15 @@ TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) {
TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequest) {
float input[] = {1, 2, 3};
float *pFuture = nullptr;
float *pFuture2 = nullptr;
float *pFuture3 = nullptr;
size_t len = sizeof(input);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(&pFuture2, &pFuture, len, len);
mem.bind_ptr(&pFuture3, &pFuture2, 2 * len, len);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
mem.bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len);
mem.commit();
@ -385,9 +399,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenPush) {
size_t len = sizeof(input);
mem.push_ptr(&pFuture, input, len);
mem.bind_ptr(&pFuture2, &pFuture, len, len);
mem.push_ptr(&pFutureInput2, input2, len);
mem.push_ptr(nullptr, &pFuture, input, len);
mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
mem.push_ptr(nullptr, &pFutureInput2, input2, len);
mem.commit();
@ -416,9 +430,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) {
size_t len = sizeof(input);
mem.reserve_ptr(&pFuture, len);
mem.bind_ptr(&pFuture2, &pFuture, len, len);
mem.push_ptr(&pFutureInput, input, len);
mem.reserve_ptr(nullptr, &pFuture, len);
mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
mem.push_ptr(nullptr, &pFutureInput, input, len);
mem.commit();
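
Every test in this file leans on the same contract: the caller hands over the address of a null (or self-pointing) pointer, and commit() patches it once the pool exists, copying any pushed payload into place. A self-contained mock of that contract, hypothetical names only, not the GNAMemory implementation:

#include <cstring>
#include <vector>

class FuturePool {
public:
    // Remember where to write the resolved address; copy src on commit.
    void push_ptr(void** dest, const void* src, size_t bytes) {
        reqs_.push_back({dest, src, bytes, 0});
    }
    void reserve_ptr(void** dest, size_t bytes) {
        reqs_.push_back({dest, nullptr, bytes, 0});
    }
    void commit() {
        size_t total = 0;
        for (auto& r : reqs_) { r.offset = total; total += r.bytes; }
        pool_.resize(total);
        for (auto& r : reqs_) {
            if (r.src) std::memcpy(pool_.data() + r.offset, r.src, r.bytes);
            if (r.dest) *r.dest = pool_.data() + r.offset;  // resolve future
        }
    }
private:
    struct Req { void** dest; const void* src; size_t bytes; size_t offset; };
    std::vector<Req> reqs_;
    std::vector<unsigned char> pool_;
};

int main() {
    float input[] = {1, 2, 3};
    float* pFuture = nullptr;
    FuturePool mem;
    // The cast mirrors how the tests pass &pFuture into the request queue.
    mem.push_ptr(reinterpret_cast<void**>(&pFuture), input, sizeof(input));
    mem.commit();
    return (pFuture != nullptr && pFuture[2] == 3.f) ? 0 : 1;
}
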

View File

@ -19,7 +19,7 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
{GNA_CONFIG_KEY(EXEC_TARGET), ""},
{GNA_CONFIG_KEY(COMPILE_TARGET), ""},
{GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
{GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
{GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(YES)},
{CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
{GNA_CONFIG_KEY(PRECISION), Precision(Precision::I16).name()},
{GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(NO)},
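
With the default flipped to YES, applications that depended on the old, non-overlapping layout must now opt out explicitly. A sketch of how that looks, assuming the Inference Engine 2021.x Core API; "model.xml" is a placeholder path:

#include <gna/gna_config.hpp>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    auto network = ie.ReadNetwork("model.xml");  // placeholder model
    // Compact mode is now on by default; disable it per network if needed.
    auto exeNetwork = ie.LoadNetwork(network, "GNA",
        {{GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)}});
    return 0;
}
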

View File

@ -8,6 +8,7 @@
*/
#pragma once
#include <ie_common.h>
#include <stdint.h>
#include <algorithm>