From cccec6942eb7c67c74ffca23dd8a1533f6361d32 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Tue, 30 Nov 2021 10:36:54 +0300 Subject: [PATCH] GNA Input/Output buffers reusage (#7332) * Init implementation # Conflicts: # thirdparty/ade * Switched to shared class * Refactoring memory commit() * Added unit tests * Fixed output order * Fixed input order * Fixed split case * Fixed compiling issue in debug mode * Enabled compact mode by default * Fixed default order for inputs and outputs * Changed unit test * Enabled compact mode by default * Reverted compact_mode flag order --- .../src/gna_plugin/descriptions/gna_flags.hpp | 2 +- .../src/gna_plugin/gna_graph_compiler.cpp | 166 +++++----- .../src/gna_plugin/gna_plugin.cpp | 15 +- .../src/gna_plugin/gna_plugin_log.hpp | 2 + .../gna_plugin/memory/gna_mem_requests.hpp | 50 ++- .../memory/gna_mem_requests_queue.hpp | 73 ++++- .../src/gna_plugin/memory/gna_memory.hpp | 291 ++++++++++++------ .../unit/gna/gna_memory_compact_test.cpp | 250 +++++++++++++++ .../unit}/gna/gna_memory_test.cpp | 160 +++++----- .../tests/unit/gna/gna_plugin_config_test.cpp | 2 +- src/inference/dev_api/memory_solver.hpp | 1 + 11 files changed, 741 insertions(+), 271 deletions(-) create mode 100644 inference-engine/tests/unit/gna/gna_memory_compact_test.cpp rename inference-engine/{tests_deprecated/unit/engines => tests/unit}/gna/gna_memory_test.cpp (67%) diff --git a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp index 79b10be7944..d15d526320d 100644 --- a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp +++ b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp @@ -10,7 +10,7 @@ namespace GNAPluginNS { struct GNAFlags { uint8_t gna_lib_async_threads_num = 1; - bool compact_mode = false; + bool compact_mode = true; bool exclusive_async_requests = false; bool uniformPwlDesign = false; float pwlMaxErrorPercent = 1.0f; diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 6f8b455e9b8..6d29a360fd2 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -208,7 +208,7 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize()); // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests // dont see practical use case when bind storage type need to be different that allocation type - gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) { + gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) { ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize()); }); } @@ -475,7 +475,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP } if (num_conv_kernel_padding == 0) { - gnamem->readonly().push_local_ptr(ptr_weights, + gnamem->readonly().push_local_ptr(layer, ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64); @@ -502,19 +502,19 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP offset += padding_zeros.size(); } }; - gnamem->readonly().push_initializer(ptr_weights, + gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, initializer, 64); } if (convolution._biases) { -
gnamem->readonly().push_ptr(ptr_biases, + gnamem->readonly().push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } @@ -600,7 +600,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP ptr_outputs, ptr_weights, ptr_biases); - currentComponent.num_bytes_per_input = inputs->getPrecision().size(); currentComponent.num_bytes_per_output = outputs->getPrecision().size(); @@ -647,18 +646,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP transposedWeights.resize(transposedWeights.size() + kernelPad); } - gnamem->readonly().push_local_ptr(ptr_weights, + gnamem->readonly().push_local_ptr(layer, ptr_weights, transposedWeights.data(), transposedWeights.size(), 64); if (convolution._biases) { - gnamem->readonly().push_ptr(ptr_biases, + gnamem->readonly().push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } #endif @@ -712,14 +711,13 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { ptr_weights, ptr_biases, true); - connectOutput(layer, ptr_outputs, num_data_bytes_out); connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); if (gnaFlags->sw_fp32) { IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64); - gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64); } else { IE_ASSERT(quantized != nullptr); if (!gnaFlags->input_low_precision) { @@ -727,15 +725,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { static_cast(INT16_MAX))); auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT32_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } else { auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast(INT8_MAX))); auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT8_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } } } else { @@ -799,12 +797,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { ptr_pwl_input, ptr_pwl_outputs, ptr_pwl_segments_target); - connectOutput(layer, ptr_pwl_outputs, num_data_bytes_out); connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0); if (ptr_pwl_segments_target != nullptr) { - 
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target, + gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -876,7 +873,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { getScaleFactor(layer, QuantizedDataType::output), ptr_inputs, ptr_outputs); - size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size(); @@ -921,7 +917,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) { num_columns_out, ptr_inputs, ptr_outputs); - size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product( begin(outputs->getDims()), end(outputs->getDims())), 8) * outputs->getPrecision().size(); @@ -933,7 +928,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) { auto concatLayer = dynamic_cast (layer.get()); - if (concatLayer == nullptr) { return; } @@ -996,13 +990,10 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) { auto layerInfo = LayerInfo(concatParent); // auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock()); if (layerInfo.isInput()) { - connectInput(layer, &concatLayerInfo.gna_ptr, - inputLayer.tensorSize, inputLayer.offset, idx, false); - + connectInput(layer, &concatLayerInfo.gna_ptr, inputLayer.tensorSize, inputLayer.offset, idx, false); concatLayerInfo.input_allocated = true; } else if (layerInfo.isMemory()) { connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, inputLayer.offset, idx, false); - concatLayerInfo.input_allocated = true; } ++idx; @@ -1114,7 +1105,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { ptr_weights, ptr_biases, false); - size_t num_data_bytes_out = InferenceEngine::details::product( begin(outputs->getDims()), end(outputs->getDims())) * 4; @@ -1128,8 +1118,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true); (quantized == nullptr) ? 
- gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) : - gnamem->readonly().push_value(ptr_biases, 0, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) : + gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); } } @@ -1249,7 +1239,6 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { ptr_weights, ptr_biases, true); - size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size(); @@ -1262,36 +1251,36 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { switch (eltwise._operation) { case EltwiseLayer::Sub: if (quantized == nullptr) { - gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64); } else { auto scaledIdentity = -quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); break; case EltwiseLayer::Sum: if (quantized == nullptr) { - gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64); } else { auto scaledIdentity = quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); @@ -1299,12 +1288,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { case EltwiseLayer::Prod: if (quantized == nullptr) { - gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { if (gnaFlags->input_low_precision == false) { - gnamem->readonly().push_value(ptr_biases, 0, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); } else { - gnamem->readonly().push_value(ptr_biases, 0, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); } } connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx); @@ -1372,9 +1361,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1); if (gnaFlags->sw_fp32) { IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(ptr_biases, 0.0f, 
num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1485,12 +1474,12 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool if (num_padding == 0) { if (!transpose) { - gnamem->readonly().push_ptr(ptr_weights, + gnamem->readonly().push_ptr(layer, ptr_weights, weightable._weights->cbuffer().as(), weightable._weights->byteSize(), 64); } else { - gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) { + gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) { for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) { auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size(); auto cbuffer = weightable._weights->cbuffer().as() + rowOffset; @@ -1519,7 +1508,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out; auto paddedWeightsSize = paddedWeights * weightable.precision.size(); - gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { + gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { for (uint32_t i = 0; i < (isDiag ? 1 : num_rows_out); i++) { ie_memcpy(data, size, weightable._weights->cbuffer().as() + num_rows_in * i * weightable.precision.size(), @@ -1530,16 +1519,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool } if (weightable._biases) { - gnamem->readonly().push_ptr(ptr_biases, + gnamem->readonly().push_ptr(layer, ptr_biases, weightable._biases->cbuffer().as(), weightable._biases->byteSize(), 64); } else { // in that case input from previous layer goes into biases, so we have to initialize input pointer by zero if (useBiasConnection) { - gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64); + gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); } } } @@ -1557,7 +1546,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!"; } - gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) { + gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) { int out = 0; for (int input = offset; input < num_rows_out + offset; ++input) { auto mem_ptr = reinterpret_cast(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size(); @@ -1624,7 +1613,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l ptr_inputs, ptr_outputs); - size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in * inputs->getPrecision().size(); // need to reserve full tensor so using original size with assumption of identity activation attached to filter lateron @@ -1681,7 +1669,7 @@ void 
GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize; size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize; - gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { + gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { size_t roffset = weights_offset; size_t woffset = 0; for (int i = 0; i < num_rows_out && size >= woffset; i++) { @@ -1696,12 +1684,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l } if (filterLayer->_biases) { - gnamem->readonly().push_ptr(ptr_biases, + gnamem->readonly().push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1774,18 +1762,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); - gnamem->readonly().push_ptr(ptr_weights, + gnamem->readonly().push_ptr(layer, ptr_weights, filterLayer->_weights->cbuffer().as(), filterLayer->_weights->byteSize(), 64); if (filterLayer->_biases) { - gnamem->readonly().push_ptr(ptr_biases, + gnamem->readonly().push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64); + gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64); } } @@ -2016,7 +2004,7 @@ case name:\ connectOutput(layer, ptr_outputs, num_data_bytes_out); if (ptr_pwl_segments_target != nullptr) { - gnamem->readonly().push_local_ptr(ptr_pwl_segments_target, + gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -2152,8 +2140,9 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) { } } -void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, - size_t num_data_bytes_out) { +void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, + void *ptr, + size_t num_data_bytes_out) { auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) { int32_t output_offset = 0; if (layer->params.find("output_offset") != layer->params.end()) { @@ -2162,7 +2151,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p return output_offset; }; - gnalog() << "Connecting output " << layer->name << " ...\n"; // in case of Memory Layer it's input allocated in meminput layer if (layer->outData.size() == 1) { @@ -2179,7 +2167,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p if (!nextLayer.first) { gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j; } - auto nextMemoryLayerIt = std::find_if(begin(memory_connection), end(memory_connection), [&](MemoryConnection::value_type &comp) { @@ -2190,14 +2177,13 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p // memory layer not yet initialized if (nextMemoryLayer.reserved_size == 0) { auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * 
nextMemoryLayer.elementSizeBytes(); - - gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); + gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); nextMemoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); + gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); } return; } @@ -2288,7 +2274,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p return it != concatItem.second.concatInputLayers.end(); }); if (included == concat_connection.end()) { - gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64); + gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64); std::function allocate_input_recursively = [&allocate_input_recursively](GNAConcatLayer clayer, GNAPluginNS::InputDesc& inputDesc, ConcatConnection& concat_connection) { @@ -2321,26 +2307,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p if (layer->params.find("output_offset") != layer->params.end()) { output_offset = layer->GetParamAsInt("output_offset"); } - gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset); + gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset); } return; } } - intel_dnn_component_t * unused_input = nullptr; - if (gnaFlags->compact_mode) { - unused_input = find_first_unused_input(layer); - if (unused_input != nullptr) { - gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out)); - } - } - // cannot reuse suitable input - if (unused_input == nullptr) { - gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64); - } + auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true, + [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first; + // Check that layer will be an output + gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? 
nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64); } -GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx, bool connectTo) { +GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, + void *ptr, + size_t num_data_bytes_in, + int32_t offset, + int idx, + bool connectTo) { // selecting particular input layers // auto prevLayer = CNNNetPrevLayer(layer, idx); auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) { @@ -2363,12 +2347,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // real allocation pointer will be kept in ptr not in ptr_inputs_global if (!connectTo) { - gnamem->push_value(ptr, + gnamem->push_value(nullptr, ptr, static_cast(0), num_data_bytes_in, 64); } else { - gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), + gnamem->push_value(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), static_cast(0), num_data_bytes_in, 64); @@ -2384,9 +2368,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, } if (connectTo) { - gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64)); + gnamem->bind_ptr(nullptr, ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64)); } else { - gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); + gnamem->bind_ptr(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); } return prevLayer; @@ -2394,9 +2378,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // const input if (LayerInfo(prevLayer).isConst()) { if (connectTo) { - gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset); + gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset); } else { - gnamem->bind_ptr(const_connections[prevLayer->name], ptr, offset); + gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset); } return prevLayer; @@ -2423,6 +2407,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, if (it != splitLayerInfoItem.splitOutputLayers.end()) { gnalog() << "Connecting " << splitName << " input \n"; + // splitting layer should take the execution order from the connected layer + splittingLayer->userValue = layer->userValue; auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0); gnalog() << "Connected \n"; return res; @@ -2435,7 +2421,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, if (concatLayerInfo != concat_connection.end()) { auto & concatLayerInfoItem = concatLayerInfo->second; // dnnLayer that is input for concat layer - gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset); + gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset); // return layer over concat return CNNNetPrevLayer(prevLayer); } @@ -2444,7 +2430,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, prevLayer->name); if (cropLayerInfo != crop_connection.end()) { auto & cropLayerInfoItem = cropLayerInfo->second; - gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset); + gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset); return CNNNetPrevLayer(prevLayer); } } @@ -2452,7 +2438,7 @@ 
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // check for generic prev layer if (prevDnnLayer != nullptr) { - gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset); + gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset); return prevLayer; } @@ -2470,20 +2456,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // connectTo used for indicate that memory layer should be bound to given buffer if (connectTo) { memorySize = std::max(memorySize, num_data_bytes_in); - gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset); + gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset); } else { if (num_data_bytes_in < memorySize + offset) { THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of " << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset; } - gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, offset); + gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset); } memoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); + gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); } return prevLayer; diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index 974a2a2130b..7c6dd757fea 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -524,7 +525,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer) desc.num_elements = numElem; // binding ptr for first infer request - then others will be setup during relocation - gnamem->bind_ptr(&desc.ptrs.front(), outputPtr); + gnamem->bind_ptr(layer, &desc.ptrs.front(), outputPtr); }; // probing gna_primitives @@ -927,7 +928,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { } // Creating Layer primitives + uint16_t id = 0; for (auto & layer : sortedNoMem) { + IE_SUPPRESS_DEPRECATED_START + layer->userValue.v_int = id++; + IE_SUPPRESS_DEPRECATED_END graphCompiler.CreateLayerPrimitive(layer); } @@ -981,7 +986,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // TODO: how active list will work in multioutput case // make room for active list - gnamem->reserve_ptr(nullptr, + gnamem->reserve_ptr(nullptr, nullptr, ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64); void *pParallelExecutionData = nullptr; @@ -989,10 +994,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at lest rwSegmentSize = gnamem->getRWBytes(); if (gnaFlags->gna_lib_async_threads_num > 1) { - gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64); + gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64); } - gnamem->commit(); + gnamem->commit(gnaFlags->compact_mode); dnn->Init(gnamem->getBasePtr(), gnamem->getTotalBytes(), @@ -1569,7 +1574,7 @@ 
InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i graphCompiler.setGNAMemoryPtr(gnamem); void *basePtr = nullptr; - gnamem->reserve_ptr(&basePtr, header.gnaMemSize); + gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize); gnamem->commit(); #if GNA_LIB_VER == 2 gnaModels.push_back(std::make_tuple(make_shared>(header.layersCount))); diff --git a/inference-engine/src/gna_plugin/gna_plugin_log.hpp b/inference-engine/src/gna_plugin/gna_plugin_log.hpp index de3b9dec8f9..6e807b6ecbc 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_log.hpp +++ b/inference-engine/src/gna_plugin/gna_plugin_log.hpp @@ -14,6 +14,8 @@ * @brief used for creating graphviz charts, and layers dump */ # define PLOT +# define MODEL_DUMP +# define GNA_HEAP_PROFILER # define gnalog() std::cout # define gnawarn() std::cerr #else diff --git a/inference-engine/src/gna_plugin/memory/gna_mem_requests.hpp b/inference-engine/src/gna_plugin/memory/gna_mem_requests.hpp index 88fc8a0278d..6332981e3f6 100644 --- a/inference-engine/src/gna_plugin/memory/gna_mem_requests.hpp +++ b/inference-engine/src/gna_plugin/memory/gna_mem_requests.hpp @@ -8,6 +8,8 @@ #include #include +#include "gna_plugin_log.hpp" + namespace GNAPluginNS { namespace memory { @@ -26,6 +28,45 @@ enum rRegion { REGION_AUTO, }; +#ifdef GNA_HEAP_PROFILER +inline const char* rRegionToStr(uint8_t region) { + const char* strRegion = "UNKNOWN"; + switch (region) { + case REGION_RO: + strRegion = "REGION_RO"; + break; + case REGION_RW: + strRegion = "REGION_RW"; + break; + case REGION_AUTO: + strRegion = "REGION_AUTO"; + break; + } + return strRegion; +} + +inline const char* rTypeToStr(uint8_t type) { + const char* strType = "UNKNOWN"; + switch (type) { + case REQUEST_STORE: + strType = "REQUEST_STORE"; + break; + case REQUEST_ALLOCATE: + strType = "REQUEST_ALLOCATE"; + break; + case REQUEST_BIND: + strType = "REQUEST_BIND"; + break; + case REQUEST_INITIALIZER | REQUEST_STORE: + case REQUEST_INITIALIZER | REQUEST_ALLOCATE: + case REQUEST_INITIALIZER | REQUEST_BIND: + strType = "INITIALIZER"; + break; + } + return strType; +} +#endif + struct MemRequest { rRegion _region; uint8_t _type; @@ -40,6 +81,10 @@ struct MemRequest { size_t _offset = 0; // expansion in bytes due to large depended layers size_t _padding = 0; + + // fields to sort regions by execution availability + std::pair _life_limits{0, UINT16_MAX}; + MemRequest(rRegion region, rType req, void *ptr_out, @@ -79,7 +124,8 @@ struct MemRequest { _data.resize(sizeof(T)); std::copy(reinterpret_cast(&element), reinterpret_cast(&element) + sizeof(T), _data.begin()); } -/** + + /** * Store initializer request * @param req * @param ptr_out @@ -103,4 +149,4 @@ struct MemRequest { } }; } // namespace memory -} // namespace GNAPluginNS \ No newline at end of file +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/memory/gna_mem_requests_queue.hpp b/inference-engine/src/gna_plugin/memory/gna_mem_requests_queue.hpp index 0faaa922e1c..b18ea9b0da8 100644 --- a/inference-engine/src/gna_plugin/memory/gna_mem_requests_queue.hpp +++ b/inference-engine/src/gna_plugin/memory/gna_mem_requests_queue.hpp @@ -8,10 +8,23 @@ #include #include #include + +#include +#include #include "gna_mem_requests.hpp" namespace GNAPluginNS { namespace memory { + +/** +* @brief get layer id from legacy CNNLayer +*/ +inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) { + IE_SUPPRESS_DEPRECATED_START + return layer->userValue.v_int; + IE_SUPPRESS_DEPRECATED_END +} + /** * Adapter 
for requests submission and actual request queue */ @@ -26,12 +39,26 @@ public: * @param num_bytes * @param alignment */ - void push_initializer(void *ptr_out, size_t num_bytes, std::function initializer, size_t alignment = 1) { + void push_initializer(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + size_t num_bytes, + std::function initializer, + size_t alignment = 1) { futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {0, getCNNLayerId(layer)}; + } } - void push_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) { + void push_ptr(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + const void *ptr_in, + size_t num_bytes, + size_t alignment = 1) { futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {0, getCNNLayerId(layer)}; + } } /** @@ -40,10 +67,17 @@ public: * @param ptr_in * @param num_bytes */ - void push_local_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) { + void push_local_ptr(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + const void *ptr_in, + size_t num_bytes, + size_t alignment = 1) { localStorage().emplace_back(reinterpret_cast(ptr_in), reinterpret_cast(ptr_in) + num_bytes); futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {0, getCNNLayerId(layer)}; + } } /** @@ -51,8 +85,14 @@ public: * @param ptr_out * @param num_bytes */ - void reserve_ptr(void *ptr_out, size_t num_bytes, size_t alignment = 1) { + void reserve_ptr(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + size_t num_bytes, + size_t alignment = 1) { futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)}; + } } /** @@ -63,8 +103,15 @@ public: * @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(), * if that happens - reserved request parameters will be updated before committing memory */ - void bind_ptr(void *source, const void *dest, size_t offset = 0, size_t num_bytes = 0) { + void bind_ptr(InferenceEngine::CNNLayerPtr layer, + void *source, + const void *dest, + size_t offset = 0, + size_t num_bytes = 0) { futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)}; + } } /** @@ -72,16 +119,28 @@ public: * @param ptr_out - previously requested buffer * @param initializer - initialisation routine to be called on allocated memory */ - void bind_initializer(void *ptr_out, std::function initializer) { + void bind_initializer(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + std::function initializer) { futureHeap().push_back({regionType(), ptr_out, 0, initializer, REQUEST_BIND, 1}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {0, getCNNLayerId(layer)}; + } } /** * @brief allocates buffer and set all its values to T value */ template - void push_value(void *ptr_out, T value, size_t num_elements, size_t alignment = 1) { + void push_value(InferenceEngine::CNNLayerPtr layer, + void *ptr_out, + T value, + size_t num_elements, + size_t 
alignment = 1) { futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment}); + if (layer != nullptr) { + futureHeap().back()._life_limits = {0, getCNNLayerId(layer)}; + } } /** diff --git a/inference-engine/src/gna_plugin/memory/gna_memory.hpp b/inference-engine/src/gna_plugin/memory/gna_memory.hpp index cc52398b95f..8219f7918fa 100644 --- a/inference-engine/src/gna_plugin/memory/gna_memory.hpp +++ b/inference-engine/src/gna_plugin/memory/gna_memory.hpp @@ -13,7 +13,15 @@ #include #include #include +#include #include "gna_lib_ver_selector.hpp" +#include "memory_solver.hpp" +#include "gna_plugin_log.hpp" + +#ifdef GNA_HEAP_PROFILER +#include +#include +#endif namespace GNAPluginNS { namespace memory { @@ -32,6 +40,7 @@ class GNAMemory : public GNAMemRequestsQueue { Allocator _allocator; std::shared_ptr<uint8_t> heap = nullptr; size_t _page_alignment = 1; + bool _is_compact_mode = false; class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue { std::reference_wrapper<GNAMemory<Allocator>> _that; @@ -62,93 +71,32 @@ class GNAMemory : public GNAMemRequestsQueue { return readOnlyFrontEnd; } + /** + * @brief enables memory optimization (compact mode). This mode can be enabled in plugin configuration (COMPACT_MODE = Yes) + */ + void setCompactMode(bool isCompact) { + _is_compact_mode = isCompact; + } + /** * @brief calculates size required for all requests, allocates memory and updates pointers */ - void commit() { + void commit(bool isCompact = false) { + setCompactMode(isCompact); + // 1st stage -- looking for expandable bind requests: - for (auto &originated : _future_heap) { - if (originated._type & REQUEST_BIND) continue; - size_t offset = 0; - iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) { - if (&originated == &reference) { - offset = 0; - } - offset += binded._offset; - auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment); - auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment); - auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment); + expandBindings(); - originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad; - }); - } + // 2nd stage -- setup offsets: + setRegionOffsets(REGION_RO); + setRegionOffsets(REGION_RW); - updateSectionsSizes(); + // 3rd stage -- allocate total memory, setting it to 0 internally + heap = allocate(getTotalBytes()); - _total = _rw_section_size + _ro_section_size; - - // allocation with memory setting to 0 internally - heap = allocate(_total); auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) { - for (auto &re : _future_heap) { - if (re._type == REQUEST_BIND) continue; - if (filter(re)) continue; - - auto sz = re._element_size * re._num_elements; - - if (re._ptr_out != nullptr) { - auto cptr = heap.get() + offset; - size_t cptr_avail_size = _total - offset; - if (re._type & REQUEST_BIND) { - cptr = reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(re._ptr_out)); - cptr_avail_size = sz; - } else { - *reinterpret_cast<void **>(re._ptr_out) = cptr; - } - // std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n"; - iterate_binded(re, [](MemRequest & reference, MemRequest & binded) { - *reinterpret_cast<uint8_t **>(binded._ptr_out) = - binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<uint8_t **>(reference._ptr_out)); - binded._num_elements = reference._num_elements; - binded._element_size = reference._element_size; - }); - -
// std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush; - - switch (re._type & ~REQUEST_BIND) { - case REQUEST_ALLOCATE : - break; - case REQUEST_STORE : { - if (re._ptr_in != nullptr) { - ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz); - } else { - size_t of = 0; - for (int i = 0; i < re._num_elements; i++, of += re._element_size) { - std::copy(std::begin(re._data), std::end(re._data), cptr + of); - } - } - break; - } - case REQUEST_INITIALIZER : { - re._initializer(cptr, sz); - break; - } - } - } - if (!(re._type & REQUEST_BIND)) { - offset += ALIGN(sz + re._padding, re._alignment); - } - } - }; - - setupOffsets([](GNAPluginNS::memory::MemRequest & request) { - // TODO: consume bind requests separately from storage type - return !(request._type & REQUEST_BIND) && (request._region != REGION_RW); - }, 0); - - setupOffsets([](GNAPluginNS::memory::MemRequest & request) { - return (request._type & REQUEST_BIND) || request._region != REGION_RO; - }, _rw_section_size); + // 4th stage -- store data and updates pointers + allocateRegion(REGION_RW, 0); + allocateRegion(REGION_RO, _rw_section_size); } void *getBasePtr() { @@ -180,7 +128,7 @@ class GNAMemory : public GNAMemRequestsQueue { void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { for (auto &re : _future_heap) { if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { - // std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n"; + // std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n"; visitor(reference, re); // primitive loop check if (re._ptr_in == re._ptr_out) continue; @@ -190,7 +138,6 @@ class GNAMemory : public GNAMemRequestsQueue { } } - std::shared_ptr allocate(size_t bytes) { std::shared_ptr sp(_allocator.allocate(bytes), [=](uint8_t *p) { _allocator.deallocate(p, bytes); @@ -200,31 +147,191 @@ class GNAMemory : public GNAMemRequestsQueue { } protected: + /** + * @brief expand BIND and (BIND | ) requests. 
Align size(_padding), set execution order + */ + void expandBindings() { + for (auto &originated : _future_heap) { + // skipping bind requests to avoid duplications + if (originated._type & REQUEST_BIND) continue; + + size_t offset = 0; + iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) { + // aligning sizes + if (&originated == &reference) offset = 0; + + offset += binded._offset; + auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment); + auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment); + auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment); + + originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad; + + // set execution order + originated._life_limits.first = std::min(originated._life_limits.first, binded._life_limits.first); + originated._life_limits.second = std::max(originated._life_limits.second, binded._life_limits.second); + }); + } + } + + /** + * @brief set offsets for specific region + */ + size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) { + size_t region_offset = 0; + for (auto &re : _future_heap) { + if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; + + re._offset = region_offset; + region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); + } + return region_offset; + } + + /** + * @brief allocates memory and updates pointers + */ + void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) { + for (auto &re : _future_heap) { + // skipping Bind, crossregion and empty requests + if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; + + size_t offset = baseOffset + re._offset; + auto cptr = heap.get() + offset; + size_t cptr_avail_size = _total - offset; + + auto sz = re._element_size * re._num_elements; + if (re._type & REQUEST_BIND) { + cptr = reinterpret_cast(*reinterpret_cast(re._ptr_out)); + cptr_avail_size = sz; + } else { + *reinterpret_cast(re._ptr_out) = cptr; + } + iterate_binded(re, [](MemRequest & reference, MemRequest & binded) { + *reinterpret_cast(binded._ptr_out) = + binded._offset + reinterpret_cast(*reinterpret_cast(reference._ptr_out)); + binded._num_elements = reference._num_elements; + binded._element_size = reference._element_size; + }); + + switch (re._type & ~REQUEST_BIND) { + case REQUEST_ALLOCATE : + break; + case REQUEST_STORE : { + if (re._ptr_in != nullptr) { + ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz); + } else { + size_t of = 0; + for (int i = 0; i < re._num_elements; i++, of += re._element_size) { + std::copy(std::begin(re._data), std::end(re._data), cptr + of); + } + } + break; + } + case REQUEST_INITIALIZER : { + re._initializer(cptr, sz); + break; + } + } + } + } + + /** + * @brief optimize memory region by reusing buffers + */ + size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) { + size_t memSize = 0; + switch (regType) { + case REGION_AUTO: + case REGION_RW: + case REGION_RO: { + std::vector boxes; + for (size_t i = 0; i < _future_heap.size(); ++i) { + // skipping BIND, cross-region and empty requests + if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) { + continue; + } + + auto original_with_pad = ALIGN(_future_heap[i]._num_elements * 
_future_heap[i]._element_size + _future_heap[i]._padding, + _future_heap[i]._alignment); + int start = _future_heap[i]._life_limits.first; + int stop = _future_heap[i]._life_limits.second; + + boxes.push_back({start, stop, static_cast<int64_t>(original_with_pad), static_cast<int64_t>(i)}); + } + MemorySolver memSolver(boxes); + memSize = memSolver.solve(); + + // setting offsets + for (auto const & box : boxes) { + _future_heap[box.id]._offset = memSolver.getOffset(box.id); + } + } + break; + + default: + break; + } + + return memSize; + } + + +#ifdef GNA_HEAP_PROFILER + void memoryDump(std::function<bool(MemRequest & re)> filter) { + std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out); + + for (auto &re : _future_heap) { + if (filter(re)) continue; + dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", " + << "type: " << std::setw(17) << rTypeToStr(re._type) << " " + << "ptr_in: " << std::setw(15) << re._ptr_in << " " + << "ptr_out: " << std::setw(15) << re._ptr_out << " " + << std::setw(8) << re._num_elements << ", " + << static_cast<int>(re._element_size) << ", " + << re._padding << ", " + << std::setw(3) << re._alignment << ", " + << std::setw(8) << re._offset << ", " + << "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", " + << std::endl; + } + } +#endif + void updateSectionsSizes() { // count total size and size of read/write regions _rw_section_size = 0; _ro_section_size = 0; - for (auto &re : _future_heap) { - auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); #ifdef GNA_HEAP_PROFILER - std::cout << "chunk: " << " region: " << re._region << ", " << - "type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") << - std::setw(10) << re._num_elements << ", " << - static_cast<int>(re._element_size) << ", " << - re._padding << ", " << - re._offset << ", " << - re._alignment << std::endl; + memoryDump([](GNAPluginNS::memory::MemRequest & request) { + return false; + }); #endif - if (re._type == REQUEST_BIND) continue; + for (auto &re : _future_heap) { + if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; + size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); if (re._region == REGION_RW) { _rw_section_size += current; } else { _ro_section_size += current; } } + + if (_is_compact_mode) { + _rw_section_size = getSectionSizeOptimized(REGION_RW); + } + + gnalog() << "ro_section_size: " << _ro_section_size << std::endl; + gnalog() << "rw_section_size: " << _rw_section_size << std::endl; + gnalog() << "total: " << _total << std::endl; + _rw_section_size = ALIGN(_rw_section_size, _page_alignment); _ro_section_size = ALIGN(_ro_section_size, _page_alignment); + _total = _rw_section_size + _ro_section_size; + + gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl; + gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl; } }; } // namespace memory diff --git a/inference-engine/tests/unit/gna/gna_memory_compact_test.cpp b/inference-engine/tests/unit/gna/gna_memory_compact_test.cpp new file mode 100644 index 00000000000..7aabe0a27ff --- /dev/null +++ b/inference-engine/tests/unit/gna/gna_memory_compact_test.cpp @@ -0,0 +1,250 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include "memory/gna_memory.hpp" + +using namespace InferenceEngine; +using namespace GNAPluginNS::memory; + +class GNAMemoryCompactTest : public ::testing::Test { + protected: + GNAMemory<std::allocator<uint8_t>> mem;
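+ // Editorial note on the fixture (inferred from the queue code above): each test
+ // tags its requests with a CNNLayer whose userValue.v_int stands in for the
+ // execution step, so the requests get _life_limits ranges; commit() in compact
+ // mode then lets requests with non-overlapping life ranges share RW offsets,
+ // which is what the getRWBytes()/getTotalBytes() assertions below verify.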
+ bool isCompact = true; + + void SetUp() override { + } +}; + +TEST_F(GNAMemoryCompactTest, canOptimizeReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast<float*>(&pFuture1); + float* pFuture2 = reinterpret_cast<float*>(&pFuture2); + + mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float)); + mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 3 * sizeof(float)); +} + +TEST_F(GNAMemoryCompactTest, canOptimizePushValue) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast<float*>(&pFuture1); + float* pFuture2 = reinterpret_cast<float*>(&pFuture2); + + mem.push_value(layer1, pFuture1, 1.f, 2); + mem.push_value(layer2, pFuture2, 2.f, 3); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); +} + +TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast<float*>(&pFuture1); + float* pFuture2 = reinterpret_cast<float*>(&pFuture2); + float* pFuture3 = reinterpret_cast<float*>(&pFuture3); + + mem.push_value(layer1, pFuture1, 3.f, 2); + mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 2 * sizeof(float)); +} + +TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32)); + CNNLayerPtr layer4 = std::make_shared<CNNLayer>(LayerParams("layer4", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + layer4->userValue.v_int = 4; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast<float*>(&pFuture1); + float* pFuture2 = reinterpret_cast<float*>(&pFuture2); + float* pFuture3 = reinterpret_cast<float*>(&pFuture3); + + mem.push_value(layer1, pFuture1, 1.f, 2); + mem.push_value(layer2, pFuture2, 2.f, 3); + mem.reserve_ptr(layer3, pFuture3, 5 * sizeof(float)); + mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); +} + + +TEST_F(GNAMemoryCompactTest, canOptimizePushPtrAndReservePtr) { +
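+ // Expected packing (a sketch of the compact-mode behaviour, not part of the
+ // original test): the push_ptr buffer is last read at step 1 (layer1) while
+ // the reserve_ptr buffer only becomes live at step 2 (layer2), so the memory
+ // solver may assign both requests the same offset and the RW section stays
+ // at a single input_size, as asserted below.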
IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared(LayerParams("layer3", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + IE_SUPPRESS_DEPRECATED_END + + float input[] = {1, 2, 3}; + size_t input_size = sizeof(input); + + float* pFuture1 = reinterpret_cast(&pFuture1); + float* pFuture2 = reinterpret_cast(&pFuture2); + float* pFuture3 = reinterpret_cast(&pFuture3); + + mem.push_ptr(layer1, pFuture1, input, input_size); + mem.reserve_ptr(layer2, pFuture2, input_size); + mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), input_size); + ASSERT_EQ(mem.getTotalBytes(), input_size); +} + +TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared(LayerParams("layer3", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast(&pFuture1); + float* pFuture2 = reinterpret_cast(&pFuture2); + float* pFuture3 = reinterpret_cast(&pFuture3); + + size_t input_size; + { + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; + input_size = input.size() * sizeof(float); + mem.push_local_ptr(layer1, pFuture1, &*input.begin(), input_size); + } + + mem.reserve_ptr(layer2, pFuture2, input_size); + mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), input_size); + ASSERT_EQ(mem.getTotalBytes(), input_size); +} + +TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared(LayerParams("layer3", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast(&pFuture1); + float* pFuture2 = reinterpret_cast(&pFuture2); + float* pFuture3 = reinterpret_cast(&pFuture3); + + size_t input_size; + { + std::vector input = {1.0f, 2.0f, 3.0f}; + input_size = input.size() * sizeof(float); + mem.push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size){ + ie_memcpy(data, size, &input[0], input.size()); + }); + } + + mem.reserve_ptr(layer2, pFuture2, 2 * input_size); + mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 2 * input_size); + ASSERT_EQ(mem.getTotalBytes(), 2 * input_size); +} + +TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared(LayerParams("layer3", "test", Precision::FP32)); + CNNLayerPtr layer4 = 
std::make_shared(LayerParams("layer4", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + layer4->userValue.v_int = 4; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast(&pFuture1); + float* pFuture2 = reinterpret_cast(&pFuture2); + float* pFuture3 = reinterpret_cast(&pFuture3); + float* pFuture4 = reinterpret_cast(&pFuture4); + + { + std::vector input = {1.0f, 2.0f, 3.0f}; + mem.bind_initializer(layer2, pFuture1, [=](void* data, size_t size){ + ie_memcpy(data, size, &input[0], input.size()); + }); + } + + mem.reserve_ptr(layer1, pFuture1, 4 * sizeof(float)); + mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + mem.bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float)); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); +} + +TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) { + IE_SUPPRESS_DEPRECATED_START + CNNLayerPtr layer1 = std::make_shared(LayerParams("layer1", "test", Precision::FP32)); + CNNLayerPtr layer2 = std::make_shared(LayerParams("layer2", "test", Precision::FP32)); + CNNLayerPtr layer3 = std::make_shared(LayerParams("layer3", "test", Precision::FP32)); + layer1->userValue.v_int = 1; + layer2->userValue.v_int = 2; + layer3->userValue.v_int = 3; + IE_SUPPRESS_DEPRECATED_END + + float* pFuture1 = reinterpret_cast(&pFuture1); + float* pFuture2 = reinterpret_cast(&pFuture2); + float* pFuture3 = reinterpret_cast(&pFuture3); + + mem.reserve_ptr(layer1, pFuture1, 2 * sizeof(float)); + mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + mem.bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float)); + + mem.commit(isCompact); + ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); + ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); +} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp b/inference-engine/tests/unit/gna/gna_memory_test.cpp similarity index 67% rename from inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp rename to inference-engine/tests/unit/gna/gna_memory_test.cpp index 6dfa38fc27d..d400a2f2a26 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp +++ b/inference-engine/tests/unit/gna/gna_memory_test.cpp @@ -9,7 +9,6 @@ using namespace GNAPluginNS::memory; class GNAMemoryTest : public ::testing::Test { - protected: GNAMemory> mem; @@ -17,12 +16,12 @@ class GNAMemoryTest : public ::testing::Test { } }; -TEST_F(GNAMemoryTest, canStoreActualBlob){ - float input [] = {1,2,3}; +TEST_F(GNAMemoryTest, canStoreActualBlob) { + float input[] = {1, 2, 3}; float* pFuture = nullptr; size_t len = sizeof(input); - mem.push_ptr(&pFuture, input, len); + mem.push_ptr(nullptr, &pFuture, input, len); mem.commit(); ASSERT_NE(pFuture, nullptr); @@ -33,12 +32,12 @@ TEST_F(GNAMemoryTest, canStoreActualBlob){ } TEST_F(GNAMemoryTest, canStore2Blobs) { - float input [] = {1,2,3,4}; + float input[] = {1, 2, 3, 4}; float* pFuture = nullptr; float* pFuture2 = nullptr; - mem.push_ptr(&pFuture, input, 3*4); - mem.push_ptr(&pFuture2, input+1, 3*4); + mem.push_ptr(nullptr, &pFuture, input, 3*4); + mem.push_ptr(nullptr, &pFuture2, input+1, 3*4); mem.commit(); ASSERT_NE(pFuture, input); @@ -54,10 +53,10 @@ TEST_F(GNAMemoryTest, canStore2Blobs) { } TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) { - float input [] = {1,2,3,4,5,6,7,8}; + float input[] = {1, 2, 3, 4, 5, 6, 7, 8}; float* pFuture = 
diff --git a/inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp b/inference-engine/tests/unit/gna/gna_memory_test.cpp
similarity index 67%
rename from inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp
rename to inference-engine/tests/unit/gna/gna_memory_test.cpp
index 6dfa38fc27d..d400a2f2a26 100644
--- a/inference-engine/tests_deprecated/unit/engines/gna/gna_memory_test.cpp
+++ b/inference-engine/tests/unit/gna/gna_memory_test.cpp
@@ -9,7 +9,6 @@
 using namespace GNAPluginNS::memory;
 
 class GNAMemoryTest : public ::testing::Test {
-
 protected:
     GNAMemory<std::allocator<uint8_t>> mem;
 
@@ -17,12 +16,12 @@ class GNAMemoryTest : public ::testing::Test {
     }
 };
 
-TEST_F(GNAMemoryTest, canStoreActualBlob){
-    float input [] = {1,2,3};
+TEST_F(GNAMemoryTest, canStoreActualBlob) {
+    float input[] = {1, 2, 3};
     float* pFuture = nullptr;
     size_t len = sizeof(input);
 
-    mem.push_ptr(&pFuture, input, len);
+    mem.push_ptr(nullptr, &pFuture, input, len);
     mem.commit();
 
     ASSERT_NE(pFuture, nullptr);
@@ -33,12 +32,12 @@
 }
 
 TEST_F(GNAMemoryTest, canStore2Blobs) {
-    float input [] = {1,2,3,4};
+    float input[] = {1, 2, 3, 4};
     float* pFuture = nullptr;
     float* pFuture2 = nullptr;
 
-    mem.push_ptr(&pFuture, input, 3*4);
-    mem.push_ptr(&pFuture2, input+1, 3*4);
+    mem.push_ptr(nullptr, &pFuture, input, 3*4);
+    mem.push_ptr(nullptr, &pFuture2, input+1, 3*4);
     mem.commit();
 
     ASSERT_NE(pFuture, input);
@@ -54,10 +53,10 @@
 }
 
 TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
-    float input [] = {1,2,3,4,5,6,7,8};
+    float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
     float* pFuture = nullptr;
 
-    mem.push_ptr(&pFuture, input, 3*4, 8);
+    mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
     mem.commit();
 
     ASSERT_EQ(16 , mem.getTotalBytes());
@@ -73,12 +72,12 @@
 }
 
 TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
-    float input [] = {1,2,3,4,5,6,7,8};
+    float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
     float* pFuture = nullptr;
     float* pFuture2 = nullptr;
 
-    mem.push_ptr(&pFuture, input, 3*4, 8);
-    mem.push_ptr(&pFuture2, input, 3*4, 16);
+    mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
+    mem.push_ptr(nullptr, &pFuture2, input, 3*4, 16);
     mem.commit();
 
     ASSERT_EQ(32 , mem.getTotalBytes());
@@ -92,33 +91,30 @@
     ASSERT_EQ(pFuture[4], 1);
     ASSERT_EQ(pFuture[5], 2);
     ASSERT_EQ(pFuture[6], 3);
-
 }
 
 TEST_F(GNAMemoryTest, canReserveData) {
-
     float* pFuture = nullptr;
-    mem.reserve_ptr(&pFuture, 3*4);
+    mem.reserve_ptr(nullptr, &pFuture, 3*4);
     mem.commit();
 
     ASSERT_NE(pFuture, nullptr);
 }
 
 TEST_F(GNAMemoryTest, canReserveDataByVoid) {
-    mem.reserve_ptr(nullptr, 3*4);
+    mem.reserve_ptr(nullptr, nullptr, 3*4);
     ASSERT_NO_THROW(mem.commit());
 }
 
 TEST_F(GNAMemoryTest, canReserveAndPushData) {
-
     float input[] = {1, 2, 3};
     float *pFuture = nullptr;
     float* pFuture2 = nullptr;
-    size_t len = sizeof(input) ;
+    size_t len = sizeof(input);
 
-    mem.push_ptr(&pFuture, input, len);
-    mem.reserve_ptr(&pFuture2, 3*4);
+    mem.push_ptr(nullptr, &pFuture, input, len);
+    mem.reserve_ptr(nullptr, &pFuture2, 3*4);
     mem.commit();
 
     ASSERT_NE(pFuture, nullptr);
@@ -136,16 +132,15 @@
 }
 
 TEST_F(GNAMemoryTest, canBindAndResolve) {
-
     float input[] = {1, 2, 3};
     float *pFuture = nullptr;
     float *pFuture2 = nullptr;
     float *pFuture3 = nullptr;
     size_t len = sizeof(input);
 
-    mem.bind_ptr(&pFuture3, &pFuture);
-    mem.push_ptr(&pFuture, input, len);
-    mem.bind_ptr(&pFuture2, &pFuture);
+    mem.bind_ptr(nullptr, &pFuture3, &pFuture);
+    mem.push_ptr(nullptr, &pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture2, &pFuture);
 
     mem.commit();
 
@@ -160,16 +155,15 @@
 }
 
 TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) {
-
     float input[] = {1, 2, 3};
     float *pFuture = nullptr;
     float *pFuture3 = nullptr;
     float *pFuture4 = nullptr;
     size_t len = sizeof(input);
 
-    mem.bind_ptr(&pFuture4, &pFuture3);
-    mem.bind_ptr(&pFuture3, &pFuture);
-    mem.push_ptr(&pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture4, &pFuture3);
+    mem.bind_ptr(nullptr, &pFuture3, &pFuture);
+    mem.push_ptr(nullptr, &pFuture, input, len);
 
     mem.commit();
 
@@ -185,16 +179,15 @@
 }
 
 TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) {
-
     float input[] = {1, 2, 3};
     float *pFuture = nullptr;
     float *pFuture3 = nullptr;
     float *pFuture4 = nullptr;
     size_t len = sizeof(input);
 
-    mem.bind_ptr(&pFuture4, &pFuture3, 4);
-    mem.bind_ptr(&pFuture3, &pFuture, 4);
-    mem.push_ptr(&pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture4, &pFuture3, 4);
+    mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
+    mem.push_ptr(nullptr, &pFuture, input, len);
 
     mem.commit();
 
@@ -210,16 +203,15 @@
 }
 
 TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) {
-
     float input[] = {1, 2, 3};
     float *pFuture = nullptr;
     float *pFuture2 = nullptr;
     float *pFuture3 = nullptr;
     size_t len = sizeof(input);
 
-    mem.bind_ptr(&pFuture3, &pFuture, 4);
-    mem.push_ptr(&pFuture, input, len);
-    mem.bind_ptr(&pFuture2, &pFuture);
+    mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
+    mem.push_ptr(nullptr, &pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture2, &pFuture);
 
     mem.commit();
 
@@ -237,12 +229,11 @@
 }
 
 TEST_F(GNAMemoryTest, canPushLocal) {
-
-    float* pFuture = (float*)&pFuture;
+    float* pFuture = reinterpret_cast<float*>(&pFuture);
 
     {
         std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
-        mem.push_local_ptr(pFuture, &*input.begin(), 4 * 4, 1);
+        mem.push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1);
     }
 
     //poison stack
     mem.commit();
@@ -255,13 +246,12 @@
 }
 
 TEST_F(GNAMemoryTest, canPushValue) {
-
-    float* pFuture = (float*)&pFuture;
-    float* pFuture2 = (float*)&pFuture2;
+    float* pFuture = reinterpret_cast<float*>(&pFuture);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
 
     {
-        mem.push_value(pFuture, 3.f, 2);
-        mem.push_value(pFuture2, 13.f, 2);
+        mem.push_value(nullptr, pFuture, 3.f, 2);
+        mem.push_value(nullptr, pFuture2, 13.f, 2);
     }
 
     mem.commit();
@@ -273,13 +263,12 @@
 }
 
 TEST_F(GNAMemoryTest, canPushReadOnlyValue) {
-
-    float* pFuture = (float*)&pFuture;
-    float* pFuture2 = (float*)&pFuture2;
+    float* pFuture = reinterpret_cast<float*>(&pFuture);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
 
     {
-        mem.push_value(pFuture, 3.f, 2);
-        mem.readonly().push_value(pFuture2, 13.f, 2);
+        mem.push_value(nullptr, pFuture, 3.f, 2);
+        mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
     }
 
     mem.commit();
@@ -290,10 +279,37 @@
     ASSERT_FLOAT_EQ(pFuture[3], 13);
 }
 
-TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
+TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeEmptyReqs) {
+    mem.push_value(nullptr, nullptr, 3.f, 2);
+    mem.readonly().push_value(nullptr, nullptr, 13.f, 2);
+    mem.commit();
 
-    mem.push_value(nullptr, 3.f, 2);
-    mem.readonly().push_value(nullptr, 13.f, 2);
+    ASSERT_EQ(mem.getTotalBytes(), 0);
+    ASSERT_EQ(mem.getRWBytes(), 0);
+}
+
+TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithEmptyReqs) {
+    // empty request before
+    mem.push_value(nullptr, nullptr, 3.f, 2);
+    // not empty requests
+    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
+    mem.push_value(nullptr, pFuture1, 3.f, 2);
+    mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
+    // empty request after
+    mem.readonly().push_value(nullptr, nullptr, 13.f, 2);
+
+    mem.commit();
+
+    ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
+    ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float));
+}
+
+TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
+    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
+    mem.push_value(nullptr, pFuture1, 3.f, 2);
+    mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
     mem.commit();
 
     ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
@@ -301,11 +317,12 @@
 }
 
 TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
-
     GNAMemory<std::allocator<uint8_t>> memAligned(64);
+    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
 
-    memAligned.push_value(nullptr, 3.f, 2);
-    memAligned.readonly().push_value(nullptr, 13.f, 2);
+    memAligned.push_value(nullptr, pFuture1, 3.f, 2);
+    memAligned.readonly().push_value(nullptr, pFuture2, 13.f, 2);
     memAligned.commit();
 
     ASSERT_EQ(memAligned.getTotalBytes(), 128);
@@ -313,15 +330,13 @@
 }
 
 TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) {
+    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
+    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
+    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
-
-    float* pFuture2 = (float*)&pFuture2;
-    float* pFuture1 = (float*)&pFuture1;
-    float* pFuture3 = (float*)&pFuture3;
-
-
-    mem.readonly().push_value(pFuture1, 3.f, 2);
-    mem.push_value(pFuture2, 13.f, 3);
-    mem.readonly().push_value(pFuture3, 32.f, 4);
+    mem.readonly().push_value(nullptr, pFuture1, 3.f, 2);
+    mem.push_value(nullptr, pFuture2, 13.f, 3);
+    mem.readonly().push_value(nullptr, pFuture3, 32.f, 4);
     mem.commit();
 
     ASSERT_EQ(mem.getTotalBytes(), (2+3+4) * sizeof(float));
@@ -346,16 +361,15 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequest) {
     float input[] = {1, 2, 3};
-
     float *pFuture = nullptr;
     float *pFuture2 = nullptr;
     float *pFuture3 = nullptr;
 
     size_t len = sizeof(input);
 
-    mem.push_ptr(&pFuture, input, len);
-    mem.bind_ptr(&pFuture2, &pFuture, len, len);
-    mem.bind_ptr(&pFuture3, &pFuture2, 2 * len, len);
+    mem.push_ptr(nullptr, &pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
+    mem.bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len);
 
     mem.commit();
 
@@ -385,9 +399,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenPush) {
 
     size_t len = sizeof(input);
 
-    mem.push_ptr(&pFuture, input, len);
-    mem.bind_ptr(&pFuture2, &pFuture, len, len);
-    mem.push_ptr(&pFutureInput2, input2, len);
+    mem.push_ptr(nullptr, &pFuture, input, len);
+    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
+    mem.push_ptr(nullptr, &pFutureInput2, input2, len);
 
     mem.commit();
 
@@ -416,9 +430,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) {
 
     size_t len = sizeof(input);
 
-    mem.reserve_ptr(&pFuture, len);
-    mem.bind_ptr(&pFuture2, &pFuture, len, len);
-    mem.push_ptr(&pFutureInput, input, len);
+    mem.reserve_ptr(nullptr, &pFuture, len);
+    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
+    mem.push_ptr(nullptr, &pFutureInput, input, len);
 
     mem.commit();
 
diff --git a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
index 7fa12a42825..d2d2112c5ad 100644
--- a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
+++ b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
@@ -19,7 +19,7 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
     {GNA_CONFIG_KEY(EXEC_TARGET), ""},
     {GNA_CONFIG_KEY(COMPILE_TARGET), ""},
     {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
-    {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
+    {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(YES)},
    {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
     {GNA_CONFIG_KEY(PRECISION), Precision(Precision::I16).name()},
     {GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(NO)},
diff --git a/src/inference/dev_api/memory_solver.hpp b/src/inference/dev_api/memory_solver.hpp
index a1261c0497a..b9b81698c00 100644
--- a/src/inference/dev_api/memory_solver.hpp
+++ b/src/inference/dev_api/memory_solver.hpp
@@ -8,6 +8,7 @@
  */
 
 #pragma once
 
+#include <cstdint>
 #include <map>
 #include <vector>
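[Editor's note, not part of the patch: the memory_solver.hpp touched above is the dev-API helper that compact mode can lean on to pack buffers. A rough sketch of that packing step follows; it assumes the MemorySolver interface declared in that header (a Box aggregate with start, finish, size, and id fields, plus solve() and getOffset()), and the box values are purely illustrative.]

    // Sketch: two 12-byte requests whose live ranges do not overlap can
    // share one 12-byte region instead of occupying 24 bytes.
    #include <iostream>
    #include <vector>
    #include "memory_solver.hpp"

    int main() {
        using InferenceEngine::MemorySolver;
        std::vector<MemorySolver::Box> boxes = {
            {1, 2, 12, 0},  // e.g. a push_ptr buffer written at step 1, last read at step 2
            {3, 3, 12, 1},  // e.g. a reserve_ptr buffer first used at step 3
        };
        MemorySolver solver(boxes);
        std::cout << "heap size: " << solver.solve() << " bytes\n";       // 12, not 24
        std::cout << "offset of box 1: " << solver.getOffset(1) << "\n";  // reuses box 0's bytes
        return 0;
    }

This mirrors what the compact-mode unit tests assert indirectly: getTotalBytes() after commit(isCompact) equals the packed size rather than the sum of all requests.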