GNA Input/Output buffers reuse (#7332)
* Init implementation
  # Conflicts:
  #   thirdparty/ade
* Switched to shared class
* Refactoring memory commit()
* Added unit tests
* Fixed output order
* Fixed input order
* Fixed split case
* Fixed compiling issue in debug mode
* Enabled compact mode by default
* Fixed default order for inputs and outputs
* Changed unit test
* Enabled compact mode by default
* Reverted compact_mode flag order
This commit is contained in:
parent caa7d853b3
commit cccec6942e
@@ -10,7 +10,7 @@ namespace GNAPluginNS {
 struct GNAFlags {
     uint8_t gna_lib_async_threads_num = 1;

-    bool compact_mode = false;
+    bool compact_mode = true;
     bool exclusive_async_requests = false;
     bool uniformPwlDesign = false;
     float pwlMaxErrorPercent = 1.0f;
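The hunk above is the configuration-level heart of the change: compact mode (input/output buffer reuse) is now on by default. For readers who want the old behaviour, it could presumably still be switched off per network through the plugin configuration; a hedged sketch using the legacy InferenceEngine API of this period ("GNA_COMPACT_MODE" is the assumed config key, the model path is hypothetical):

    #include <inference_engine.hpp>

    // Sketch: loading a network on GNA with compact mode explicitly disabled.
    InferenceEngine::ExecutableNetwork loadWithoutCompactMode(InferenceEngine::Core& core) {
        auto network = core.ReadNetwork("model.xml");  // hypothetical model
        // assumed key/value pair; the diff itself only shows the in-plugin default flipping to true
        return core.LoadNetwork(network, "GNA", {{"GNA_COMPACT_MODE", "NO"}});
    }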
@@ -208,7 +208,7 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
     connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
     // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
     // dont see practical use case when bind storage type need to be different that allocation type
-    gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
+    gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) {
         ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
     });
 }
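This is the first of many call-site updates that all follow one pattern: each memory request now carries the CNNLayer that issues it (or nullptr when the buffer must not be tied to any layer's lifetime), so the allocator can later compute a live range for the buffer from layer execution order. A toy model of the tagging rule, with simplified types used here for illustration only:

    #include <cstddef>
    #include <cstdint>
    #include <utility>

    // Toy request: a [first, last] execution interval in layer ids.
    struct ToyRequest {
        std::size_t bytes;
        std::pair<uint16_t, uint16_t> life{0, UINT16_MAX};  // default: alive forever
    };

    // Stores/initializers tagged with a layer are needed from network start
    // until that layer has executed; an untagged request stays pinned.
    ToyRequest make_store(std::size_t bytes, const uint16_t* layer_id /* may be null */) {
        ToyRequest r{bytes};
        if (layer_id != nullptr) {
            r.life = {0, *layer_id};
        }
        return r;
    }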
@@ -475,7 +475,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     }

     if (num_conv_kernel_padding == 0) {
-        gnamem->readonly().push_local_ptr(ptr_weights,
+        gnamem->readonly().push_local_ptr(layer, ptr_weights,
             transposedWeights.data(),
             convolution._weights->byteSize(),
             64);
@@ -502,19 +502,19 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
             offset += padding_zeros.size();
         }
     };
-    gnamem->readonly().push_initializer(ptr_weights,
+    gnamem->readonly().push_initializer(layer, ptr_weights,
         paddedWeightsSize,
         initializer,
         64);
 }

 if (convolution._biases) {
-    gnamem->readonly().push_ptr(ptr_biases,
+    gnamem->readonly().push_ptr(layer, ptr_biases,
         convolution._biases->cbuffer().as<const void*>(),
         convolution._biases->byteSize(),
         64);
 } else {
-    gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
+    gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
 }
 }

@@ -600,7 +600,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
     ptr_outputs,
     ptr_weights,
     ptr_biases);

     currentComponent.num_bytes_per_input = inputs->getPrecision().size();
     currentComponent.num_bytes_per_output = outputs->getPrecision().size();

@@ -647,18 +646,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
     transposedWeights.resize(transposedWeights.size() + kernelPad);
 }

-    gnamem->readonly().push_local_ptr(ptr_weights,
+    gnamem->readonly().push_local_ptr(layer, ptr_weights,
         transposedWeights.data(),
         transposedWeights.size(),
         64);

     if (convolution._biases) {
-        gnamem->readonly().push_ptr(ptr_biases,
+        gnamem->readonly().push_ptr(layer, ptr_biases,
             convolution._biases->cbuffer().as<const void*>(),
             convolution._biases->byteSize(),
             64);
     } else {
-        gnamem->readonly().push_value(ptr_biases, 0.0f, out_channels, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64);
     }
 }
 #endif
@@ -712,14 +711,13 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     ptr_weights,
     ptr_biases,
     true);

     connectOutput(layer, ptr_outputs, num_data_bytes_out);
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);

     if (gnaFlags->sw_fp32) {
         IE_ASSERT(quantized == nullptr);
-        gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
-        gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
+        gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
     } else {
         IE_ASSERT(quantized != nullptr);
         if (!gnaFlags->input_low_precision) {
@@ -727,15 +725,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
                 static_cast<float>(INT16_MAX)));
             auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset,
                 static_cast<float>(INT32_MAX)));
-            gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedScale, num_rows_out, 64);
-            gnamem->readonly().push_value<int32_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
+            gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
+            gnamem->readonly().push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
         } else {
             auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale,
                 static_cast<float>(INT8_MAX)));
             auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset,
                 static_cast<float>(INT8_MAX)));
-            gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedScale, num_rows_out, 64);
-            gnamem->readonly().push_value<int8_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
+            gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
+            gnamem->readonly().push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
         }
     }
 } else {
@@ -799,12 +797,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     ptr_pwl_input,
     ptr_pwl_outputs,
     ptr_pwl_segments_target);

     connectOutput(layer, ptr_pwl_outputs, num_data_bytes_out);
     connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0);

     if (ptr_pwl_segments_target != nullptr) {
-        gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
+        gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
             &ptr_pwl_segments.front(),
             ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
             64);
@@ -876,7 +873,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
     getScaleFactor(layer, QuantizedDataType::output),
     ptr_inputs,
     ptr_outputs);

     size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
         * outputs->getPrecision().size();

@@ -921,7 +917,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
     num_columns_out,
     ptr_inputs,
     ptr_outputs);

     size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
         begin(outputs->getDims()), end(outputs->getDims())), 8)
         * outputs->getPrecision().size();
@@ -933,7 +928,6 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {

 void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());

     if (concatLayer == nullptr) {
         return;
     }
@@ -996,13 +990,10 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto layerInfo = LayerInfo(concatParent);
     // auto layerInfo = LayerInfo(getCreatorLayer(concatLayerInput->insData[it].lock()).lock());
     if (layerInfo.isInput()) {
-        connectInput(layer, &concatLayerInfo.gna_ptr,
-            inputLayer.tensorSize, inputLayer.offset, idx, false);
-
+        connectInput(layer, &concatLayerInfo.gna_ptr, inputLayer.tensorSize, inputLayer.offset, idx, false);
         concatLayerInfo.input_allocated = true;
     } else if (layerInfo.isMemory()) {
         connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, inputLayer.offset, idx, false);
-
         concatLayerInfo.input_allocated = true;
     }
     ++idx;
@@ -1114,7 +1105,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
     ptr_weights,
     ptr_biases,
     false);

     size_t num_data_bytes_out =
         InferenceEngine::details::product(
             begin(outputs->getDims()), end(outputs->getDims())) * 4;
@@ -1128,8 +1118,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
     FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);

     (quantized == nullptr) ?
-        gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
-        gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) :
+        gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
     }
 }

@@ -1249,7 +1239,6 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     ptr_weights,
     ptr_biases,
     true);

     size_t num_data_bytes_out =
         InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();

@@ -1262,36 +1251,36 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     switch (eltwise._operation) {
     case EltwiseLayer::Sub:
         if (quantized == nullptr) {
-            gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
+            gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
         } else {
             auto scaledIdentity = -quantized->_weights_quant.GetScale();

             if (gnaFlags->input_low_precision == false) {
                 auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

-                gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
+                gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
             } else {
                 auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));

-                gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
+                gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
             }
         }
         connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
         break;
     case EltwiseLayer::Sum:
         if (quantized == nullptr) {
-            gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
+            gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
         } else {
             auto scaledIdentity = quantized->_weights_quant.GetScale();

             if (gnaFlags->input_low_precision == false) {
                 auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

-                gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
+                gnamem->readonly().push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
             } else {
                 auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));

-                gnamem->readonly().push_value<int8_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
+                gnamem->readonly().push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
             }
         }
         connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@@ -1299,12 +1288,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {

     case EltwiseLayer::Prod:
         if (quantized == nullptr) {
-            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+            gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
         } else {
             if (gnaFlags->input_low_precision == false) {
-                gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
+                gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
             } else {
-                gnamem->readonly().push_value<int8_t>(ptr_biases, 0, num_rows_out, 64);
+                gnamem->readonly().push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
             }
         }
         connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
@@ -1372,9 +1361,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
     connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
     if (gnaFlags->sw_fp32) {
         IE_ASSERT(quantized == nullptr);
-        gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
     } else {
-        gnamem->readonly().push_value<int32_t>(ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->readonly().push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
     }
 }

@@ -1485,12 +1474,12 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool

     if (num_padding == 0) {
         if (!transpose) {
-            gnamem->readonly().push_ptr(ptr_weights,
+            gnamem->readonly().push_ptr(layer, ptr_weights,
                 weightable._weights->cbuffer().as<const void*>(),
                 weightable._weights->byteSize(),
                 64);
         } else {
-            gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
+            gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
                 for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
                     auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
                     auto cbuffer = weightable._weights->cbuffer().as<const uint8_t*>() + rowOffset;
@@ -1519,7 +1508,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
     auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
     auto paddedWeightsSize = paddedWeights * weightable.precision.size();

-    gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
+    gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
         for (uint32_t i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
             ie_memcpy(data, size,
                 weightable._weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * weightable.precision.size(),
@@ -1530,16 +1519,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
     }

     if (weightable._biases) {
-        gnamem->readonly().push_ptr(ptr_biases,
+        gnamem->readonly().push_ptr(layer, ptr_biases,
             weightable._biases->cbuffer().as<const void*>(),
             weightable._biases->byteSize(),
             64);
     } else {
         // in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
         if (useBiasConnection) {
-            gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
+            gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
         } else {
-            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
+            gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
         }
     }
 }
@@ -1557,7 +1546,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
         THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
     }

-    gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
+    gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
         int out = 0;
         for (int input = offset; input < num_rows_out + offset; ++input) {
             auto mem_ptr = reinterpret_cast<uint8_t*>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
@@ -1624,7 +1613,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
     ptr_inputs,
     ptr_outputs);

-
     size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
         * inputs->getPrecision().size();
     // need to reserve full tensor so using original size with assumption of identity activation attached to filter lateron
@@ -1681,7 +1669,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
     size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
     size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;

-    gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
+    gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
         size_t roffset = weights_offset;
         size_t woffset = 0;
         for (int i = 0; i < num_rows_out && size >= woffset; i++) {
@@ -1696,12 +1684,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
     }

     if (filterLayer->_biases) {
-        gnamem->readonly().push_ptr(ptr_biases,
+        gnamem->readonly().push_ptr(layer, ptr_biases,
             filterLayer->_biases->cbuffer().as<const void*>(),
             filterLayer->_biases->byteSize(),
             64);
     } else {
-        gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
     }
 }

@@ -1774,18 +1762,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
     connectOutput(layer, ptr_outputs, num_data_bytes_out);

-    gnamem->readonly().push_ptr(ptr_weights,
+    gnamem->readonly().push_ptr(layer, ptr_weights,
         filterLayer->_weights->cbuffer().as<const void*>(),
         filterLayer->_weights->byteSize(),
         64);

     if (filterLayer->_biases) {
-        gnamem->readonly().push_ptr(ptr_biases,
+        gnamem->readonly().push_ptr(layer, ptr_biases,
             filterLayer->_biases->cbuffer().as<const void*>(),
             filterLayer->_biases->byteSize(),
             64);
     } else {
-        gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
+        gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
     }
 }

@@ -2016,7 +2004,7 @@ case name:\
     connectOutput(layer, ptr_outputs, num_data_bytes_out);

     if (ptr_pwl_segments_target != nullptr) {
-        gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
+        gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target,
             &ptr_pwl_segments.front(),
             ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
             64);
@@ -2152,8 +2140,9 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
     }
 }

-void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr,
-    size_t num_data_bytes_out) {
+void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
+    void *ptr,
+    size_t num_data_bytes_out) {
     auto getOffsetForBinding = [](InferenceEngine::CNNLayerPtr layer) {
         int32_t output_offset = 0;
         if (layer->params.find("output_offset") != layer->params.end()) {
@@ -2162,7 +2151,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
         return output_offset;
     };

-
     gnalog() << "Connecting output " << layer->name << " ...\n";
     // in case of Memory Layer it's input allocated in meminput layer
     if (layer->outData.size() == 1) {
@@ -2179,7 +2167,6 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
     if (!nextLayer.first) {
         gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j;
     }

     auto nextMemoryLayerIt =
         std::find_if(begin(memory_connection), end(memory_connection),
             [&](MemoryConnection::value_type &comp) {
@@ -2190,14 +2177,13 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
     // memory layer not yet initialized
     if (nextMemoryLayer.reserved_size == 0) {
         auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
-
-        gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
-        gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
+        gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+        gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));

         nextMemoryLayer.reserved_size = ALIGN64(memorySize);
     } else {
         // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
-        gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
+        gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out));
     }
     return;
 }
@@ -2288,7 +2274,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
             return it != concatItem.second.concatInputLayers.end();
         });
     if (included == concat_connection.end()) {
-        gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
+        gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);

         std::function<void(GNAConcatLayer, GNAPluginNS::InputDesc&, ConcatConnection&)> allocate_input_recursively =
             [&allocate_input_recursively](GNAConcatLayer clayer, GNAPluginNS::InputDesc& inputDesc, ConcatConnection& concat_connection) {
@@ -2321,26 +2307,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
         if (layer->params.find("output_offset") != layer->params.end()) {
             output_offset = layer->GetParamAsInt("output_offset");
         }
-        gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset);
+        gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset);
     }
     return;
 }
 }

-    intel_dnn_component_t * unused_input = nullptr;
-    if (gnaFlags->compact_mode) {
-        unused_input = find_first_unused_input(layer);
-        if (unused_input != nullptr) {
-            gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
-        }
-    }
-    // cannot reuse suitable input
-    if (unused_input == nullptr) {
-        gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64);
-    }
+    auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true,
+        [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first;
+    // Check that layer will be an output
+    gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64);
 }

-GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx, bool connectTo) {
+GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
+    void *ptr,
+    size_t num_data_bytes_in,
+    int32_t offset,
+    int idx,
+    bool connectTo) {
     // selecting particular input layers
     // auto prevLayer = CNNNetPrevLayer(layer, idx);
     auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) {
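Note what changed in connectOutput(): the old code tried to reuse the first unused input buffer inline (find_first_unused_input) at graph-compilation time; the new code simply tags the reservation with the producing layer and lets the solver at commit() time decide reuse, except that buffers feeding a real network output (or with no functional consumer) are deliberately left untagged so they are never reused. A toy restatement of that rule, for illustration only:

    // Mirrors: (LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer
    struct ToyTag { bool pinned; };  // pinned == kept alive for the whole inference

    ToyTag tagForOutputBuffer(bool isNetworkOutput, bool hasFunctionalConsumer) {
        if (isNetworkOutput || !hasFunctionalConsumer) {
            return {true};   // nullptr tag in the real code: the solver never reuses it
        }
        return {false};      // layer tag: live range ends at the consumer, buffer reusable
    }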
@@ -2363,12 +2347,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,

     // real allocation pointer will be kept in ptr not in ptr_inputs_global
     if (!connectTo) {
-        gnamem->push_value(ptr,
+        gnamem->push_value(nullptr, ptr,
             static_cast<uint8_t>(0),
             num_data_bytes_in,
             64);
     } else {
-        gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
+        gnamem->push_value(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
             static_cast<uint8_t>(0),
             num_data_bytes_in,
             64);
@@ -2384,9 +2368,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
     }

     if (connectTo) {
-        gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
+        gnamem->bind_ptr(nullptr, ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset, ALIGN(num_data_bytes_in, 64));
     } else {
-        gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
+        gnamem->bind_ptr(nullptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, offset, ALIGN(num_data_bytes_in, 64));
     }

     return prevLayer;
@@ -2394,9 +2378,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
     // const input
     if (LayerInfo(prevLayer).isConst()) {
         if (connectTo) {
-            gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset);
+            gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
         } else {
-            gnamem->bind_ptr(const_connections[prevLayer->name], ptr, offset);
+            gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
         }

         return prevLayer;
@@ -2423,6 +2407,8 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,

     if (it != splitLayerInfoItem.splitOutputLayers.end()) {
         gnalog() << "Connecting " << splitName << " input \n";
+        // splitting layer should take the execution order from the connected layer
+        splittingLayer->userValue = layer->userValue;
         auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
         gnalog() << "Connected \n";
         return res;
@@ -2435,7 +2421,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
     if (concatLayerInfo != concat_connection.end()) {
         auto & concatLayerInfoItem = concatLayerInfo->second;
         // dnnLayer that is input for concat layer
-        gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
+        gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset);
         // return layer over concat
         return CNNNetPrevLayer(prevLayer);
     }
@@ -2444,7 +2430,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         prevLayer->name);
     if (cropLayerInfo != crop_connection.end()) {
         auto & cropLayerInfoItem = cropLayerInfo->second;
-        gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
+        gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset);
         return CNNNetPrevLayer(prevLayer);
     }
 }
@@ -2452,7 +2438,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,

     // check for generic prev layer
     if (prevDnnLayer != nullptr) {
-        gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
+        gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset);
         return prevLayer;
     }

@@ -2470,20 +2456,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         // connectTo used for indicate that memory layer should be bound to given buffer
         if (connectTo) {
             memorySize = std::max(memorySize, num_data_bytes_in);
-            gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
-            gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
+            gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
+            gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
         } else {
             if (num_data_bytes_in < memorySize + offset) {
                 THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
                     << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset;
             }
-            gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, offset);
+            gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset);
         }

         memoryLayer.reserved_size = ALIGN64(memorySize);
     } else {
         // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
-        gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
+        gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
     }

     return prevLayer;
@@ -17,6 +17,7 @@
 #include <utility>
+#include <limits>

 #include <ie_common.h>
 #include <legacy/graph_tools.hpp>
 #include <legacy/net_pass.h>
 #include <debug.h>
@@ -524,7 +525,7 @@ bool GNAPlugin::TryToInitOutput(int portId, InferenceEngine::CNNLayerPtr layer)
     desc.num_elements = numElem;

     // binding ptr for first infer request - then others will be setup during relocation
-    gnamem->bind_ptr(&desc.ptrs.front(), outputPtr);
+    gnamem->bind_ptr(layer, &desc.ptrs.front(), outputPtr);
 };

 // probing gna_primitives
@@ -927,7 +928,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     }

     // Creating Layer primitives
+    uint16_t id = 0;
     for (auto & layer : sortedNoMem) {
+        IE_SUPPRESS_DEPRECATED_START
+        layer->userValue.v_int = id++;
+        IE_SUPPRESS_DEPRECATED_END
         graphCompiler.CreateLayerPrimitive(layer);
     }

@@ -981,7 +986,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

     // TODO: how active list will work in multioutput case
     // make room for active list
-    gnamem->reserve_ptr(nullptr,
+    gnamem->reserve_ptr(nullptr, nullptr,
         ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64);

     void *pParallelExecutionData = nullptr;
@@ -989,10 +994,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
     // reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at lest
     rwSegmentSize = gnamem->getRWBytes();
     if (gnaFlags->gna_lib_async_threads_num > 1) {
-        gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
+        gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
     }

-    gnamem->commit();
+    gnamem->commit(gnaFlags->compact_mode);

     dnn->Init(gnamem->getBasePtr(),
         gnamem->getTotalBytes(),
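With this, commit() receives the plugin's compact-mode flag instead of being parameterless. A minimal usage sketch of the new flow, mirroring the unit tests at the bottom of this diff (std::allocator-backed GNAMemory; layers prepared with sequential userValue ids as in the tests):

    void commitCompactExample(GNAPluginNS::memory::GNAMemory<std::allocator<uint8_t>>& mem,
                              InferenceEngine::CNNLayerPtr layer1,
                              InferenceEngine::CNNLayerPtr layer2) {
        float* pFuture1 = reinterpret_cast<float*>(&pFuture1);  // self-pointing, as in the tests
        float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
        mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float));   // scratch, live only at layer1
        mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float));   // scratch, live only at layer2
        mem.commit(/* isCompact = */ true);  // disjoint live ranges -> the two buffers overlap
    }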
@@ -1569,7 +1574,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i

     graphCompiler.setGNAMemoryPtr(gnamem);
     void *basePtr = nullptr;
-    gnamem->reserve_ptr(&basePtr, header.gnaMemSize);
+    gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
     gnamem->commit();
 #if GNA_LIB_VER == 2
     gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>(header.layersCount)));
@@ -14,6 +14,8 @@
  * @brief used for creating graphviz charts, and layers dump
  */
 # define PLOT
+# define MODEL_DUMP
+# define GNA_HEAP_PROFILER
 # define gnalog() std::cout
 # define gnawarn() std::cerr
 #else
@@ -8,6 +8,8 @@
 #include <vector>
 #include <algorithm>

+#include "gna_plugin_log.hpp"
+
 namespace GNAPluginNS {
 namespace memory {

||||
@ -26,6 +28,45 @@ enum rRegion {
|
||||
REGION_AUTO,
|
||||
};
|
||||
|
||||
#ifdef GNA_HEAP_PROFILER
|
||||
inline const char* rRegionToStr(uint8_t region) {
|
||||
const char* strRegion = "UNKNOWN";
|
||||
switch (region) {
|
||||
case REGION_RO:
|
||||
strRegion = "REGION_RO";
|
||||
break;
|
||||
case REGION_RW:
|
||||
strRegion = "REGION_RW";
|
||||
break;
|
||||
case REGION_AUTO:
|
||||
strRegion = "REGION_AUTO";
|
||||
break;
|
||||
}
|
||||
return strRegion;
|
||||
}
|
||||
|
||||
inline const char* rTypeToStr(uint8_t type) {
|
||||
const char* strType = "UNKNOWN";
|
||||
switch (type) {
|
||||
case REQUEST_STORE:
|
||||
strType = "REQUEST_STORE";
|
||||
break;
|
||||
case REQUEST_ALLOCATE:
|
||||
strType = "REQUEST_ALLOCATE";
|
||||
break;
|
||||
case REQUEST_BIND:
|
||||
strType = "REQUEST_BIND";
|
||||
break;
|
||||
case REQUEST_INITIALIZER | REQUEST_STORE:
|
||||
case REQUEST_INITIALIZER | REQUEST_ALLOCATE:
|
||||
case REQUEST_INITIALIZER | REQUEST_BIND:
|
||||
strType = "INITIALIZER";
|
||||
break;
|
||||
}
|
||||
return strType;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct MemRequest {
|
||||
rRegion _region;
|
||||
uint8_t _type;
|
||||
@@ -40,6 +81,10 @@ struct MemRequest {
     size_t _offset = 0;
     // expansion in bytes due to large depended layers
     size_t _padding = 0;
+
+    // fields to sort regions by execution availability
+    std::pair<uint16_t, uint16_t> _life_limits{0, UINT16_MAX};
+
     MemRequest(rRegion region,
         rType req,
         void *ptr_out,
@@ -79,7 +124,8 @@ struct MemRequest {
         _data.resize(sizeof(T));
         std::copy(reinterpret_cast<uint8_t *>(&element), reinterpret_cast<uint8_t *>(&element) + sizeof(T), _data.begin());
     }
-    /**
+
+    /**
      * Store initializer request
      * @param req
      * @param ptr_out
@@ -8,10 +8,23 @@
 #include <vector>
 #include <algorithm>
 #include <functional>

+#include <ie_api.h>
+#include <legacy/ie_layers.h>
 #include "gna_mem_requests.hpp"

 namespace GNAPluginNS {
 namespace memory {

+/**
+ * @brief get layer id from legacy CNNLayer
+ */
+inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
+    IE_SUPPRESS_DEPRECATED_START
+    return layer->userValue.v_int;
+    IE_SUPPRESS_DEPRECATED_END
+}
+
 /**
  * Adapter for requests submission and actual request queue
  */
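The id returned here is the sequential execution index that LoadNetwork now writes into userValue before compiling each primitive (see the gna_plugin.cpp hunk above). The queue methods below combine it into _life_limits with a simple convention, summarized in this small recap (types simplified; derived from the code in this diff):

    #include <cstdint>
    #include <utility>

    using LifeLimits = std::pair<uint16_t, uint16_t>;  // [first use, last use] in execution order

    LifeLimits storeLimits(uint16_t layerId)   { return {0, layerId}; }        // push_* / initializers
    LifeLimits scratchLimits(uint16_t layerId) { return {layerId, layerId}; }  // reserve_ptr / bind_ptr
    LifeLimits pinnedLimits()                  { return {0, UINT16_MAX}; }     // layer == nullptr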
@@ -26,12 +39,26 @@ public:
     * @param num_bytes
     * @param alignment
     */
-    void push_initializer(void *ptr_out, size_t num_bytes, std::function<void(void * data, size_t size)> initializer, size_t alignment = 1) {
+    void push_initializer(InferenceEngine::CNNLayerPtr layer,
+                          void *ptr_out,
+                          size_t num_bytes,
+                          std::function<void(void * data, size_t size)> initializer,
+                          size_t alignment = 1) {
         futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
+        }
     }

-    void push_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
+    void push_ptr(InferenceEngine::CNNLayerPtr layer,
+                  void *ptr_out,
+                  const void *ptr_in,
+                  size_t num_bytes,
+                  size_t alignment = 1) {
         futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
+        }
     }

     /**
@@ -40,10 +67,17 @@ public:
     * @param ptr_in
     * @param num_bytes
     */
-    void push_local_ptr(void *ptr_out, const void *ptr_in, size_t num_bytes, size_t alignment = 1) {
+    void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
+                        void *ptr_out,
+                        const void *ptr_in,
+                        size_t num_bytes,
+                        size_t alignment = 1) {
         localStorage().emplace_back(reinterpret_cast<const uint8_t *>(ptr_in),
                                     reinterpret_cast<const uint8_t *>(ptr_in) + num_bytes);
         futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
+        }
     }

     /**
@@ -51,8 +85,14 @@ public:
     * @param ptr_out
     * @param num_bytes
     */
-    void reserve_ptr(void *ptr_out, size_t num_bytes, size_t alignment = 1) {
+    void reserve_ptr(InferenceEngine::CNNLayerPtr layer,
+                     void *ptr_out,
+                     size_t num_bytes,
+                     size_t alignment = 1) {
         futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
+        }
     }

     /**
@@ -63,8 +103,15 @@ public:
     * @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(),
     *      if that happens - reserved request parameters will be updated before committing memory
     */
-    void bind_ptr(void *source, const void *dest, size_t offset = 0, size_t num_bytes = 0) {
+    void bind_ptr(InferenceEngine::CNNLayerPtr layer,
+                  void *source,
+                  const void *dest,
+                  size_t offset = 0,
+                  size_t num_bytes = 0) {
         futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
+        }
     }

     /**
@@ -72,16 +119,28 @@ public:
     * @param ptr_out - previously requested buffer
     * @param initializer - initialisation routine to be called on allocated memory
     */
-    void bind_initializer(void *ptr_out, std::function<void(void * data, size_t size)> initializer) {
+    void bind_initializer(InferenceEngine::CNNLayerPtr layer,
+                          void *ptr_out,
+                          std::function<void(void * data, size_t size)> initializer) {
         futureHeap().push_back({regionType(), ptr_out, 0, initializer, REQUEST_BIND, 1});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
+        }
     }

     /**
     * @brief allocates buffer and set all its values to T value
     */
     template<class T>
-    void push_value(void *ptr_out, T value, size_t num_elements, size_t alignment = 1) {
+    void push_value(InferenceEngine::CNNLayerPtr layer,
+                    void *ptr_out,
+                    T value,
+                    size_t num_elements,
+                    size_t alignment = 1) {
         futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
+        if (layer != nullptr) {
+            futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
+        }
     }

     /**
@@ -13,7 +13,15 @@
 #include <list>
 #include <algorithm>
 #include <functional>
+#include <iostream>
 #include "gna_lib_ver_selector.hpp"
+#include "memory_solver.hpp"
+#include "gna_plugin_log.hpp"

+#ifdef GNA_HEAP_PROFILER
+#include <iomanip>
+#include <fstream>
+#endif
+
 namespace GNAPluginNS {
 namespace memory {
@@ -32,6 +40,7 @@ class GNAMemory : public GNAMemRequestsQueue {
     Allocator _allocator;
     std::shared_ptr<uint8_t> heap = nullptr;
     size_t _page_alignment = 1;
+    bool _is_compact_mode = false;

     class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
         std::reference_wrapper<GNAMemRequestsQueue> _that;
@@ -62,93 +71,32 @@ class GNAMemory : public GNAMemRequestsQueue {
         return readOnlyFrontEnd;
     }

+    /**
+     * @brief enables memory optimization (compact mode). This mode can be enable in plugin configuration (COMPACT_MODE = Yes)
+     */
+    void setCompactMode(bool isCompact) {
+        _is_compact_mode = isCompact;
+    }
+
     /**
     * @brief calculates size required for all requests, allocates memory and updates pointers
     */
-    void commit() {
+    void commit(bool isCompact = false) {
+        setCompactMode(isCompact);
+
+        // 1st stage -- looking for expandable bind requests:
-        for (auto &originated : _future_heap) {
-            if (originated._type & REQUEST_BIND) continue;
-            size_t offset = 0;
-            iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
-                if (&originated == &reference) {
-                    offset = 0;
-                }
-                offset += binded._offset;
-                auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
-                auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
-                auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
-
-                originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
-            });
-        }
+        expandBindings();

+        // 2nd stage -- setup offsets:
+        setRegionOffsets(REGION_RO);
+        setRegionOffsets(REGION_RW);

         updateSectionsSizes();

+        // 3rd stage -- allocation total memory setting to 0 internally
+        heap = allocate(getTotalBytes());

-        _total = _rw_section_size + _ro_section_size;
-
-        // allocation with memory setting to 0 internally
-        heap = allocate(_total);
-        auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
-            for (auto &re : _future_heap) {
-                if (re._type == REQUEST_BIND) continue;
-                if (filter(re)) continue;
-
-                auto sz = re._element_size * re._num_elements;
-
-                if (re._ptr_out != nullptr) {
-                    auto cptr = heap.get() + offset;
-                    size_t cptr_avail_size = _total - offset;
-                    if (re._type & REQUEST_BIND) {
-                        cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
-                        cptr_avail_size = sz;
-                    } else {
-                        *reinterpret_cast<void **>(re._ptr_out) = cptr;
-                    }
-                    // std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";
-                    iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
-                        *reinterpret_cast<void **>(binded._ptr_out) =
-                            binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
-                        binded._num_elements = reference._num_elements;
-                        binded._element_size = reference._element_size;
-                    });
-
-                    // std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;
-
-                    switch (re._type & ~REQUEST_BIND) {
-                        case REQUEST_ALLOCATE :
-                            break;
-                        case REQUEST_STORE : {
-                            if (re._ptr_in != nullptr) {
-                                ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
-                            } else {
-                                size_t of = 0;
-                                for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
-                                    std::copy(std::begin(re._data), std::end(re._data), cptr + of);
-                                }
-                            }
-                            break;
-                        }
-                        case REQUEST_INITIALIZER : {
-                            re._initializer(cptr, sz);
-                            break;
-                        }
-                    }
-                }
-                if (!(re._type & REQUEST_BIND)) {
-                    offset += ALIGN(sz + re._padding, re._alignment);
-                }
-            }
-        };
-
-        setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
-            // TODO: consume bind requests separately from storage type
-            return !(request._type & REQUEST_BIND) && (request._region != REGION_RW);
-        }, 0);
-
-        setupOffsets([](GNAPluginNS::memory::MemRequest & request) {
-            return (request._type & REQUEST_BIND) || request._region != REGION_RO;
-        }, _rw_section_size);
+        // 4th stage -- store data and updates pointers
+        allocateRegion(REGION_RW, 0);
+        allocateRegion(REGION_RO, _rw_section_size);
     }

     void *getBasePtr() {
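To make the 1st stage concrete, here is a worked instance of the padding arithmetic that moved into expandBindings(), with invented numbers:

    #include <algorithm>
    #include <cstddef>

    constexpr std::size_t align_up(std::size_t v, std::size_t a) { return ((v + a - 1) / a) * a; }

    // An originating 100-byte buffer (64-byte alignment, no padding yet) with a
    // bound view starting at offset 96 that spans an aligned 64 bytes:
    constexpr std::size_t original_no_pad = align_up(100, 64);           // 128
    constexpr std::size_t current         = 96 + align_up(64, 64);       // 160
    constexpr std::size_t padding         = align_up(std::max(original_no_pad, current), 64)
                                          - original_no_pad;             // 192 - 128 = 64
    static_assert(padding == 64, "the originating buffer grows to cover the overhanging bind");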
@@ -180,7 +128,7 @@ class GNAMemory : public GNAMemRequestsQueue {
     void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
         for (auto &re : _future_heap) {
             if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
-                // std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
+                // std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
                 visitor(reference, re);
                 // primitive loop check
                 if (re._ptr_in == re._ptr_out) continue;
@@ -190,7 +138,6 @@ class GNAMemory : public GNAMemRequestsQueue {
         }
     }

-
     std::shared_ptr<uint8_t> allocate(size_t bytes) {
         std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
             _allocator.deallocate(p, bytes);
@@ -200,31 +147,191 @@ class GNAMemory : public GNAMemRequestsQueue {
     }

 protected:
+    /**
+     * @brief expand BIND and (BIND | ) requests. Align size(_padding), set execution order
+     */
+    void expandBindings() {
+        for (auto &originated : _future_heap) {
+            // skipping bind requests to avoid duplications
+            if (originated._type & REQUEST_BIND) continue;
+
+            size_t offset = 0;
+            iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
+                // aligning sizes
+                if (&originated == &reference) offset = 0;
+
+                offset += binded._offset;
+                auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
+                auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
+                auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding, originated._alignment);
+
+                originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
+
+                // set execution order
+                originated._life_limits.first = std::min(originated._life_limits.first, binded._life_limits.first);
+                originated._life_limits.second = std::max(originated._life_limits.second, binded._life_limits.second);
+            });
+        }
+    }
+
+    /**
+     * @brief set offsets for specific region
+     */
+    size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) {
+        size_t region_offset = 0;
+        for (auto &re : _future_heap) {
+            if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
+
+            re._offset = region_offset;
+            region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
+        }
+        return region_offset;
+    }
+
+    /**
+     * @brief allocates memory and updates pointers
+     */
+    void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) {
+        for (auto &re : _future_heap) {
+            // skipping Bind, crossregion and empty requests
+            if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue;
+
+            size_t offset = baseOffset + re._offset;
+            auto cptr = heap.get() + offset;
+            size_t cptr_avail_size = _total - offset;
+
+            auto sz = re._element_size * re._num_elements;
+            if (re._type & REQUEST_BIND) {
+                cptr = reinterpret_cast<uint8_t*>(*reinterpret_cast<void **>(re._ptr_out));
+                cptr_avail_size = sz;
+            } else {
+                *reinterpret_cast<void **>(re._ptr_out) = cptr;
+            }
+            iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
+                *reinterpret_cast<void **>(binded._ptr_out) =
+                    binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
+                binded._num_elements = reference._num_elements;
+                binded._element_size = reference._element_size;
+            });
+
+            switch (re._type & ~REQUEST_BIND) {
+                case REQUEST_ALLOCATE :
+                    break;
+                case REQUEST_STORE : {
+                    if (re._ptr_in != nullptr) {
+                        ie_memcpy(cptr, cptr_avail_size, re._ptr_in, sz);
+                    } else {
+                        size_t of = 0;
+                        for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
+                            std::copy(std::begin(re._data), std::end(re._data), cptr + of);
+                        }
+                    }
+                    break;
+                }
+                case REQUEST_INITIALIZER : {
+                    re._initializer(cptr, sz);
+                    break;
+                }
+            }
+        }
+    }
+
+    /**
+     * @brief optimize memory region by reusing buffers
+     */
+    size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) {
+        size_t memSize = 0;
+        switch (regType) {
+            case REGION_AUTO:
+            case REGION_RW:
+            case REGION_RO: {
+                std::vector<MemorySolver::Box> boxes;
+                for (size_t i = 0; i < _future_heap.size(); ++i) {
+                    // skipping BIND, cross-region and empty requests
+                    if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) {
+                        continue;
+                    }
+
+                    auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding,
+                                                   _future_heap[i]._alignment);
+                    int start = _future_heap[i]._life_limits.first;
+                    int stop = _future_heap[i]._life_limits.second;
+
+                    boxes.push_back({start, stop, static_cast<int64_t>(original_with_pad), static_cast<int64_t>(i)});
+                }
+                MemorySolver memSolver(boxes);
+                memSize = memSolver.solve();
+
+                // setting offsets
+                for (auto const & box : boxes) {
+                    _future_heap[box.id]._offset = memSolver.getOffset(box.id);
+                }
+            }
+            break;
+
+            default:
+                break;
+        }
+
+        return memSize;
+    }
+
+#ifdef GNA_HEAP_PROFILER
+    void memoryDump(std::function<bool(MemRequest & re)> filter) {
+        std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out);
+
+        for (auto &re : _future_heap) {
+            if (filter(re)) continue;
+            dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", "
+                << "type: " << std::setw(17) << rTypeToStr(re._type) << " "
+                << "ptr_in: " << std::setw(15) << re._ptr_in << " "
+                << "ptr_out: " << std::setw(15) << re._ptr_out << " "
+                << std::setw(8) << re._num_elements << ", "
+                << static_cast<int>(re._element_size) << ", "
+                << re._padding << ", "
+                << std::setw(3) << re._alignment << ", "
+                << std::setw(8) << re._offset << ", "
+                << "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", "
+                << std::endl;
+        }
+    }
+#endif
+
     void updateSectionsSizes() {
         // count total size and size of read/write regions
         _rw_section_size = 0;
         _ro_section_size = 0;
-        for (auto &re : _future_heap) {
-            auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
 #ifdef GNA_HEAP_PROFILER
-            std::cout << "chunk: " << " region: " << re._region << ", " <<
-                "type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
-                std::setw(10) << re._num_elements << ", " <<
-                static_cast<int>(re._element_size) << ", " <<
-                re._padding << ", " <<
-                re._offset << ", " <<
-                re._alignment << std::endl;
+        memoryDump([](GNAPluginNS::memory::MemRequest & request) {
+            return false;
+        });
 #endif
-            if (re._type == REQUEST_BIND) continue;
+        for (auto &re : _future_heap) {
+            if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue;
+
+            size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
             if (re._region == REGION_RW) {
                 _rw_section_size += current;
             } else {
                 _ro_section_size += current;
             }
         }
+
+        if (_is_compact_mode) {
+            _rw_section_size = getSectionSizeOptimized(REGION_RW);
+        }
+
         gnalog() << "ro_section_size: " << _ro_section_size << std::endl;
         gnalog() << "rw_section_size: " << _rw_section_size << std::endl;
         gnalog() << "total: " << _total << std::endl;

         _rw_section_size = ALIGN(_rw_section_size, _page_alignment);
         _ro_section_size = ALIGN(_ro_section_size, _page_alignment);
+        _total = _rw_section_size + _ro_section_size;
+
+        gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl;
+        gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl;
     }
 };
 } // namespace memory
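getSectionSizeOptimized() above delegates the packing itself to InferenceEngine's MemorySolver: every surviving request becomes a box whose horizontal extent is its [first, last] execution interval and whose size is its aligned byte count, and boxes with disjoint intervals may be assigned the same offset. A small hedged illustration of the box model (numbers invented; exact offsets are the solver's choice):

    #include <cstdint>
    #include <vector>
    #include "memory_solver.hpp"

    // Two 64-byte buffers with disjoint live ranges can share one slot; a third
    // buffer overlapping both must be placed elsewhere, so 128 bytes suffice.
    inline int64_t memorySolverBoxExample() {
        std::vector<MemorySolver::Box> boxes = {
            {/*start*/ 1, /*finish*/ 2, /*size*/ 64, /*id*/ 0},
            {/*start*/ 3, /*finish*/ 4, /*size*/ 64, /*id*/ 1},  // may reuse box 0's offset
            {/*start*/ 2, /*finish*/ 3, /*size*/ 64, /*id*/ 2},  // overlaps both
        };
        MemorySolver solver(boxes);
        return solver.solve();  // expected: 128 rather than 3 * 64 = 192
    }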
inference-engine/tests/unit/gna/gna_memory_compact_test.cpp (new file, 250 lines)
@@ -0,0 +1,250 @@
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "memory/gna_memory.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace GNAPluginNS::memory;
|
||||
|
||||
class GNAMemoryCompactTest : public ::testing::Test {
|
||||
protected:
|
||||
GNAMemory<std::allocator<uint8_t>> mem;
|
||||
bool isCompact = true;
|
||||
|
||||
void SetUp() override {
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(GNAMemoryCompactTest, canOptimizeReservePtr) {
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
|
||||
CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
|
||||
layer1->userValue.v_int = 1;
|
||||
layer2->userValue.v_int = 2;
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
|
||||
float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
|
||||
|
||||
mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float));
|
||||
mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float));
|
||||
|
||||
mem.commit(isCompact);
|
||||
ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float));
|
||||
ASSERT_EQ(mem.getTotalBytes(), 3 * sizeof(float));
|
||||
}
|
||||
|
||||

TEST_F(GNAMemoryCompactTest, canOptimizePushValue) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);

    mem.push_value(layer1, pFuture1, 1.f, 2);
    mem.push_value(layer2, pFuture2, 2.f, 3);

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float));
    ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float));
}

TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    mem.push_value(layer1, pFuture1, 3.f, 2);
    mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2);
    mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float));

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float));
    ASSERT_EQ(mem.getTotalBytes(), 2 * sizeof(float));
}

TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    CNNLayerPtr layer4 = std::make_shared<CNNLayer>(LayerParams("layer4", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    layer4->userValue.v_int = 4;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    mem.push_value(layer1, pFuture1, 1.f, 2);
    mem.push_value(layer2, pFuture2, 2.f, 3);
    mem.reserve_ptr(layer3, pFuture3, 5 * sizeof(float));
    mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2);

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float));
    ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float));
}


TEST_F(GNAMemoryCompactTest, canOptimizePushPtrAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    IE_SUPPRESS_DEPRECATED_END

    float input[] = {1, 2, 3};
    size_t input_size = sizeof(input);

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    mem.push_ptr(layer1, pFuture1, input, input_size);
    mem.reserve_ptr(layer2, pFuture2, input_size);
    mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), input_size);
    ASSERT_EQ(mem.getTotalBytes(), input_size);
}

TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    size_t input_size;
    {
        std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
        input_size = input.size() * sizeof(float);
        mem.push_local_ptr(layer1, pFuture1, &*input.begin(), input_size);
    }

    mem.reserve_ptr(layer2, pFuture2, input_size);
    mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), input_size);
    ASSERT_EQ(mem.getTotalBytes(), input_size);
}

TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    size_t input_size;
    {
        std::vector<float> input = {1.0f, 2.0f, 3.0f};
        input_size = input.size() * sizeof(float);
        mem.push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size) {
            ie_memcpy(data, size, &input[0], input.size() * sizeof(float));  // copy size is in bytes, not element count
        });
    }

    mem.reserve_ptr(layer2, pFuture2, 2 * input_size);
    mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size);

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 2 * input_size);
    ASSERT_EQ(mem.getTotalBytes(), 2 * input_size);
}

TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    CNNLayerPtr layer4 = std::make_shared<CNNLayer>(LayerParams("layer4", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    layer4->userValue.v_int = 4;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);
    float* pFuture4 = reinterpret_cast<float*>(&pFuture4);

    {
        std::vector<float> input = {1.0f, 2.0f, 3.0f};
        mem.bind_initializer(layer2, pFuture1, [=](void* data, size_t size) {
            ie_memcpy(data, size, &input[0], input.size() * sizeof(float));  // copy size is in bytes, not element count
        });
    }

    mem.reserve_ptr(layer1, pFuture1, 4 * sizeof(float));
    mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float));
    mem.bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float));

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float));
    ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
}

TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) {
    IE_SUPPRESS_DEPRECATED_START
    CNNLayerPtr layer1 = std::make_shared<CNNLayer>(LayerParams("layer1", "test", Precision::FP32));
    CNNLayerPtr layer2 = std::make_shared<CNNLayer>(LayerParams("layer2", "test", Precision::FP32));
    CNNLayerPtr layer3 = std::make_shared<CNNLayer>(LayerParams("layer3", "test", Precision::FP32));
    layer1->userValue.v_int = 1;
    layer2->userValue.v_int = 2;
    layer3->userValue.v_int = 3;
    IE_SUPPRESS_DEPRECATED_END

    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    mem.reserve_ptr(layer1, pFuture1, 2 * sizeof(float));
    mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float));
    mem.bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float));

    mem.commit(isCompact);
    ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float));
    ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
}
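The tests above share one idea: every request is tagged with the layer that issued it, and commit(isCompact) may overlap requests whose live ranges never intersect. A rough sketch of that layout strategy, with a hypothetical Request record and a greedy placer that is illustrative only, not the plugin's actual algorithm:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical request record: the buffer is live for layers [first, last].
struct Request { int first; int last; std::size_t size; std::size_t offset = 0; };

// Greedy compact layout: a request may reuse the offset of any earlier request
// whose live range does not intersect its own, so the arena size is driven by
// the heaviest set of simultaneously live buffers, not by the sum of all sizes.
static std::size_t compactLayout(std::vector<Request>& reqs) {
    std::size_t total = 0;
    for (std::size_t i = 0; i < reqs.size(); ++i) {
        std::size_t off = 0;
        for (std::size_t j = 0; j < i; ++j) {
            const bool overlap = reqs[i].first <= reqs[j].last && reqs[j].first <= reqs[i].last;
            if (overlap) off = std::max(off, reqs[j].offset + reqs[j].size);
        }
        reqs[i].offset = off;
        total = std::max(total, off + reqs[i].size);
    }
    return total;
}

int main() {
    // Mirrors canOptimizeReservePtr: two buffers with disjoint live ranges
    // share storage, so the total is max(12, 8) = 12 bytes, not 20.
    std::vector<Request> reqs = {{1, 1, 12}, {2, 2, 8}};
    return compactLayout(reqs) == 12 ? 0 : 1;
}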
@ -9,7 +9,6 @@
using namespace GNAPluginNS::memory;

class GNAMemoryTest : public ::testing::Test {

protected:
    GNAMemory<std::allocator<uint8_t>> mem;

@ -17,12 +16,12 @@ class GNAMemoryTest : public ::testing::Test {
    }
};

TEST_F(GNAMemoryTest, canStoreActualBlob){
    float input [] = {1,2,3};
TEST_F(GNAMemoryTest, canStoreActualBlob) {
    float input[] = {1, 2, 3};
    float* pFuture = nullptr;
    size_t len = sizeof(input);

    mem.push_ptr(&pFuture, input, len);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.commit();

    ASSERT_NE(pFuture, nullptr);
@ -33,12 +32,12 @@ TEST_F(GNAMemoryTest, canStoreActualBlob){
}

TEST_F(GNAMemoryTest, canStore2Blobs) {
    float input [] = {1,2,3,4};
    float input[] = {1, 2, 3, 4};
    float* pFuture = nullptr;
    float* pFuture2 = nullptr;

    mem.push_ptr(&pFuture, input, 3*4);
    mem.push_ptr(&pFuture2, input+1, 3*4);
    mem.push_ptr(nullptr, &pFuture, input, 3*4);
    mem.push_ptr(nullptr, &pFuture2, input+1, 3*4);
    mem.commit();

    ASSERT_NE(pFuture, input);
@ -54,10 +53,10 @@ TEST_F(GNAMemoryTest, canStore2Blobs) {
}

TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
    float input [] = {1,2,3,4,5,6,7,8};
    float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
    float* pFuture = nullptr;

    mem.push_ptr(&pFuture, input, 3*4, 8);
    mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
    mem.commit();

    ASSERT_EQ(16 , mem.getTotalBytes());
@ -73,12 +72,12 @@ TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
}

TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
    float input [] = {1,2,3,4,5,6,7,8};
    float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
    float* pFuture = nullptr;
    float* pFuture2 = nullptr;

    mem.push_ptr(&pFuture, input, 3*4, 8);
    mem.push_ptr(&pFuture2, input, 3*4, 16);
    mem.push_ptr(nullptr, &pFuture, input, 3*4, 8);
    mem.push_ptr(nullptr, &pFuture2, input, 3*4, 16);
    mem.commit();

    ASSERT_EQ(32 , mem.getTotalBytes());
@ -92,33 +91,30 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
    ASSERT_EQ(pFuture[4], 1);
    ASSERT_EQ(pFuture[5], 2);
    ASSERT_EQ(pFuture[6], 3);

}

TEST_F(GNAMemoryTest, canReserveData) {

    float* pFuture = nullptr;
    mem.reserve_ptr(&pFuture, 3*4);
    mem.reserve_ptr(nullptr, &pFuture, 3*4);
    mem.commit();

    ASSERT_NE(pFuture, nullptr);
}

TEST_F(GNAMemoryTest, canReserveDataByVoid) {
    mem.reserve_ptr(nullptr, 3*4);
    mem.reserve_ptr(nullptr, nullptr, 3*4);
    ASSERT_NO_THROW(mem.commit());
}


TEST_F(GNAMemoryTest, canReserveAndPushData) {

    float input[] = {1, 2, 3};
    float *pFuture = nullptr;
    float* pFuture2 = nullptr;
    size_t len = sizeof(input) ;
    size_t len = sizeof(input);

    mem.push_ptr(&pFuture, input, len);
    mem.reserve_ptr(&pFuture2, 3*4);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.reserve_ptr(nullptr, &pFuture2, 3*4);
    mem.commit();

    ASSERT_NE(pFuture, nullptr);
@ -136,16 +132,15 @@ TEST_F(GNAMemoryTest, canReserveAndPushData) {
}

TEST_F(GNAMemoryTest, canBindAndResolve) {

    float input[] = {1, 2, 3};
    float *pFuture = nullptr;
    float *pFuture2 = nullptr;
    float *pFuture3 = nullptr;
    size_t len = sizeof(input);

    mem.bind_ptr(&pFuture3, &pFuture);
    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(&pFuture2, &pFuture);
    mem.bind_ptr(nullptr, &pFuture3, &pFuture);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture2, &pFuture);

    mem.commit();

@ -160,16 +155,15 @@ TEST_F(GNAMemoryTest, canBindAndResolve) {
}

TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) {

    float input[] = {1, 2, 3};
    float *pFuture = nullptr;
    float *pFuture3 = nullptr;
    float *pFuture4 = nullptr;
    size_t len = sizeof(input);

    mem.bind_ptr(&pFuture4, &pFuture3);
    mem.bind_ptr(&pFuture3, &pFuture);
    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture4, &pFuture3);
    mem.bind_ptr(nullptr, &pFuture3, &pFuture);
    mem.push_ptr(nullptr, &pFuture, input, len);

    mem.commit();

@ -185,16 +179,15 @@ TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) {
}

TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) {

    float input[] = {1, 2, 3};
    float *pFuture = nullptr;
    float *pFuture3 = nullptr;
    float *pFuture4 = nullptr;
    size_t len = sizeof(input);

    mem.bind_ptr(&pFuture4, &pFuture3, 4);
    mem.bind_ptr(&pFuture3, &pFuture, 4);
    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture4, &pFuture3, 4);
    mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
    mem.push_ptr(nullptr, &pFuture, input, len);

    mem.commit();

@ -210,16 +203,15 @@ TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) {
}

TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) {

    float input[] = {1, 2, 3};
    float *pFuture = nullptr;
    float *pFuture2 = nullptr;
    float *pFuture3 = nullptr;
    size_t len = sizeof(input);

    mem.bind_ptr(&pFuture3, &pFuture, 4);
    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(&pFuture2, &pFuture);
    mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture2, &pFuture);

    mem.commit();

@ -237,12 +229,11 @@ TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) {


TEST_F(GNAMemoryTest, canPushLocal) {

    float* pFuture = (float*)&pFuture;
    float* pFuture = reinterpret_cast<float*>(&pFuture);

    {
        std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
        mem.push_local_ptr(pFuture, &*input.begin(), 4 * 4, 1);
        mem.push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1);
    }

    // poison stack
@ -255,13 +246,12 @@ TEST_F(GNAMemoryTest, canPushLocal) {
}

TEST_F(GNAMemoryTest, canPushValue) {

    float* pFuture = (float*)&pFuture;
    float* pFuture2 = (float*)&pFuture2;
    float* pFuture = reinterpret_cast<float*>(&pFuture);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);

    {
        mem.push_value(pFuture, 3.f, 2);
        mem.push_value(pFuture2, 13.f, 2);
        mem.push_value(nullptr, pFuture, 3.f, 2);
        mem.push_value(nullptr, pFuture2, 13.f, 2);
    }

    mem.commit();
@ -273,13 +263,12 @@ TEST_F(GNAMemoryTest, canPushValue) {
}

TEST_F(GNAMemoryTest, canPushReadOnlyValue) {

    float* pFuture = (float*)&pFuture;
    float* pFuture2 = (float*)&pFuture2;
    float* pFuture = reinterpret_cast<float*>(&pFuture);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);

    {
        mem.push_value(pFuture, 3.f, 2);
        mem.readonly().push_value(pFuture2, 13.f, 2);
        mem.push_value(nullptr, pFuture, 3.f, 2);
        mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
    }

    mem.commit();
@ -290,10 +279,37 @@ TEST_F(GNAMemoryTest, canPushReadOnlyValue) {
    ASSERT_FLOAT_EQ(pFuture[3], 13);
}

TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeEmptyReqs) {
    mem.push_value(nullptr, nullptr, 3.f, 2);
    mem.readonly().push_value(nullptr, nullptr, 13.f, 2);
    mem.commit();

    mem.push_value(nullptr, 3.f, 2);
    mem.readonly().push_value(nullptr, 13.f, 2);
    ASSERT_EQ(mem.getTotalBytes(), 0);
    ASSERT_EQ(mem.getRWBytes(), 0);
}

TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithEmptyReqs) {
    // empty request before
    mem.push_value(nullptr, nullptr, 3.f, 2);
    // not empty requests
    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    mem.push_value(nullptr, pFuture1, 3.f, 2);
    mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
    // empty request after
    mem.readonly().push_value(nullptr, nullptr, 13.f, 2);

    mem.commit();

    ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
    ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float));
}

TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    mem.push_value(nullptr, pFuture1, 3.f, 2);
    mem.readonly().push_value(nullptr, pFuture2, 13.f, 2);
    mem.commit();

    ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float));
@ -301,11 +317,12 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
}

TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {

    GNAMemory<std::allocator<uint8_t>> memAligned(64);
    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);

    memAligned.push_value(nullptr, 3.f, 2);
    memAligned.readonly().push_value(nullptr, 13.f, 2);
    memAligned.push_value(nullptr, pFuture1, 3.f, 2);
    memAligned.readonly().push_value(nullptr, pFuture2, 13.f, 2);
    memAligned.commit();

    ASSERT_EQ(memAligned.getTotalBytes(), 128);
@ -313,15 +330,13 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
}

TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) {
    float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
    float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
    float* pFuture3 = reinterpret_cast<float*>(&pFuture3);

    float* pFuture2 = (float*)&pFuture2;
    float* pFuture1 = (float*)&pFuture1;
    float* pFuture3 = (float*)&pFuture3;


    mem.readonly().push_value(pFuture1, 3.f, 2);
    mem.push_value(pFuture2, 13.f, 3);
    mem.readonly().push_value(pFuture3, 32.f, 4);
    mem.readonly().push_value(nullptr, pFuture1, 3.f, 2);
    mem.push_value(nullptr, pFuture2, 13.f, 3);
    mem.readonly().push_value(nullptr, pFuture3, 32.f, 4);
    mem.commit();

    ASSERT_EQ(mem.getTotalBytes(), (2+3+4) * sizeof(float));
@ -346,16 +361,15 @@ TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) {

TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequest) {
    float input[] = {1, 2, 3};

    float *pFuture = nullptr;
    float *pFuture2 = nullptr;
    float *pFuture3 = nullptr;

    size_t len = sizeof(input);

    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(&pFuture2, &pFuture, len, len);
    mem.bind_ptr(&pFuture3, &pFuture2, 2 * len, len);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
    mem.bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len);

    mem.commit();

@ -385,9 +399,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenPush) {

    size_t len = sizeof(input);

    mem.push_ptr(&pFuture, input, len);
    mem.bind_ptr(&pFuture2, &pFuture, len, len);
    mem.push_ptr(&pFutureInput2, input2, len);
    mem.push_ptr(nullptr, &pFuture, input, len);
    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
    mem.push_ptr(nullptr, &pFutureInput2, input2, len);

    mem.commit();

@ -416,9 +430,9 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) {

    size_t len = sizeof(input);

    mem.reserve_ptr(&pFuture, len);
    mem.bind_ptr(&pFuture2, &pFuture, len, len);
    mem.push_ptr(&pFutureInput, input, len);
    mem.reserve_ptr(nullptr, &pFuture, len);
    mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len);
    mem.push_ptr(nullptr, &pFutureInput, input, len);

    mem.commit();
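A side note on the idiom used throughout the updated tests: each "future" pointer is initialized with its own address. A minimal sketch of why that sentinel works (illustrative only):

#include <cassert>

int main() {
    // The pointer's value equals its own address, so the variable can be
    // handed by value to an API that expects the address of a pointer slot,
    // and a request that never gets resolved leaves the sentinel intact.
    float* pFuture = reinterpret_cast<float*>(&pFuture);
    assert(reinterpret_cast<float**>(pFuture) == &pFuture);
    return 0;
}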
@ -19,7 +19,7 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
    {GNA_CONFIG_KEY(EXEC_TARGET), ""},
    {GNA_CONFIG_KEY(COMPILE_TARGET), ""},
    {GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW_EXACT},
    {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(NO)},
    {GNA_CONFIG_KEY(COMPACT_MODE), CONFIG_VALUE(YES)},
    {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)},
    {GNA_CONFIG_KEY(PRECISION), Precision(Precision::I16).name()},
    {GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(NO)},
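Since the default flips to YES here, callers that relied on non-compact layout now have to opt out explicitly. A sketch of what that might look like, assuming GNA_CONFIG_KEY(COMPACT_MODE) expands to the string "GNA_COMPACT_MODE" as in the plugin's public config header:

#include <map>
#include <string>

int main() {
    // Compact I/O buffer reuse is now the default; restore the previous
    // behaviour per network by passing the config key with value NO at load time.
    std::map<std::string, std::string> config = {
        {"GNA_COMPACT_MODE", "NO"},
    };
    // InferenceEngine::Core ie;
    // auto exeNetwork = ie.LoadNetwork(network, "GNA", config);  // sketch only
    return config.empty() ? 1 : 0;
}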
@ -8,6 +8,7 @@
 */
#pragma once

#include <ie_common.h>
#include <stdint.h>

#include <algorithm>