[GNA] Refactor memory alignment macro usage (#15749)
* Add the possibility to use a memory alignment different than 64 B
* Update tests for the new memory API
* Remove ineffective code
* [FIX] Fix memory alignment issues for graph compiler primitives
* Update after review
commit 5311ab0938 (parent 625890c666)
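The diff below threads a single data-alignment value through the GNA memory stack: the hard-coded trailing 64 argument disappears from the push_value/push_ptr/push_local_ptr/push_initializer/reserve_ptr call sites, and each memory-request queue instead carries an m_alignment member set at construction from limitations::kMemoryAlignmentBytes, exposed through GNADeviceHelper::getMemAlignment(). A minimal, self-contained sketch of the idea follows — hypothetical names, not the plugin's real classes:

    // Hypothetical sketch: alignment becomes a property of the queue
    // instead of an argument repeated at every call site.
    #include <cstddef>
    #include <iostream>

    // Same rounding the plugin's ALIGN macro performs: round a size up
    // to the next multiple of the alignment.
    static size_t align_up(size_t size, size_t alignment) {
        return ((size + alignment - 1) / alignment) * alignment;
    }

    class QueueSketch {
    public:
        explicit QueueSketch(size_t alignment) : m_alignment(alignment) {}
        // Old API shape: push(num_bytes, 64) at every call site;
        // new API shape: push(num_bytes), reading the stored alignment.
        void push(size_t num_bytes) {
            m_total += align_up(num_bytes, m_alignment);
        }
        size_t getSize() const {
            return m_total;
        }

    private:
        size_t m_alignment;
        size_t m_total = 0;
    };

    int main() {
        QueueSketch q64(64), q16(16);
        q64.push(12);  // a 12 B request occupies 64 B under the old fixed alignment
        q16.push(12);  // only 16 B when the device reports a smaller alignment
        std::cout << q64.getSize() << " " << q16.getSize() << "\n";  // prints: 64 16
    }

Keeping the alignment inside the queue means a future target with a different alignment requirement only has to change what the device reports, not every call site.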
@@ -55,6 +55,8 @@ constexpr uint32_t bytesPerSplitElement = 2;
 // In fp32 mode this is not necessary but is useful for testing
 constexpr uint32_t bytesPerCropElement = 2;
 
+constexpr uint32_t kMemoryAlignmentBytes = 64;
+
 inline bool isCropAffinedOffset(size_t numberOfElements) {
     const auto cropOffset = numberOfElements * bytesPerCropElement;
     return (ALIGN64(cropOffset) != cropOffset);
@@ -48,6 +48,8 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
     GetGnaLibraryVersion();
 
     maxLayersCount_ = retrieveMaxLayersCount();
+
+    m_mem_alignment = limitations::kMemoryAlignmentBytes;
 }
 
 GNADeviceHelper::~GNADeviceHelper() {
@@ -583,6 +585,5 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
         return kMaxLayersCountGNA3_X;
     }
 }
-
 }  // namespace intel_gna
 }  // namespace ov
@@ -47,6 +47,7 @@ class GNADeviceHelper : public GNADevice {
     uint32_t nGnaDeviceIndex = 0;
     bool useDeviceEmbeddedExport = false;
     uint32_t maxLayersCount_ = 0;
+    size_t m_mem_alignment = 0;
 
     static const uint32_t TotalGna2InstrumentationPoints = 2;
     Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
@@ -127,6 +128,10 @@ public:
         return allAllocations;
     }
 
+    const size_t getMemAlignment() const {
+        return m_mem_alignment;
+    }
+
    /**
     * @see GNADevice::createModel()
     */
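The new accessor exposes the alignment chosen in the constructor above; GNAPlugin::InitGNADevice() (later in this diff) reads it via gnadevice->getMemAlignment() and forwards it to the gna_memory_device constructor, so all request queues inherit the device's alignment rather than an implicit 64.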
@@ -619,8 +619,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
         gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
                                                     ptr_weights,
                                                     transposedWeights.data(),
-                                                    convolution._weights->byteSize(),
-                                                    64);
+                                                    convolution._weights->byteSize());
     } else {
         auto paddedWeights = num_filter_coefficients * num_filters;
         auto paddedWeightsSize = paddedWeights * convolution.precision.size();
@@ -651,17 +650,16 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
            }
        };
 
-        gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer, 64);
+        gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer);
     }
 
     if (convolution._biases) {
         gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                               ptr_biases,
                                               convolution._biases->cbuffer().as<const void*>(),
-                                              convolution._biases->byteSize(),
-                                              64);
+                                              convolution._biases->byteSize());
     } else {
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels);
     }
 }
 
@@ -803,20 +801,15 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
         transposedWeights.resize(transposedWeights.size() + kernelPad);
     }
 
-    gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
-                                                ptr_weights,
-                                                transposedWeights.data(),
-                                                transposedWeights.size(),
-                                                64);
+    gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), transposedWeights.size());
 
     if (convolution._biases) {
         gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                               ptr_biases,
                                               convolution._biases->cbuffer().as<const void*>(),
-                                              convolution._biases->byteSize(),
-                                              64);
+                                              convolution._biases->byteSize());
     } else {
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels);
     }
 }
 
@@ -835,13 +828,11 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
     uint32_t num_rows_in = reshaped_dims[1];
     uint32_t num_columns_in = reshaped_dims[0];
     uint32_t num_rows_out = num_rows_in;
     uint32_t num_columns_out = num_columns_in;
     uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) *
-                                outputs->getPrecision().size();
-
-    size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->getDims()), end(input->getDims())) *
-                               input->getPrecision().size();
+    size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
+    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
+
     if (power.power == 1.0f) {
         void* ptr_inputs = nullptr;
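The hunk above also switches the input/output buffer sizes from raw tensor-dimension products to padded row counts, where num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in. A small self-contained check of that arithmetic, with illustrative values that are not taken from the PR:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Same formula as the plugin's ALIGN macro: round up to a multiple.
    static uint32_t align_up(uint32_t n, uint32_t a) {
        return ((n + a - 1) / a) * a;
    }

    int main() {
        const uint32_t num_rows_in = 10;       // illustrative tensor height
        const uint32_t num_columns_in = 1;
        const uint32_t noOfInputsDivisor = 8;  // GNA input granularity
        const uint32_t precision_size = 2;     // int16 element

        const uint32_t num_padding = align_up(num_rows_in, noOfInputsDivisor) - num_rows_in;  // 6
        const size_t num_data_bytes_in =
            num_columns_in * (num_rows_in + num_padding) * precision_size;                    // 32

        std::cout << num_padding << " " << num_data_bytes_in << "\n";  // prints: 6 32
    }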
@@ -875,8 +866,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     if (gna_config.gnaFlags.sw_fp32) {
         IE_ASSERT(quantized == nullptr);
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out, 64);
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out + num_padding);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out + num_padding);
     } else {
         IE_ASSERT(quantized != nullptr);
         if (!gna_config.gnaFlags.input_low_precision) {
@@ -884,15 +875,27 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
                 std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast<float>(INT16_MAX)));
             auto quantizedOffset = FloatToInt32(
                 std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast<float>(INT32_MAX)));
-            gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
-            gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
+            gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
+                                                             ptr_weights,
+                                                             quantizedScale,
+                                                             num_rows_out + num_padding);
+            gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer,
+                                                             ptr_biases,
+                                                             quantizedOffset,
+                                                             num_rows_out + num_padding);
         } else {
             auto quantizedScale = FloatToInt8(
                 std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast<float>(INT8_MAX)));
             auto quantizedOffset = FloatToInt8(
                 std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast<float>(INT8_MAX)));
-            gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
-            gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
+            gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
+                                                            ptr_weights,
+                                                            quantizedScale,
+                                                            num_rows_out + num_padding);
+            gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
+                                                            ptr_biases,
+                                                            quantizedOffset,
+                                                            num_rows_out + num_padding);
         }
     }
 } else {
@@ -954,8 +957,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
         gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
                                                     ptr_pwl_segments_target,
                                                     &ptr_pwl_segments.front(),
-                                                    ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
-                                                    64);
+                                                    ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t));
     }
 }
 }
@@ -1284,9 +1286,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     FillWeightOfAligningFilter(layer, ptr_weights, crop_params.start_offset, (quantized == nullptr) ? false : true);
 
-    (quantized == nullptr)
-        ? gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64)
-        : gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
+    (quantized == nullptr) ? gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out)
+                           : gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out);
 }
 }
 
@@ -1383,6 +1384,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     uint32_t num_rows_in = in_4b_total_size;
     uint32_t num_columns_in = 1;
     uint32_t num_rows_out = num_rows_in;
+    uint32_t num_columns_out = num_columns_in;
     uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
 
     void* ptr_inputs = nullptr;
@@ -1408,9 +1410,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                       ptr_weights,
                       ptr_biases,
                       true);
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) *
-                                outputs->getPrecision().size();
-
+    size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
     size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->getPrecision().size();
 
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
@@ -1419,7 +1419,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     switch (eltwise._operation) {
     case EltwiseLayer::Sub:
         if (quantized == nullptr) {
-            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out, 64);
+            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out + num_padding);
         } else {
             auto scaledIdentity = -quantized->_weights_quant.GetScale();
 
@@ -1428,23 +1428,21 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                 gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
                                                                  ptr_weights,
                                                                  quantizedIdentity,
-                                                                 num_rows_out,
-                                                                 64);
+                                                                 num_rows_out + num_padding);
             } else {
                 auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
 
                 gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
                                                                 ptr_weights,
                                                                 quantizedIdentity,
-                                                                num_rows_out,
-                                                                64);
+                                                                num_rows_out + num_padding);
             }
         }
         connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
         break;
     case EltwiseLayer::Sum:
         if (quantized == nullptr) {
-            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out, 64);
+            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out + num_padding);
         } else {
             auto scaledIdentity = quantized->_weights_quant.GetScale();
 
@@ -1454,16 +1452,14 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                 gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer,
                                                                  ptr_weights,
                                                                  quantizedIdentity,
-                                                                 num_rows_out,
-                                                                 64);
+                                                                 num_rows_out + num_padding);
             } else {
                 auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
 
                 gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer,
                                                                 ptr_weights,
                                                                 quantizedIdentity,
-                                                                num_rows_out,
-                                                                64);
+                                                                num_rows_out + num_padding);
             }
         }
         connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@@ -1471,12 +1467,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     case EltwiseLayer::Prod:
         if (quantized == nullptr) {
-            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
+            gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding);
         } else {
             if (gna_config.gnaFlags.input_low_precision == false) {
-                gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out, 64);
+                gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0, num_rows_out + num_padding);
             } else {
-                gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, 0, num_rows_out, 64);
+                gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, 0, num_rows_out + num_padding);
             }
         }
         connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
@@ -1496,7 +1492,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto input_1 = layer->insData[0].lock();
     auto input_2 = layer->insData[1].lock();  // the second input corresponds to ptr_weights in component
     auto outputs = *layer->outData.begin();
-    auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
+    auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
+    auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
     uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
 
     auto in_dims = input_1->getDims();
@@ -1505,7 +1502,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
     uint32_t num_columns_in = batch_size;
     const auto out_dims = outputs->getDims();
     const auto out_dims_size = ngraph::shape_size(out_dims);
-    const auto bytes_per_output = outputs->getPrecision().size();
     uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
     uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
 
@@ -1521,9 +1517,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
                       num_rows_in + num_padding,
                       num_columns_in,
                       num_rows_out,
-                      inputPrecision.size(),
-                      bytes_per_output,
-                      quantized == nullptr ? input_2->getPrecision().size() : 2,
+                      input1_precision.size(),
+                      outputs->getPrecision().size(),
+                      input2_precision.size(),
                       quantized == nullptr ? input_2->getPrecision().size() : 4,
                       GetScaleFactor(layer, QuantizedDataType::weights),
                       GetScaleFactor(layer, QuantizedDataType::output),
@@ -1533,21 +1529,18 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
                       ptr_biases,
                       false);
 
-    const auto num_data_bytes_out = out_dims_size * bytes_per_output;
-
-    size_t num_data_bytes_in_1 = InferenceEngine::details::product(begin(input_1->getDims()), end(input_1->getDims())) *
-                                 input_1->getPrecision().size();
-    size_t num_data_bytes_in_2 = InferenceEngine::details::product(begin(input_2->getDims()), end(input_2->getDims())) *
-                                 input_2->getPrecision().size();
+    size_t num_data_bytes_out = out_dims_size * outputs->getPrecision().size();
+    size_t num_data_bytes_in_1 = (num_rows_in + num_padding) * num_columns_in * input1_precision.size();
+    size_t num_data_bytes_in_2 = (num_rows_in + num_padding) * num_columns_in * num_rows_out * input2_precision.size();
 
     connectOutput(layer, ptr_outputs, num_data_bytes_out);
     connectInput(layer, ptr_input_1, num_data_bytes_in_1);
     connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1);
     if (gna_config.gnaFlags.sw_fp32) {
         IE_ASSERT(quantized == nullptr);
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out);
     } else {
-        gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, 0.0f, num_rows_out);
     }
 }
 
@@ -1580,6 +1573,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
     uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
     uint32_t num_columns_in = batch_size;
     uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
+    uint32_t num_columns_out = num_columns_in;
     uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
     uint32_t num_padding_out = isDiag ? num_padding : 0;
 
@@ -1624,7 +1618,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
                       ptr_biases,
                       isDiag);
 
-    size_t num_data_bytes_out = num_columns_in * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
+    size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
 
     size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
 
@@ -1667,8 +1661,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
             gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                                   ptr_weights,
                                                   weightable._weights->cbuffer().as<const void*>(),
-                                                  weightable._weights->byteSize(),
-                                                  64);
+                                                  weightable._weights->byteSize());
         } else {
             gnamem->getQueue(REGION_RO)->push_initializer(
                 layer,
@@ -1695,8 +1688,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
                         }
                     }
                 }
-                },
-                64);
+            });
         }
     } else {
         if (transpose) {
@@ -1715,22 +1707,20 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
                     ie_memcpy(data, size, weightsBuffer + num_rows_in * i * wpSize, num_rows_in * wpSize);
                     data = reinterpret_cast<uint8_t*>(data) + (num_rows_in + num_padding) * wpSize;
                 }
-                },
-                64);
+            });
         }
 
         if (weightable._biases) {
             gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                                   ptr_biases,
                                                   weightable._biases->cbuffer().as<const void*>(),
-                                                  weightable._biases->byteSize(),
-                                                  64);
+                                                  weightable._biases->byteSize());
         } else {
             // in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
             if (useBiasConnection) {
-                gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
+                gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding);
            } else {
-                gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
+                gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out);
             }
         }
     }
@@ -1769,8 +1759,7 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l
                 }
                 ++out;
             }
-        },
-        64);
+        });
 }
 
 void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
@@ -1880,11 +1869,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
         size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
         size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
 
-        gnamem->getQueue(REGION_RO)->push_initializer(
-            layer,
-            ptr_weights,
-            paddedWeightsSize,
-            [=](void* data, size_t size) {
+        gnamem->getQueue(REGION_RO)
+            ->push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
                 size_t roffset = weights_offset;
                 size_t woffset = 0;
                 for (int i = 0; i < num_rows_out && size >= woffset; i++) {
@@ -1895,18 +1881,16 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                     roffset += weights_stride;
                     woffset += elementsIn * weightsElementSize;
                 }
-            },
-            64);
+            });
     }
 
     if (filterLayer->_biases) {
         gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                               ptr_biases,
                                               filterLayer->_biases->cbuffer().as<const void*>(),
-                                              filterLayer->_biases->byteSize(),
-                                              64);
+                                              filterLayer->_biases->byteSize());
     } else {
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out);
     }
 }
 
@@ -1986,17 +1970,15 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
     gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                           ptr_weights,
                                           filterLayer->_weights->cbuffer().as<const void*>(),
-                                          filterLayer->_weights->byteSize(),
-                                          64);
+                                          filterLayer->_weights->byteSize());
 
     if (filterLayer->_biases) {
         gnamem->getQueue(REGION_RO)->push_ptr(layer,
                                               ptr_biases,
                                               filterLayer->_biases->cbuffer().as<const void*>(),
-                                              filterLayer->_biases->byteSize(),
-                                              64);
+                                              filterLayer->_biases->byteSize());
     } else {
-        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64);
+        gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters);
     }
 }
 
@@ -2232,8 +2214,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
         gnamem->getQueue(REGION_RO)->push_local_ptr(layer,
                                                     ptr_pwl_segments_target,
                                                     &ptr_pwl_segments.front(),
-                                                    ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
-                                                    64);
+                                                    ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t));
     }
 }
 
@@ -2409,9 +2390,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
         auto& nextMemoryLayer = nextMemoryLayerIt->second;
         // memory layer not yet initialized
         if (nextMemoryLayer.reserved_size == 0) {
-            nextMemoryLayer.reserved_size = ALIGN64(nextMemoryLayer.getByteSize());
+            nextMemoryLayer.reserved_size = ALIGN(nextMemoryLayer.getByteSize(), gnamem->getDataMemAlignment());
             gnamem->getQueue(REGION_STATES)
-                ->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size, 64);
+                ->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_size);
             gnamem->getQueue(REGION_AUTO)
                 ->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer));
         } else {
@@ -2422,7 +2403,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
                            ptr,
                            &nextMemoryLayer.gna_ptr,
                            getOffsetForBinding(layer),
-                           ALIGN64(num_data_bytes_out));
+                           num_data_bytes_out);
         }
         return;
     }
@@ -2515,8 +2496,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
         });
     if (included == concat_connection.end()) {
         auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2);
-        gnamem->getQueue(REGION_SCRATCH)
-            ->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64);
+        gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, outputSize);
 
         std::function<void(GNAConcatLayer, GnaInputs&, ConcatConnection&)> allocate_input_recursively =
             [&allocate_input_recursively](GNAConcatLayer clayer,
@@ -2576,7 +2556,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void* p
     if (LayerInfo(layer).isConst()) {
         mem_region = REGION_RO;
     }
-    gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64);
+    gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, num_data_bytes_out);
 }
 
 ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
@@ -2616,18 +2596,18 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
 
         // real allocation pointer will be kept in ptr not in ptr_inputs_global
         if (!connectTo) {
-            gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast<uint8_t>(0), num_data_bytes_in, 64);
+            gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast<uint8_t>(0), num_data_bytes_in);
         } else {
             gnamem->getQueue(REGION_INPUTS)
                 ->push_value(layer,
                              &inputs_ptr_->at(prevLayer->name).ptrs.front(),
                              static_cast<uint8_t>(0),
-                             num_data_bytes_in,
-                             64);
+                             num_data_bytes_in);
         }
         inputs_ptr_->at(prevLayer->name).allocated_size = num_data_bytes_in;
     }
-    if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputs_ptr_->at(prevLayer->name).get_allocated_size(), 64)) {
+    if (ALIGN(num_data_bytes_in, gnamem->getDataMemAlignment()) >
+        ALIGN(inputs_ptr_->at(prevLayer->name).get_allocated_size(), gnamem->getDataMemAlignment())) {
         THROW_GNA_EXCEPTION << "Layer: " << layer->name << " Cannot bind pointer to already allocated input("
                             << prevLayer->name
                             << "), due to size_allocated=" << inputs_ptr_->at(prevLayer->name).get_allocated_size()
@@ -2636,18 +2616,10 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
 
         if (connectTo) {
             gnamem->getQueue(REGION_AUTO)
-                ->bind_ptr(layer,
-                           ptr,
-                           &inputs_ptr_->at(prevLayer->name).ptrs.front(),
-                           offset,
-                           ALIGN(num_data_bytes_in, 64));
+                ->bind_ptr(layer, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, num_data_bytes_in);
         } else {
             gnamem->getQueue(REGION_AUTO)
-                ->bind_ptr(layer,
-                           &inputs_ptr_->at(prevLayer->name).ptrs.front(),
-                           ptr,
-                           offset,
-                           ALIGN(num_data_bytes_in, 64));
+                ->bind_ptr(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, num_data_bytes_in);
         }
 
         return prevLayer;
@@ -2655,9 +2627,11 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
     // const input
     if (LayerInfo(prevLayer).isConst()) {
         if (connectTo) {
-            gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset);
+            gnamem->getQueue(REGION_AUTO)
+                ->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset, num_data_bytes_in);
         } else {
-            gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset);
+            gnamem->getQueue(REGION_AUTO)
+                ->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset, num_data_bytes_in);
        }
 
         return prevLayer;
@@ -2702,7 +2676,7 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         auto& concatLayerInfoItem = concatLayerInfo->second;
         // dnnLayer that is input for concat layer
         gnamem->getQueue(REGION_AUTO)
-            ->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in);
+            ->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in, false);
         // return layer over concat
         return CNNNetPrevLayer(prevLayer);
     }
@@ -2718,7 +2692,8 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
 
     // check for generic prev layer
     if (prevDnnLayer != nullptr) {
-        gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in);
+        gnamem->getQueue(REGION_AUTO)
+            ->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in, false);
         return prevLayer;
     }
 
@@ -2731,26 +2706,25 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
         // TODO: this is duplicate with connect output
         auto& memoryLayer = prevMemoryLayer->second;
         if (memoryLayer.reserved_size == 0) {
-            memoryLayer.reserved_size = ALIGN64(memoryLayer.getByteSize());
+            memoryLayer.reserved_size = ALIGN(memoryLayer.getByteSize(), gnamem->getDataMemAlignment());
             // connectTo used for indicate that memory layer should be bound to given buffer
             if (connectTo) {
-                memoryLayer.reserved_size = ALIGN64(std::max(memoryLayer.reserved_size, num_data_bytes_in));
-                gnamem->getQueue(REGION_STATES)
-                    ->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size, 64);
+                memoryLayer.reserved_size =
+                    ALIGN(std::max(memoryLayer.reserved_size, num_data_bytes_in), gnamem->getDataMemAlignment());
+                gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, memoryLayer.reserved_size);
                 gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset);
             } else {
-                if (ALIGN64(num_data_bytes_in) < ALIGN64(memoryLayer.reserved_size + offset)) {
+                if (ALIGN(num_data_bytes_in, gnamem->getDataMemAlignment()) <
+                    ALIGN(memoryLayer.reserved_size + offset, gnamem->getDataMemAlignment())) {
                     THROW_GNA_LAYER_EXCEPTION(layer)
                         << " invalid allocation request of " << num_data_bytes_in
                        << " is more then state tensor size of: " << memoryLayer.reserved_size + offset;
                 }
-                gnamem->getQueue(REGION_AUTO)
-                    ->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in));
+                gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, num_data_bytes_in);
             }
         } else {
             // We may need to extend memory buffer if connected input size is bigger, for example for concat connection
-            gnamem->getQueue(REGION_AUTO)
-                ->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in));
+            gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, num_data_bytes_in);
         }
         return prevLayer;
     }
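A note on equivalence for the connectInput/connectOutput rewrites above: in this codebase ALIGN(x, a) rounds x up to the next multiple of a, and ALIGN64(x) is simply ALIGN(x, 64), so every ALIGN64(...) to ALIGN(..., gnamem->getDataMemAlignment()) substitution is behavior-preserving whenever the device reports 64-byte alignment; the refactoring only changes behavior on targets that report a different alignment.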
@@ -376,7 +376,8 @@ void GNAPlugin::InitGNADevice() {
                                                    gnaFlags->performance_counting,
                                                    !config.embedded_export_path.empty());
         size_t page_size_bytes = 4096;
-        gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), page_size_bytes);
+        size_t mem_alignment = gnadevice->getMemAlignment();
+        gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice), mem_alignment, page_size_bytes);
     }
     graphCompiler.setGNAMemoryPtr(gnamem);
 }
@@ -852,10 +853,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
         }
         portId++;
     }
-    // TODO: how active list will work in multioutput case
-    // make room for active list
-    gnamem->getQueue(REGION_OUTPUTS)
-        ->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64);
 
     void* pParallelExecutionData = nullptr;
 
@@ -866,7 +863,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
     rwSegmentSize += gnamem->getRegionBytes(REGION_OUTPUTS);
     if (gnaFlags->num_requests > 1) {
         gnamem->getQueue(REGION_SCRATCH)
-            ->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1), 64);
+            ->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1));
     }
 
     gnamem->commit(gnaFlags->compact_mode);
@@ -1447,7 +1444,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
     void* basePtr = nullptr;
     std::string modelLibVersion;  //!< OpenVINO and GNA Library versions read from GNA model file
 
-    gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize);
+    gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize, false);
 
     gnamem->commit();
 
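The trailing false here disables the new size rounding (see the align_num_bytes parameter introduced in gna_mem_requests_queue.hpp below), presumably so the scratch region of an imported model keeps exactly header.gnaMemSize bytes as serialized rather than a padded size.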
@@ -38,7 +38,9 @@ inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) {
  */
 class GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsQueue(rRegion region) : _region_type(region) {}
+    explicit GNAMemRequestsQueue(rRegion region, size_t mem_alignment)
+        : _region_type(region),
+          m_alignment(mem_alignment) {}
     virtual ~GNAMemRequestsQueue() {}
 
     rRegion _region_type;
@@ -46,31 +48,27 @@ public:
     std::vector<MemRequest> _mem_requests;
     std::list<std::vector<char>> _local_storage;
     std::shared_ptr<uint8_t> _basePtr = nullptr;
+    size_t m_alignment;
 
     /**
     * @brief register initialiser to access memory once it is actually allocated
     * @param ptr_out
     * @param ptr_in
     * @param num_bytes
-    * @param alignment
     * @param initializer
     */
     void push_initializer(InferenceEngine::CNNLayerPtr layer,
                           void* ptr_out,
                           size_t num_bytes,
-                          std::function<void(void* data, size_t size)> initializer,
-                          size_t alignment = 1) {
-        futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, alignment});
+                          std::function<void(void* data, size_t size)> initializer) {
+        futureHeap().push_back({regionType(), ptr_out, num_bytes, initializer, REQUEST_INITIALIZER, m_alignment});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
         }
     }
 
-    void push_ptr(InferenceEngine::CNNLayerPtr layer,
-                  void* ptr_out,
-                  const void* ptr_in,
-                  size_t num_bytes,
-                  size_t alignment = 1) {
-        futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, alignment});
+    void push_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, const void* ptr_in, size_t num_bytes) {
+        futureHeap().push_back({regionType(), REQUEST_STORE, ptr_out, ptr_in, 1, num_bytes, m_alignment});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
         }
@@ -82,15 +80,11 @@ public:
     * @param ptr_in
     * @param num_bytes
     */
-    void push_local_ptr(InferenceEngine::CNNLayerPtr layer,
-                        void* ptr_out,
-                        const void* ptr_in,
-                        size_t num_bytes,
-                        size_t alignment = 1) {
+    void push_local_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, const void* ptr_in, size_t num_bytes) {
         localStorage().emplace_back(reinterpret_cast<const uint8_t*>(ptr_in),
                                     reinterpret_cast<const uint8_t*>(ptr_in) + num_bytes);
         futureHeap().push_back(
-            {regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, alignment});
+            {regionType(), REQUEST_STORE, ptr_out, &localStorage().back().front(), 1, num_bytes, m_alignment});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
         }
@@ -100,9 +94,16 @@ public:
     *
     * @param ptr_out
     * @param num_bytes
+    * @param align_num_bytes
     */
-    void reserve_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, size_t num_bytes, size_t alignment = 1) {
-        futureHeap().push_back({regionType(), REQUEST_ALLOCATE, ptr_out, nullptr, 1, num_bytes, alignment});
+    void reserve_ptr(InferenceEngine::CNNLayerPtr layer, void* ptr_out, size_t num_bytes, bool align_num_bytes = true) {
+        futureHeap().push_back({regionType(),
+                                REQUEST_ALLOCATE,
+                                ptr_out,
+                                nullptr,
+                                1,
+                                align_num_bytes ? ALIGN(num_bytes, m_alignment) : num_bytes,
+                                m_alignment});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
         }
@@ -114,14 +115,22 @@ public:
     * @param dest - source is binded to dest pointer after allocation
     * @param offset - offset in bytes in source that will be set in dest
     * @param num_bytes - bind can request for bigger buffer that originally allocated via reserve(),
     *                    if that happens - reserved request parameters will be updated before committing memory
+    * @param align_num_bytes - when true then align memory size to the alignment predefined for the queue
     */
     void bind_ptr(InferenceEngine::CNNLayerPtr layer,
                   void* source,
                   const void* dest,
                   size_t offset = 0,
-                  size_t num_bytes = 0) {
-        futureHeap().push_back({regionType(), REQUEST_BIND, source, dest, 1, num_bytes, 1, offset});
+                  size_t num_bytes = 0,
+                  bool align_num_bytes = true) {
+        futureHeap().push_back({regionType(),
+                                REQUEST_BIND,
+                                source,
+                                dest,
+                                1,
+                                align_num_bytes ? ALIGN(num_bytes, m_alignment) : num_bytes,
+                                1,
+                                offset});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {getCNNLayerId(layer), getCNNLayerId(layer)};
         }
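The align_num_bytes flag added to reserve_ptr() and bind_ptr() above controls whether the requested byte count itself is rounded up to the queue's alignment; the call sites earlier in this diff pass false where an exact size must be preserved (the imported-model scratch buffer and the concat/generic-layer bindings). A hedged, standalone sketch of the size computation, mirroring the ternary in the pushed request:

    #include <cstddef>
    #include <iostream>

    // Mirrors the new queue logic: the stored request size is either the
    // exact byte count or that count rounded up to the queue's alignment.
    static size_t request_bytes(size_t num_bytes, size_t m_alignment, bool align_num_bytes) {
        const size_t aligned = ((num_bytes + m_alignment - 1) / m_alignment) * m_alignment;
        return align_num_bytes ? aligned : num_bytes;
    }

    int main() {
        std::cout << request_bytes(100, 64, true) << "\n";   // 128: default, padded up
        std::cout << request_bytes(100, 64, false) << "\n";  // 100: exact size kept
    }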
@@ -145,12 +154,8 @@ public:
     * @brief allocates buffer and set all its values to T value
     */
    template <class T>
-    void push_value(InferenceEngine::CNNLayerPtr layer,
-                    void* ptr_out,
-                    T value,
-                    size_t num_elements,
-                    size_t alignment = 1) {
-        futureHeap().push_back({regionType(), ptr_out, value, num_elements, alignment});
+    void push_value(InferenceEngine::CNNLayerPtr layer, void* ptr_out, T value, size_t num_elements) {
+        futureHeap().push_back({regionType(), ptr_out, value, num_elements, m_alignment});
         if (layer != nullptr) {
             futureHeap().back()._life_limits = {0, getCNNLayerId(layer)};
         }
@@ -217,17 +222,17 @@ public:
 
 class GNAMemRequestsInputsQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsInputsQueue() : GNAMemRequestsQueue(REGION_INPUTS) {}
+    explicit GNAMemRequestsInputsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_INPUTS, alignment) {}
 };
 
 class GNAMemRequestsOutputsQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsOutputsQueue() : GNAMemRequestsQueue(REGION_OUTPUTS) {}
+    explicit GNAMemRequestsOutputsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_OUTPUTS, alignment) {}
 };
 
 class GNAMemRequestsScratchQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsScratchQueue() : GNAMemRequestsQueue(REGION_SCRATCH) {}
+    explicit GNAMemRequestsScratchQueue(size_t alignment) : GNAMemRequestsQueue(REGION_SCRATCH, alignment) {}
     /**
     * @brief optimize memory region by reusing buffers
     */
@@ -266,17 +271,17 @@ public:
 
 class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsReadOnlyQueue() : GNAMemRequestsQueue(REGION_RO) {}
+    explicit GNAMemRequestsReadOnlyQueue(size_t alignment) : GNAMemRequestsQueue(REGION_RO, alignment) {}
 };
 
 class GNAMemRequestsStatesQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsStatesQueue() : GNAMemRequestsQueue(REGION_STATES) {}
+    explicit GNAMemRequestsStatesQueue(size_t alignment) : GNAMemRequestsQueue(REGION_STATES, alignment) {}
 };
 
 class GNAMemRequestsBindingsQueue : public GNAMemRequestsQueue {
 public:
-    explicit GNAMemRequestsBindingsQueue() : GNAMemRequestsQueue(REGION_AUTO) {}
+    explicit GNAMemRequestsBindingsQueue(size_t alignment) : GNAMemRequestsQueue(REGION_AUTO, alignment) {}
 };
 
 }  // namespace memory
@@ -44,6 +44,7 @@ public:
     virtual void commit(bool isCompact = false) = 0;
     virtual std::pair<bool, uint32_t> getOffsetForMerged(void* ptr) = 0;
     virtual size_t getRegionBytes(rRegion region) = 0;
+    virtual size_t getDataMemAlignment() const = 0;
     virtual ~GNAMemoryInterface() = default;
 };
 
@@ -58,25 +59,31 @@ protected:
     std::map<rRegion, std::unique_ptr<GNAMemRequestsQueue>> _mem_queues;
     size_t _total = 0;
     Allocator _allocator;
-    size_t _page_alignment = 1;
+    size_t _data_alignment;
+    size_t _page_alignment;
     bool _is_compact_mode = false;
 
 private:
     void initMemQueses() {
-        _mem_queues[REGION_RO] = tools::make_unique<GNAMemRequestsReadOnlyQueue>();
-        _mem_queues[REGION_INPUTS] = tools::make_unique<GNAMemRequestsInputsQueue>();
-        _mem_queues[REGION_OUTPUTS] = tools::make_unique<GNAMemRequestsOutputsQueue>();
-        _mem_queues[REGION_SCRATCH] = tools::make_unique<GNAMemRequestsScratchQueue>();
-        _mem_queues[REGION_STATES] = tools::make_unique<GNAMemRequestsStatesQueue>();
-        _mem_queues[REGION_AUTO] = tools::make_unique<GNAMemRequestsBindingsQueue>();
+        _mem_queues[REGION_RO] = tools::make_unique<GNAMemRequestsReadOnlyQueue>(_data_alignment);
+        _mem_queues[REGION_INPUTS] = tools::make_unique<GNAMemRequestsInputsQueue>(_data_alignment);
+        _mem_queues[REGION_OUTPUTS] = tools::make_unique<GNAMemRequestsOutputsQueue>(_data_alignment);
+        _mem_queues[REGION_SCRATCH] = tools::make_unique<GNAMemRequestsScratchQueue>(_data_alignment);
+        _mem_queues[REGION_STATES] = tools::make_unique<GNAMemRequestsStatesQueue>(_data_alignment);
+        _mem_queues[REGION_AUTO] = tools::make_unique<GNAMemRequestsBindingsQueue>(_data_alignment);
     }
 
 public:
-    explicit GNAMemory(size_t pageAlignment = 1) : _page_alignment(pageAlignment) {
+    explicit GNAMemory(size_t dataAlignment = 1, size_t pageAlignment = 1)
+        : _data_alignment(dataAlignment),
+          _page_alignment(pageAlignment) {
         initMemQueses();
     }
 
-    explicit GNAMemory(const Allocator& a, size_t pageAlignment = 1) : _allocator(a), _page_alignment(pageAlignment) {
+    explicit GNAMemory(const Allocator& a, size_t dataAlignment = 1, size_t pageAlignment = 1)
+        : _allocator(a),
+          _data_alignment(dataAlignment),
+          _page_alignment(pageAlignment) {
         initMemQueses();
     }
 
@@ -146,7 +153,7 @@ public:
                 return {true, curOffset};
             }
             const auto size = queuePair.second->getSize();
-            curOffset += ALIGN64(size);
+            curOffset += ALIGN(size, _data_alignment);
         }
         return {false, 0};
     }
@@ -173,6 +180,10 @@ public:
 #endif
     }
 
+    size_t getDataMemAlignment() const override {
+        return _data_alignment;
+    }
+
 protected:
     std::shared_ptr<uint8_t> allocate(size_t bytes) {
         Allocator nA = _allocator;
@@ -47,8 +47,7 @@ void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
     IE_ASSERT(data_ptr != nullptr);
     auto data_size = newState->byteSize();
     auto data_elements = data_size / newState->element_size();
-    if (ALIGN64(state->reserved_size) !=
-        ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
+    if (state->reserved_size > (data_size / (newState->element_size() / state->elementSizeBytes()))) {
         THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" << state->reserved_size
                             << " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
     }
@@ -19,7 +19,8 @@ const std::vector<ShapeRelatedParams> shapeRelatedParams = {{{{5, 1}, true}, {{5
 
 std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {ngraph::helpers::InputLayerType::CONSTANT};
 
-std::map<std::string, std::string> additional_config = {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}};
+std::vector<std::map<std::string, std::string>> additional_config = {{{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}},
+                                                                     {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}}};
 
 INSTANTIATE_TEST_SUITE_P(smoke_MatMul,
                          MatMulTest,
@@ -30,7 +31,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMul,
                          ::testing::Values(InferenceEngine::Layout::ANY),
                          ::testing::ValuesIn(secondaryInputTypes),
                          ::testing::Values(CommonTestUtils::DEVICE_GNA),
-                         ::testing::Values(additional_config)),
+                         ::testing::ValuesIn(additional_config)),
                          MatMulTest::getTestCaseName);
 
 }  // namespace
@@ -54,11 +54,12 @@ TEST_F(GNAMemoryTest, canStore2Blobs) {
 }
 
 TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
+    GNAMemory<memory::GNAFloatAllocator> dataAlignedMem(16);
     float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
     float* pFuture = nullptr;
-    auto queue = mem.getQueue(REGION_SCRATCH);
-    queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8);
-    mem.commit();
+    auto queue = dataAlignedMem.getQueue(REGION_SCRATCH);
+    queue->push_ptr(nullptr, &pFuture, input, 3 * 4);
+    dataAlignedMem.commit();
 
     ASSERT_EQ(16, queue->getSize());
 
@@ -73,15 +74,16 @@ TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) {
 }
 
 TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
+    GNAMemory<memory::GNAFloatAllocator> dataAlignedMem(8);
     float input[] = {1, 2, 3, 4, 5, 6, 7, 8};
     float* pFuture = nullptr;
     float* pFuture2 = nullptr;
-    auto queue = mem.getQueue(REGION_SCRATCH);
-    queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8);
-    queue->push_ptr(nullptr, &pFuture2, input, 3 * 4, 16);
-    mem.commit();
+    auto queue = dataAlignedMem.getQueue(REGION_SCRATCH);
+    queue->push_ptr(nullptr, &pFuture, input, 3 * 4);
+    queue->push_ptr(nullptr, &pFuture2, input, 1 * 4);
+    dataAlignedMem.commit();
 
-    ASSERT_EQ(32, queue->getSize());
+    ASSERT_EQ(24, queue->getSize());
 
     ASSERT_NE(pFuture, nullptr);
 
@@ -90,8 +92,6 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) {
     ASSERT_EQ(pFuture[2], 3);
     // least probability for next element to be equal if not copied
     ASSERT_EQ(pFuture[4], 1);
-    ASSERT_EQ(pFuture[5], 2);
-    ASSERT_EQ(pFuture[6], 3);
 }
 
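The updated expectations follow directly from the 8-byte queue alignment set at the top of the test: the first push of 3 * 4 = 12 bytes is rounded up to 16, the second push of 1 * 4 = 4 bytes is rounded up to 8, so the queue reports 16 + 8 = 24 bytes, and the second blob now starts 16 bytes (four floats) past the first, which is why only pFuture[4] is still checked.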
 TEST_F(GNAMemoryTest, canReserveData) {
@@ -232,7 +232,7 @@ TEST_F(GNAMemoryTest, canPushLocal) {
 
     {
         std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f};
-        mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1);
+        mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4);
     }
 
     // poison stack
@@ -316,7 +316,7 @@ TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) {
 }
 
 TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) {
-    GNAMemory<memory::GNAFloatAllocator> memAligned(64);
+    GNAMemory<memory::GNAFloatAllocator> memAligned(1, 64);
     float* pFuture1 = reinterpret_cast<float*>(&pFuture1);
     float* pFuture2 = reinterpret_cast<float*>(&pFuture2);
 