1853 lines
78 KiB
C++
1853 lines
78 KiB
C++
// Copyright (C) 2018-2020 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
#define NOMINMAX
|
|
|
|
#include <vector>
|
|
#include <cstring>
|
|
#include <list>
|
|
#include <algorithm>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <memory>
|
|
#include <utility>
|
|
#include <limits>
|
|
|
|
#include <ie_layers.h>
|
|
#include <gna-api-types-xnn.h>
|
|
#include <ie_algorithm.hpp>
|
|
#include <debug.h>
|
|
|
|
#include "gna_graph_compiler.hpp"
|
|
#include "gna_data_types.hpp"
|
|
#include "gna_plugin_log.hpp"
|
|
#include "layers/gna_layer_info.hpp"
|
|
#include "ie_memcpy.h"
|
|
#include "details/caseless.hpp"
|
|
#include "gna-api.h"
|
|
#include "backend/am_intel_dnn.hpp"
|
|
#include "runtime/pwl.h"
|
|
#include "gna_graph_tools.hpp"
|
|
#include "frontend/model_quantizer.hpp"
|
|
#include "layers/layers_builder.hpp"
|
|
#include "layers/gna_concat_layer.hpp"
|
|
#include "layers/gna_crop_layer.hpp"
|
|
#include "round_float_define.hpp"
|
|
#include "gna_plugin_policy.hpp"
|
|
|
|
using namespace InferenceEngine;
|
|
using namespace std;
|
|
using namespace GNAPluginNS;
|
|
|
|
#define CREATE(name) [](GNAGraphCompiler *p, CNNLayerPtr l) {p->name(l);}
|
|
|
|
|
|
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr) {
|
|
this->gnamem = std::move(gnaMemPtr);
|
|
}
|
|
|
|
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr) {
|
|
this->dnn = std::move(dnnPtr);
|
|
}
|
|
|
|
void GNAGraphCompiler::setInputDescPtr(std::shared_ptr<GNAPluginNS::InputDesc> inputDescPtr) {
|
|
this->inputDesc = std::move(inputDescPtr);
|
|
}
|
|
|
|
void GNAGraphCompiler::setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlagsPtr) {
|
|
this->gnaFlags = std::move(gnaFlagsPtr);
|
|
}
|
|
|
|
void GNAGraphCompiler::setPolicy(GNAPluginNS::Policy policyToSet) {
|
|
this->policy = policyToSet;
|
|
}
|
|
|
|
intel_dnn_component_t * GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
|
|
if (current->insData.empty())
|
|
return nullptr;
|
|
auto inData = current->insData.front().lock();
|
|
if (inData == nullptr)
|
|
return nullptr;
|
|
|
|
auto prev_layer = inData->getCreatorLayer().lock();
|
|
|
|
return dnnComponents.findComponent(prev_layer);
|
|
}
|
|
|
|
void GNAGraphCompiler::fillMemoryConnections(std::unordered_map<std::string,
|
|
std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs) {
|
|
for (auto &memory : memoryPairs) {
|
|
auto inputLayer = memory.second[1];
|
|
auto outputLayer = memory.second[0];
|
|
|
|
IE_ASSERT(1 == outputLayer->insData.size());
|
|
|
|
// creating connection for layers output as form of extramap
|
|
memory_connection.emplace_back(memory.first, GNAMemoryLayer(inputLayer, outputLayer, gnaFlags->sw_fp32 ? 4 : 2));
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
|
|
// creating connection for each layer outputs as form of extramap
|
|
GNAPluginNS::GNAConcatLayer layerInfoItem(layer);
|
|
size_t concat_size = 0;
|
|
std::string& id = layer->name;
|
|
|
|
for (size_t i = 0; i < layer->insData.size(); ++i) {
|
|
auto ptrConcatLayerInput = CNNNetPrevLayerSkipCertain(layer, i, [](CNNLayerPtr lp) {
|
|
LayerInfo info(lp);
|
|
return info.isNonFunctional();
|
|
});
|
|
auto dataInput = layer->insData[i].lock();
|
|
if (!dataInput) {
|
|
THROW_GNA_EXCEPTION << "Input layer pointer for concat is unexpectedly absent";
|
|
}
|
|
|
|
if (!ptrConcatLayerInput) {
|
|
THROW_GNA_EXCEPTION << "Input layer for concat is unexpectedly absent";
|
|
}
|
|
|
|
size_t layer_size =
|
|
InferenceEngine::details::product(begin(dataInput->getDims()),
|
|
end(dataInput->getDims())) * dataInput->getPrecision().size();
|
|
|
|
layerInfoItem.concatInputLayers.emplace_back(GNAConcatLayer::ConcatConnectedLayerInfo{ptrConcatLayerInput->name, concat_size, layer_size});
|
|
|
|
concat_size += layer_size;
|
|
}
|
|
layerInfoItem.reserved_size = concat_size;
|
|
concat_connection.emplace(id, layerInfoItem);
|
|
}
|
|
|
|
void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
|
|
// creating connection for each layer inputs as form of extramap
|
|
GNAPluginNS::GNASplitLayer layerInfoItem(layer);
|
|
size_t split_size = 0;
|
|
std::string& id = layer->name;
|
|
IE_ASSERT(!layer->insData.empty());
|
|
|
|
auto dataInput = layer->insData.begin()->lock();
|
|
if (!dataInput) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer pointer is unexpectedly absent";
|
|
}
|
|
auto ptrSplitLayerInput = dataInput->getCreatorLayer().lock();
|
|
if (!ptrSplitLayerInput) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer for is unexpectedly absent";
|
|
}
|
|
|
|
for (size_t i = 0; i < layer->outData.size(); ++i) {
|
|
size_t padding = 0;
|
|
size_t output_layer_size = 0;
|
|
|
|
|
|
for (int j = 0; j != layer->outData[i]->getInputTo().size(); j++) {
|
|
auto outFunctionalLayer = CNNNetGetNextLayerSkipCertain(layer, i, j, [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
});
|
|
|
|
if (!outFunctionalLayer.first) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer";
|
|
}
|
|
|
|
auto dataOutput = outFunctionalLayer.first->insData[outFunctionalLayer.second].lock();
|
|
|
|
padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize())
|
|
* dataOutput->getPrecision().size();
|
|
output_layer_size =
|
|
InferenceEngine::details::product(begin(dataOutput->getDims()),
|
|
end(dataOutput->getDims())) * dataOutput->getPrecision().size();
|
|
|
|
if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
|
|
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
|
|
layerInfoItem.splitOutputLayers.emplace_back(
|
|
outFunctionalLayer.first,
|
|
outFunctionalLayer.second,
|
|
aligned64_offset * dataOutput->getPrecision().size(),
|
|
output_layer_size);
|
|
} else {
|
|
layerInfoItem.splitOutputLayers.emplace_back(
|
|
outFunctionalLayer.first, outFunctionalLayer.second, split_size, output_layer_size);
|
|
}
|
|
}
|
|
|
|
split_size += padding + output_layer_size;
|
|
}
|
|
layerInfoItem.reserved_size = split_size;
|
|
split_connection.emplace(id, layerInfoItem);
|
|
}
|
|
|
|
void GNAGraphCompiler::DiagonalPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
AffinePrimitive(layer, true);
|
|
}
|
|
|
|
void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) {
|
|
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
|
|
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
|
|
}
|
|
auto const_blob = constLayer->blobs["custom"];
|
|
|
|
const_connections[constLayer->name] = &const_connections[constLayer->name];
|
|
void* ptr_for_const_blob = &const_connections[constLayer->name];
|
|
|
|
connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
|
|
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
|
|
// dont see practical use case when bind storage type need to be different that allocation type
|
|
gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
|
|
ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
|
|
});
|
|
}
|
|
|
|
|
|
void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
|
|
uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
|
|
uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
|
|
uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
|
|
|
|
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
|
|
swap(h_dim_in, w_dim_in);
|
|
swap(h_dim_out, w_dim_out);
|
|
swap(convolution._kernel_x, convolution._kernel_y);
|
|
swap(convolution._stride_x, convolution._stride_y);
|
|
}
|
|
|
|
uint32_t num_feature_map_rows = w_dim_in / convolution._stride_x;
|
|
uint32_t num_feature_map_columns = c_dim_in * convolution._stride_x;
|
|
|
|
uint32_t num_columns_in = c_dim_in;
|
|
uint32_t num_rows_out = w_dim_out;
|
|
|
|
// padding of convolution kernel to be multiply of 8
|
|
uint32_t num_conv_kernel_padding = ALIGN(convolution._kernel_x * num_feature_map_columns, 8)
|
|
- convolution._kernel_x * num_feature_map_columns;
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
// TODO: questionable why for biases that are not in IR we inventing precision
|
|
auto biasPrecision = convolution._biases ? convolution._biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "convolution");
|
|
|
|
// have to pad input to let last kernel meets it's corresponding input
|
|
auto num_inputs = num_feature_map_columns * num_feature_map_rows + num_conv_kernel_padding;
|
|
auto num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
|
|
auto num_filter_rows = convolution._kernel_x / convolution._stride_x;
|
|
dnn->InitConvolutional1DComponent(currentComponent,
|
|
1,
|
|
num_inputs + num_input_padding,
|
|
1,
|
|
num_rows_out * convolution._out_depth,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
convolution._weights->getTensorDesc().getPrecision().size(),
|
|
biasPrecision.size(),
|
|
convolution._out_depth,
|
|
num_filter_rows,
|
|
num_feature_map_columns * num_filter_rows + num_conv_kernel_padding,
|
|
1,
|
|
num_feature_map_rows,
|
|
num_feature_map_columns,
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases);
|
|
|
|
size_t num_data_bytes_out =
|
|
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
|
|
* outputs->getPrecision().size();
|
|
|
|
size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
|
|
|
|
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
|
|
|
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
|
|
if (LayerInfo(connectedInputLayer).isInput()) {
|
|
// Kaldi features are opposite orientation
|
|
dnn->num_rotate_rows = num_feature_map_columns;
|
|
dnn->num_rotate_columns = num_feature_map_rows;
|
|
}
|
|
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
// rotate
|
|
auto TransposeMatrix = [](uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols) {
|
|
std::vector<uint8_t> temp_buffer(num_rows * num_cols * element_size);
|
|
for (uint32_t i = 0; i < num_rows; i++) {
|
|
for (uint32_t j = 0; j < num_cols; j++) {
|
|
ie_memcpy(&temp_buffer.front() + (j * num_rows + i) * element_size,
|
|
temp_buffer.size() - (i * num_cols + j) * element_size,
|
|
ptr_matrix + (i * num_cols + j) * element_size,
|
|
element_size);
|
|
}
|
|
}
|
|
return temp_buffer;
|
|
};
|
|
|
|
std::vector<uint8_t > transposedWeights;
|
|
for (uint32_t k = 0; k < convolution._out_depth; k++) {
|
|
uint8_t* ptr_filt_current
|
|
= convolution._weights->cbuffer().as<uint8_t*>() + k * num_columns_in * convolution._kernel[X_AXIS] * convolution.precision.size();
|
|
auto transposedPart = TransposeMatrix(ptr_filt_current, convolution.precision.size(), num_columns_in, convolution._kernel[X_AXIS]);
|
|
transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end());
|
|
}
|
|
|
|
if (num_conv_kernel_padding == 0) {
|
|
gnamem->readonly().push_local_ptr(ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64);
|
|
} else {
|
|
auto elementsIn = convolution._kernel_x * num_feature_map_columns + num_conv_kernel_padding;
|
|
auto paddedWeights = elementsIn * convolution._out_depth;
|
|
auto paddedWeightsSize = paddedWeights * convolution.precision.size();
|
|
auto elements_in_row = convolution._kernel_x * num_feature_map_columns;
|
|
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
|
size_t offset = 0;
|
|
for (int i = 0; i < convolution._out_depth && size >= offset; i++) {
|
|
ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
|
|
transposedWeights.data() + elements_in_row * i * convolution.precision.size(),
|
|
elements_in_row* convolution.precision.size());
|
|
|
|
offset += elementsIn * convolution.precision.size();
|
|
}
|
|
}, 64);
|
|
}
|
|
|
|
if (convolution._biases) {
|
|
gnamem->readonly().push_ptr(ptr_biases,
|
|
convolution._biases->cbuffer().as<const void*>(),
|
|
convolution._biases->byteSize(),
|
|
64);
|
|
} else {
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto& power = dynamic_cast<PowerLayer&>(*layer.get());
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
|
|
|
|
if (power.power != 1.0) {
|
|
THROW_IE_EXCEPTION << "[GNA plugin] unsupported power factor, expected 1 but was " << power.power;
|
|
}
|
|
|
|
auto input = layer->insData[0].lock();
|
|
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
uint32_t num_rows_in = FROM_IR_DIM(input, 1);
|
|
uint32_t num_columns_in = FROM_IR_DIM(input, 2);
|
|
uint32_t num_rows_out = num_rows_in;
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "power");
|
|
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out + num_padding,
|
|
input->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
// TODO: only fp32 and Int16 tested
|
|
quantized == nullptr ? input->getPrecision().size() : 2,
|
|
quantized == nullptr ? input->getPrecision().size() : 4,
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
true);
|
|
|
|
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
|
|
* outputs->getPrecision().size();
|
|
|
|
size_t num_data_bytes_in = InferenceEngine::details::product(begin(input->getDims()), end(input->getDims()))
|
|
* input->getPrecision().size();
|
|
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
|
|
|
if (gnaFlags->sw_fp32) {
|
|
gnamem->readonly().push_value(ptr_weights, power.scale, num_rows_out, 64);
|
|
gnamem->readonly().push_value(ptr_biases, power.offset, num_rows_out, 64);
|
|
} else {
|
|
auto quantizedScale = FLOAT_TO_INT16(std::min(quantized->_weights_quant.scale * power.scale,
|
|
static_cast<float>(INT16_MAX)));
|
|
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.scale * power.offset,
|
|
static_cast<float>(INT32_MAX)));
|
|
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedScale, num_rows_out, 64);
|
|
gnamem->readonly().push_value<int32_t>(ptr_biases, quantizedOffset, num_rows_out, 64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto& pooling = dynamic_cast<PoolingLayer&>(*layer.get());
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
|
|
uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
|
|
uint32_t w_dim_out = FROM_IR_DIM(outputs, 1);
|
|
uint32_t h_dim_out = FROM_IR_DIM(outputs, 2);
|
|
uint32_t c_dim_out = FROM_IR_DIM(outputs, 3);
|
|
|
|
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
|
|
swap(h_dim_in, w_dim_in);
|
|
swap(h_dim_out, w_dim_out);
|
|
swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
|
|
}
|
|
|
|
uint32_t num_rows_in = w_dim_in;
|
|
uint32_t num_columns_in = c_dim_in;
|
|
uint32_t num_rows_out = w_dim_out;
|
|
uint32_t num_columns_out = c_dim_out;
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "pooling");
|
|
|
|
switch (pooling._type) {
|
|
case PoolingLayer::MAX: break;
|
|
// we are loosing precision here
|
|
case PoolingLayer::AVG:
|
|
default:
|
|
// TODO: convert to SUMM pooling
|
|
THROW_GNA_EXCEPTION << "Layer :" << layer->name << " not supported";
|
|
}
|
|
|
|
dnn->InitMaxpoolComponent(currentComponent,
|
|
1,
|
|
num_columns_in * num_rows_in,
|
|
1,
|
|
num_columns_out * num_rows_out,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
pooling._kernel[X_AXIS],
|
|
pooling._kernel[X_AXIS],
|
|
num_columns_in,
|
|
false,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs);
|
|
|
|
size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()))
|
|
* outputs->getPrecision().size();
|
|
|
|
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
}
|
|
|
|
void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
|
|
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
|
|
uint32_t num_columns_out = FROM_IR_DIM(outputs, 2);
|
|
uint32_t num_padding_out = ALIGN(num_rows_out, 8) - num_rows_out;
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
auto orientation = kDnnInterleavedOrientation;
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "copy");
|
|
|
|
dnn->InitCopyComponent(currentComponent,
|
|
orientation,
|
|
ALIGN(num_rows_in, 8),
|
|
num_columns_in,
|
|
ALIGN(num_rows_out, 8),
|
|
num_columns_out,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
num_rows_out + num_padding_out,
|
|
num_columns_out,
|
|
ptr_inputs,
|
|
ptr_outputs);
|
|
|
|
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
|
|
begin(outputs->getDims()), end(outputs->getDims())), 8)
|
|
* outputs->getPrecision().size();
|
|
size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
}
|
|
|
|
void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
|
|
|
|
if (concatLayer == nullptr) {
|
|
return;
|
|
}
|
|
if (concatLayer->insData.size() < 2) {
|
|
THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers.";
|
|
}
|
|
|
|
for (std::size_t layerIndex = 0; layerIndex < concatLayer->insData.size(); layerIndex++) {
|
|
auto input = concatLayer->insData[layerIndex].lock();
|
|
if (!input) {
|
|
THROW_GNA_EXCEPTION << "Input layer " << layerIndex << " for concat is unexpectedly absent";
|
|
}
|
|
}
|
|
|
|
std::size_t layerPrecisionSize = concatLayer->insData[0].lock()->getPrecision().size();
|
|
for (std::size_t layerIndex = 0; layerIndex < concatLayer->insData.size(); layerIndex++) {
|
|
auto currentSize = concatLayer->insData[layerIndex].lock()->getPrecision().size();
|
|
if (layerPrecisionSize != currentSize) {
|
|
THROW_GNA_EXCEPTION << "Different precision for Concat Layer '" << concatLayer->name << "' input layers." <<
|
|
"input 0 precision is '" << concatLayer->insData[0].lock()->getPrecision().name() << "' but input " << layerIndex <<
|
|
" precision is '" << concatLayer->insData[layerIndex].lock()->getPrecision().name() << "'";
|
|
}
|
|
}
|
|
|
|
auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
|
|
for (auto &&outLayer : concatLayer->outData.front()->getInputTo()) {
|
|
if ( LayerInfo(outLayer.second).isConcat() ) {
|
|
connectOutput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size);
|
|
}
|
|
}
|
|
|
|
size_t idx = 0;
|
|
for (auto && inputLayer : concatLayerInfo.concatInputLayers) {
|
|
auto concatLayerInput = concat_connection.find(concatLayer->name)->second.getConcat();
|
|
CNNLayerPtr concatParent;
|
|
int it = 0;
|
|
|
|
for (; it != concatLayerInput->insData.size(); it++) {
|
|
concatParent = CNNNetPrevLayerSkipCertain(concatLayerInput, it, [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
});
|
|
if (concatParent->name.find(inputLayer.name) != std::string::npos) {
|
|
break;
|
|
}
|
|
}
|
|
IE_ASSERT(it != concatLayerInput->insData.size());
|
|
auto layerInfo = LayerInfo(concatParent);
|
|
// auto layerInfo = LayerInfo(concatLayerInput->insData[it].lock()->getCreatorLayer().lock());
|
|
if (layerInfo.isInput()) {
|
|
if (concatLayerInfo.input_allocated) {
|
|
// for concat input allocated only once, so lets mark this specific input layer also as allocated
|
|
// we will bind it to offset further in connectInput
|
|
// size need to be equal to full layer in order to pass checks
|
|
inputDesc->bytes_allocated_for_input[((InferenceEngine::CNNLayerPtr)layerInfo)->name] = concatLayerInfo.reserved_size;
|
|
}
|
|
|
|
connectInput(layer, &concatLayerInfo.gna_ptr,
|
|
concatLayerInfo.reserved_size, -static_cast<int32_t>(inputLayer.offset), idx);
|
|
|
|
// TODO: currently connectInput api accept only total size, for concat we need extension for allocated, and actual sizes
|
|
inputDesc->bytes_allocated_for_input[((InferenceEngine::CNNLayerPtr) layerInfo)->name] = inputLayer.tensorSize;
|
|
|
|
concatLayerInfo.input_allocated = true;
|
|
} else if (layerInfo.isMemory()) {
|
|
connectInput(layer, &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size, -static_cast<int>(inputLayer.offset), idx);
|
|
|
|
concatLayerInfo.input_allocated = true;
|
|
}
|
|
++idx;
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*> (layer.get());
|
|
|
|
if (cropLayer == nullptr) {
|
|
return;
|
|
}
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
|
|
IE_ASSERT(!cropLayer->axis.empty());
|
|
IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
|
|
IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
|
|
|
|
std::vector<int> axis, dim, offset;
|
|
for (int n = 0; n < cropLayer->axis.size(); n++) {
|
|
uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
|
|
// Exclude crop layer components that do nothing
|
|
if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
|
|
continue;
|
|
}
|
|
axis.push_back(cropLayer->axis[n]);
|
|
dim.push_back(cropLayer->dim[n]);
|
|
offset.push_back(cropLayer->offset[n]);
|
|
}
|
|
|
|
if (axis.size() > 1) {
|
|
THROW_GNA_EXCEPTION <<
|
|
"Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
|
|
<< axis.size() << ".";
|
|
}
|
|
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
size_t cropOffset = offset.front() * cropLayer->precision.size();
|
|
size_t cropOutputSize = dim.front() * cropLayer->precision.size();
|
|
|
|
if (ALIGN64(cropOffset) == cropOffset) {
|
|
// leave crop as it is
|
|
GNAPluginNS::GNACropLayer cropLayerInfoItem(layer);
|
|
std::string& id = layer->name;
|
|
crop_connection.emplace(id, cropLayerInfoItem);
|
|
auto cropLayerInfo = crop_connection.find(cropLayer->name);
|
|
|
|
if (cropLayerInfo == crop_connection.end()) {
|
|
THROW_GNA_EXCEPTION <<
|
|
"Item is not in the storage but it was added recently...\n";
|
|
}
|
|
|
|
// calculate index idx for connectInput last parameter
|
|
connectInput(layer, &cropLayerInfo->second.gna_ptr, cropOutputSize + cropOffset, cropOffset, 0);
|
|
|
|
// cases for certain output layers
|
|
for (auto&& outLayer : layer->outData.front()->getInputTo()) {
|
|
auto& nextLayer = outLayer.second;
|
|
if (LayerInfo(nextLayer).isConcat()) {
|
|
connectOutput(layer, &cropLayerInfo->second.gna_ptr, cropOutputSize);
|
|
}
|
|
}
|
|
} else {
|
|
gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
// only 1D crops supported
|
|
if (axis.size() != 1) {
|
|
THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
|
|
}
|
|
|
|
// TODO: add unit tests for 4d crops blobs
|
|
uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
|
|
uint32_t num_columns_in = 1;
|
|
|
|
uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "crop");
|
|
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out,
|
|
inputs->getPrecision().size(),
|
|
4,
|
|
quantized == nullptr ? inputs->getPrecision().size() : 2,
|
|
4,
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
false);
|
|
|
|
size_t num_data_bytes_out =
|
|
InferenceEngine::details::product(
|
|
begin(outputs->getDims()), end(outputs->getDims())) * 4;
|
|
|
|
size_t num_data_bytes_in = num_columns_in *
|
|
ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
|
|
|
|
(quantized == nullptr) ?
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
|
|
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::SplitPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
// Nothing to do
|
|
}
|
|
|
|
void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
// Nothing to do
|
|
}
|
|
|
|
void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
// for eltwise should be one input of 4 bytes and one of 2 bytes - detecting that
|
|
auto inputs2Bytes = layer->insData[0].lock();
|
|
auto inputs4Bytes = layer->insData[1].lock();
|
|
|
|
int biasesLayerIdx = 1;
|
|
|
|
if (quantized) {
|
|
switch (eltwise._operation) {
|
|
case InferenceEngine::EltwiseLayer::Sum:
|
|
case InferenceEngine::EltwiseLayer::Sub:
|
|
{
|
|
if (inputs4Bytes->getPrecision().size() != 4) {
|
|
std::swap(inputs4Bytes, inputs2Bytes);
|
|
biasesLayerIdx = 0;
|
|
}
|
|
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
|
|
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
|
|
break;
|
|
}
|
|
case InferenceEngine::EltwiseLayer::Prod:
|
|
{
|
|
// for mul both inputs should be 2 bytes precision
|
|
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
|
|
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
|
|
break;
|
|
}
|
|
default:
|
|
THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
|
|
}
|
|
}
|
|
|
|
auto outputs = *layer->outData.begin();
|
|
|
|
uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
|
|
uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
|
|
uint32_t num_rows_out = num_rows_in;
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "diagonal");
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out + num_padding,
|
|
inputs2Bytes->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
// TODO: only fp32 and Int16 tested
|
|
quantized == nullptr ? inputs2Bytes->getPrecision().size() : 2,
|
|
quantized == nullptr ? inputs4Bytes->getPrecision().size() : 4,
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
true);
|
|
|
|
size_t num_data_bytes_out =
|
|
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * outputs->getPrecision().size();
|
|
|
|
size_t num_data_bytes_in =
|
|
num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->getPrecision().size();
|
|
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
|
|
|
|
switch (eltwise._operation) {
|
|
case EltwiseLayer::Sub:
|
|
if (quantized == nullptr) {
|
|
gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
|
|
} else {
|
|
auto scaledIdentity = -quantized->_weights_quant.scale;
|
|
|
|
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
|
|
|
|
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
|
}
|
|
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
|
|
break;
|
|
case EltwiseLayer::Sum:
|
|
if (quantized == nullptr) {
|
|
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
|
|
} else {
|
|
auto scaledIdentity = quantized->_weights_quant.scale;
|
|
|
|
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
|
|
|
|
gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
|
|
}
|
|
connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
|
|
break;
|
|
|
|
case EltwiseLayer::Prod:
|
|
if (quantized == nullptr) {
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
|
} else {
|
|
gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
|
|
}
|
|
connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx);
|
|
break;
|
|
|
|
default:
|
|
THROW_GNA_EXCEPTION << "Unsupported eltwise operation: " << eltwise._operation;
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag) {
|
|
auto& weightable = dynamic_cast<WeightableLayer&>(*layer.get());
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto outputs = *layer->outData.begin();
|
|
auto inputPrecision = quantized ? Precision(Precision::I16) : inputs->getPrecision();
|
|
|
|
uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
|
|
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
uint32_t num_padding_out = isDiag ? num_padding : 0;
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
// TODO: questionable why for biases that are no in Model we inventing precision
|
|
auto biasPrecision = weightable._biases ? weightable._biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
|
|
|
// layer without biases might be connected to functional layer without activations
|
|
auto prevLayer = CNNNetPrevLayer(layer);
|
|
bool useBiasConnection = false;
|
|
if (LayerInfo(prevLayer).has32BOutput()) {
|
|
if (weightable._biases) {
|
|
THROW_GNA_EXCEPTION << "Layer: "
|
|
<< layer->name << ", cannot be connected to its parent: " << prevLayer->name
|
|
<< " due to precision mismatch";
|
|
}
|
|
gnalog() << "Connection " << prevLayer->name << " to " << layer->name << " is using BIAS as input" << std::endl;
|
|
useBiasConnection = true;
|
|
}
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, (isDiag ? "diagonal" : "affine"));
|
|
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out + num_padding_out,
|
|
inputPrecision.size(),
|
|
outputs->getPrecision().size(),
|
|
weightable._weights->getTensorDesc().getPrecision().size(),
|
|
biasPrecision.size(),
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
isDiag);
|
|
|
|
size_t num_data_bytes_out =
|
|
num_columns_in * (num_rows_out + num_padding_out) * outputs->getPrecision().size();
|
|
|
|
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * inputs->getPrecision().size();
|
|
|
|
auto connectionInfo = connectInput(layer, useBiasConnection ? ptr_biases : ptr_inputs, num_data_bytes_in);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
auto transpose = false;
|
|
auto transposedRows = 0;
|
|
auto transposedCols = 0;
|
|
|
|
if (0 && connectionInfo.needTransposeWeights) {
|
|
// direct order is 0, 1, 2, 3, supported order is only 0,3,2,1 where dim 2 is usually equals to 1
|
|
auto permuteOrder = connectionInfo.permute->GetParamAsInts("order");
|
|
if (permuteOrder != vector<int>({ 0, 3, 2, 1 })) {
|
|
THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
|
|
", but only support 0, 3, 2, 1";
|
|
}
|
|
|
|
/**
|
|
* TODO: weights transpose happened after quantisation might result in poor quality for in 8 - move this to passes
|
|
*/
|
|
if (weightable._weights->getTensorDesc().getPrecision() == Precision::I8) {
|
|
THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute operation for 8 bit weights for layer: " << layer->name;
|
|
}
|
|
|
|
// this affine connected to convolution via pool or activation
|
|
gnalog() << "Transposing weights for layer: " << layer->name << "\n";
|
|
|
|
transpose = !isDiag;
|
|
transposedRows = connectionInfo.permute->input()->getDims()[3];
|
|
transposedCols = connectionInfo.permute->input()->getDims()[1];
|
|
}
|
|
|
|
if (num_padding == 0) {
|
|
if (!transpose) {
|
|
gnamem->readonly().push_ptr(ptr_weights,
|
|
weightable._weights->cbuffer().as<const void*>(),
|
|
weightable._weights->byteSize(),
|
|
64);
|
|
} else {
|
|
gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void* data, size_t size) {
|
|
for (int k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
|
|
auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
|
|
auto cbuffer = weightable._weights->cbuffer().as<const uint8_t*>() + rowOffset;
|
|
auto u8Data = reinterpret_cast<uint8_t*>(data) + rowOffset;
|
|
for (int j = 0; j < transposedCols; j++) {
|
|
for (int i = 0; i < transposedRows; i++) {
|
|
auto offsetWrite = (transposedRows * j + i) * weightable.precision.size();
|
|
auto offsetRead = (i * transposedCols + j) * weightable.precision.size();
|
|
if (size < rowOffset + offsetWrite) {
|
|
// zero out dest if error detected
|
|
memset(data, 0, size);
|
|
THROW_GNA_EXCEPTION << "Size error";
|
|
}
|
|
ie_memcpy(u8Data + offsetWrite, size - rowOffset - offsetWrite,
|
|
cbuffer + offsetRead, weightable.precision.size());
|
|
}
|
|
}
|
|
}
|
|
}, 64);
|
|
}
|
|
} else {
|
|
if (transpose) {
|
|
THROW_GNA_EXCEPTION << "transposed weights with non zero padding not yet supported";
|
|
}
|
|
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
|
|
auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
|
|
auto paddedWeightsSize = paddedWeights * weightable.precision.size();
|
|
|
|
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
|
for (int i = 0; i < (isDiag ? 1 : num_rows_out); i++) {
|
|
ie_memcpy(data, size,
|
|
weightable._weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * weightable.precision.size(),
|
|
num_rows_in * weightable.precision.size());
|
|
data = reinterpret_cast<uint8_t*>(data) + (num_rows_in + num_padding) * weightable.precision.size();
|
|
}
|
|
}, 64);
|
|
}
|
|
|
|
if (weightable._biases) {
|
|
gnamem->readonly().push_ptr(ptr_biases,
|
|
weightable._biases->cbuffer().as<const void*>(),
|
|
weightable._biases->byteSize(),
|
|
64);
|
|
} else {
|
|
// in that case input from previous layer goes into biases, so we have to initialize input pointer by zero
|
|
if (useBiasConnection) {
|
|
gnamem->readonly().push_value(ptr_inputs, 0.0f, num_rows_in + num_padding, 64);
|
|
} else {
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out + num_padding_out, 64);
|
|
}
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr layer, void* ptrWeights, size_t offset, bool isQuantized) {
|
|
IE_ASSERT(!layer->outData.empty());
|
|
IE_ASSERT(!layer->insData.empty());
|
|
auto outputs = *layer->outData.begin();
|
|
auto inputs = layer->insData.begin()->lock();
|
|
|
|
uint32_t num_rows_in = InferenceEngine::details::product(++begin(inputs->getDims()), end(inputs->getDims()));
|
|
uint32_t num_rows_out = InferenceEngine::details::product(++begin(outputs->getDims()), end(outputs->getDims()));
|
|
|
|
if (!ptrWeights) {
|
|
THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
|
|
}
|
|
|
|
gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) {
|
|
int out = 0;
|
|
for (int input = offset; input < num_rows_out + offset; ++input) {
|
|
auto mem_ptr = reinterpret_cast<uint8_t*>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
|
|
if (!isQuantized) {
|
|
auto float_ptr = reinterpret_cast<float*>(mem_ptr);
|
|
*float_ptr = 1.0f;
|
|
} else {
|
|
auto int_ptr = reinterpret_cast<uint16_t*>(mem_ptr);
|
|
*int_ptr = 1;
|
|
}
|
|
++out;
|
|
}
|
|
}, 64);
|
|
}
|
|
|
|
void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer*> (layer.get());
|
|
|
|
if (filterLayer == nullptr) {
|
|
return;
|
|
}
|
|
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
IE_ASSERT(!layer->outData.empty());
|
|
IE_ASSERT(!layer->insData.empty());
|
|
auto outputs = *layer->outData.begin();
|
|
auto inputs = layer->insData.begin()->lock();
|
|
|
|
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
|
|
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
|
|
auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
|
|
// number of rows we handled by inserting copy layer
|
|
uint32_t num_rows_copied = 0;
|
|
// in case of left alignment succeed, but due to number of elements not multiple of 8 we need to insert align_filter
|
|
// we are improving it by inserting copy layer of size that covers most of elements - remained max of 32x31 affine filter
|
|
if (policy.ConcatAlignmentPolicy == Policy::ConcatAlignment::FAST && 0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
|
|
// can we use copy at all
|
|
num_rows_copied = ALIGN(num_rows_in, 32) - 32;
|
|
|
|
auto orientation = kDnnInterleavedOrientation;
|
|
|
|
auto& copyComponent = dnnComponents.addComponent(layer->name + "_synthetic_copy", "copy");
|
|
|
|
dnn->InitCopyComponent(copyComponent,
|
|
orientation,
|
|
num_rows_copied,
|
|
num_columns_in,
|
|
num_rows_copied,
|
|
num_columns_in,
|
|
inputs->getPrecision().size(),
|
|
inputs->getPrecision().size(),
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
num_rows_copied,
|
|
num_columns_in,
|
|
ptr_inputs,
|
|
ptr_outputs);
|
|
|
|
|
|
size_t num_data_bytes_in = num_rows_copied * num_rows_copied * num_columns_in
|
|
* inputs->getPrecision().size();
|
|
// need to reserve full tensor so using original size with assumption of identity activation attached to filter lateron
|
|
size_t num_data_bytes_out = num_rows_out * num_columns_in * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in);
|
|
auto isNonFunctional = [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
};
|
|
auto identity = CNNNetGetNextLayerSkipCertain(layer, 0, 0, isNonFunctional);
|
|
connectOutput(identity.first, ptr_outputs, num_data_bytes_out);
|
|
|
|
num_rows_in -= num_rows_copied;
|
|
num_rows_out -= num_rows_copied;
|
|
}
|
|
filterLayer->params["rows_copied_offset"] = std::to_string(num_rows_copied * inputs->getPrecision().size());
|
|
|
|
|
|
auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
|
|
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
filterLayer->_weights->getTensorDesc().getPrecision().size(),
|
|
biasPrecision.size(),
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
false);
|
|
|
|
size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
|
|
size_t num_data_bytes_in = num_columns_in *
|
|
ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
{
|
|
auto weightsElementSize = filterLayer->_weights->getTensorDesc().getPrecision().size();
|
|
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
|
|
auto paddedWeights = elementsIn * num_rows_out;
|
|
auto paddedWeightsSize = paddedWeights * weightsElementSize;
|
|
|
|
// TODO: this can be improved to not generate unneeded weights at all
|
|
|
|
size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize;
|
|
size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize;
|
|
|
|
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
|
size_t roffset = weights_offset;
|
|
size_t woffset = 0;
|
|
for (int i = 0; i < num_rows_out && size >= woffset; i++) {
|
|
ie_memcpy(reinterpret_cast<uint8_t*>(data) + woffset,
|
|
size - woffset,
|
|
filterLayer->_weights->cbuffer().as<const uint8_t*>() + roffset,
|
|
num_rows_in * weightsElementSize);
|
|
roffset += weights_stride;
|
|
woffset += elementsIn * weightsElementSize;
|
|
}
|
|
}, 64);
|
|
}
|
|
|
|
if (filterLayer->_biases) {
|
|
gnamem->readonly().push_ptr(ptr_biases,
|
|
filterLayer->_biases->cbuffer().as<const void*>(),
|
|
filterLayer->_biases->byteSize(),
|
|
64);
|
|
} else {
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer*> (layer.get());
|
|
|
|
if (filterLayer == nullptr) {
|
|
return;
|
|
}
|
|
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
|
|
auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
|
|
if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
|
|
THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
|
|
}
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
void* ptr_weights = nullptr;
|
|
void* ptr_biases = nullptr;
|
|
|
|
IE_ASSERT(!layer->outData.empty());
|
|
IE_ASSERT(!layer->insData.empty());
|
|
auto outputs = *layer->outData.begin();
|
|
auto inputs = layer->insData.begin()->lock();
|
|
|
|
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
|
|
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
|
|
|
|
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
|
auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
|
|
|
|
dnn->InitAffineComponent(currentComponent,
|
|
num_rows_in + num_padding,
|
|
num_columns_in,
|
|
num_rows_out,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
filterLayer->_weights->getTensorDesc().getPrecision().size(),
|
|
biasPrecision.size(),
|
|
quantized == nullptr ? 1 : quantized->_weights_quant.scale,
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_weights,
|
|
ptr_biases,
|
|
false);
|
|
|
|
size_t num_data_bytes_out =
|
|
InferenceEngine::details::product(
|
|
begin(outputs->getDims()), end(outputs->getDims())) * 4;
|
|
|
|
size_t num_data_bytes_in = num_columns_in *
|
|
ALIGN(num_rows_in, 8) * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
if (num_padding == 0) {
|
|
gnamem->readonly().push_ptr(ptr_weights,
|
|
filterLayer->_weights->cbuffer().as<const void*>(),
|
|
filterLayer->_weights->byteSize(),
|
|
64);
|
|
} else {
|
|
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
|
|
auto paddedWeights = elementsIn * num_rows_out;
|
|
auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
|
|
|
|
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
|
size_t offset = 0;
|
|
for (int i = 0; i < num_rows_out && size >= offset; i++) {
|
|
ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
|
|
filterLayer->_weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * filterLayer->precision.size(),
|
|
num_rows_in* filterLayer->precision.size());
|
|
offset += (num_rows_in + num_padding) * filterLayer->precision.size();
|
|
}
|
|
}, 64);
|
|
}
|
|
|
|
if (filterLayer->_biases) {
|
|
gnamem->readonly().push_ptr(ptr_biases,
|
|
filterLayer->_biases->cbuffer().as<const void*>(),
|
|
filterLayer->_biases->byteSize(),
|
|
64);
|
|
} else {
|
|
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
auto* generic = dynamic_cast<GenericLayer*>(layer.get());
|
|
std::string type;
|
|
std::vector<intel_pwl_segment_t> ptr_pwl_segments;
|
|
uint32_t num_rows;
|
|
uint32_t num_columns;
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
|
|
do {
|
|
if (generic == nullptr) {
|
|
type = layer->type;
|
|
break;
|
|
}
|
|
|
|
if (InferenceEngine::details::CaselessEq<string>()(layer->type, "activation")) {
|
|
type = generic->GetParamAsString("type");
|
|
break;
|
|
} else {
|
|
type = layer->type;
|
|
break;
|
|
}
|
|
} while (false);
|
|
|
|
IE_ASSERT(!layer->insData.empty());
|
|
IE_ASSERT(!layer->outData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto outputs = *layer->outData.begin();
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
|
|
float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.scale : 1.0f;
|
|
|
|
auto orientation = kDnnInterleavedOrientation;
|
|
|
|
if (inputs->getDims().size() == 4) {
|
|
uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
|
|
uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
|
|
uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
|
|
|
|
num_columns = (w_dim_in == 1) ? h_dim_in * c_dim_in : w_dim_in * c_dim_in;
|
|
num_rows = 1;
|
|
} else {
|
|
num_columns = FROM_IR_DIM(inputs, 2);
|
|
num_rows = FROM_IR_DIM(inputs, 1);
|
|
}
|
|
|
|
// TODO: solve this by layer level transformations
|
|
auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
|
|
if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
|
|
auto rowsCopiedOffset = concatAlignFilter->GetParamAsInt("rows_copied_offset");
|
|
if (rowsCopiedOffset != 0) {
|
|
num_rows -= rowsCopiedOffset / outputs->getPrecision().size();
|
|
layer->params["output_offset"] = std::to_string(rowsCopiedOffset);
|
|
}
|
|
}
|
|
|
|
size_t num_data_bytes_out = num_columns * num_rows * outputs->getPrecision().size();
|
|
size_t num_data_bytes_in = num_columns * num_rows * inputs->getPrecision().size();
|
|
|
|
static InferenceEngine::details::caseless_unordered_map<std::string, DnnActivationType> supportedActivations = {
|
|
{"sigmoid", kActSigmoid},
|
|
{"tanh", kActTanh},
|
|
{"relu", kActRelu},
|
|
{"leakyrelu", kActLeakyRelu},
|
|
{"clamp", kActKaldiLstmClipping},
|
|
{"exp", kActExp},
|
|
{"log", kActLog},
|
|
{"sign", kActSign},
|
|
{"abs", kActAbs},
|
|
{"neglog", kActNegLog},
|
|
{"neghalflog", kActNegHalfLog},
|
|
{"identity", kActIdentity},
|
|
{"softsign", kActSoftSign}
|
|
};
|
|
|
|
auto it = supportedActivations.find(type);
|
|
if (it == supportedActivations.end()) {
|
|
THROW_GNA_EXCEPTION << "Activation function type not yet supported: " << type;
|
|
}
|
|
auto activation_type = DnnActivation::fromType(it->second);
|
|
if (it->second == kActRelu) {
|
|
auto reluLayer = dynamic_cast<ReLULayer*>(layer.get());
|
|
activation_type.negative_slope = reluLayer != nullptr ? reluLayer->negative_slope : 0.0f;
|
|
} else {
|
|
activation_type.negative_slope = 0.0f;
|
|
}
|
|
|
|
string actName = "unknown";
|
|
|
|
#ifdef PLOT
|
|
#define GET_ACTIVATION_NAME(name)\
|
|
case name:\
|
|
actName = #name;\
|
|
break
|
|
switch (activation_type) {
|
|
GET_ACTIVATION_NAME(kActSigmoid);
|
|
GET_ACTIVATION_NAME(kActTanh);
|
|
GET_ACTIVATION_NAME(kActRelu);
|
|
GET_ACTIVATION_NAME(kActLeakyRelu);
|
|
GET_ACTIVATION_NAME(kActKaldiLstmClipping);
|
|
GET_ACTIVATION_NAME(kActIdentity);
|
|
GET_ACTIVATION_NAME(kActSoftSign);
|
|
GET_ACTIVATION_NAME(kActCustom);
|
|
GET_ACTIVATION_NAME(kActExp);
|
|
GET_ACTIVATION_NAME(kActLog);
|
|
GET_ACTIVATION_NAME(kActSign);
|
|
GET_ACTIVATION_NAME(kActAbs);
|
|
GET_ACTIVATION_NAME(kActNegLog);
|
|
GET_ACTIVATION_NAME(kActNegHalfLog);
|
|
default: break;
|
|
}
|
|
#endif
|
|
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, actName);
|
|
|
|
intel_pwl_segment_t* ptr_pwl_segments_target = nullptr;
|
|
|
|
if (!gnaFlags->sw_fp32) {
|
|
// TODO: generalize activation function code
|
|
// now that scale factors are known, create PWL approximations to activation functions
|
|
if (gnaFlags->uniformPwlDesign) {
|
|
switch (activation_type) {
|
|
case kActSigmoid:ptr_pwl_segments.resize(SIGMOID_NUM_SEGMENTS);
|
|
break;
|
|
case kActTanh:ptr_pwl_segments.resize(TANH_NUM_SEGMENTS);
|
|
break;
|
|
case kActRelu:ptr_pwl_segments.resize(RELU_NUM_SEGMENTS);
|
|
break;
|
|
case kActLeakyRelu:ptr_pwl_segments.resize(RELU_NUM_SEGMENTS);
|
|
break;
|
|
case kActKaldiLstmClipping:
|
|
case kActIdentity:ptr_pwl_segments.resize(IDENTITY_NUM_SEGMENTS);
|
|
break;
|
|
case kActSoftSign:ptr_pwl_segments.resize(SOFTSIGN_NUM_SEGMENTS);
|
|
break;
|
|
case kActCustom:
|
|
default:
|
|
THROW_GNA_EXCEPTION << "Activation function type not yet supported " << activation_type;
|
|
}
|
|
PwlDesign16(activation_type,
|
|
&*ptr_pwl_segments.begin(),
|
|
static_cast<uint32_t>(ptr_pwl_segments.size()),
|
|
input_pwl_scale_factor,
|
|
output_pwl_scale_factor);
|
|
} else {
|
|
PwlDesignOpt16(activation_type,
|
|
ptr_pwl_segments,
|
|
input_pwl_scale_factor,
|
|
output_pwl_scale_factor);
|
|
}
|
|
ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
|
|
}
|
|
|
|
dnn->InitPiecewiseLinearComponent(currentComponent,
|
|
activation_type,
|
|
orientation,
|
|
num_rows,
|
|
num_columns,
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
ptr_pwl_segments.size(),
|
|
output_pwl_scale_factor,
|
|
input_pwl_scale_factor,
|
|
ptr_inputs,
|
|
ptr_outputs,
|
|
ptr_pwl_segments_target);
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
|
|
if (ptr_pwl_segments_target != nullptr) {
|
|
gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
|
|
&ptr_pwl_segments.front(),
|
|
ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
|
|
64);
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|
if (LayerInfo(layer).isTrivialPermute()) {
|
|
return;
|
|
}
|
|
auto layerOrder = layer->GetParamAsInts("order");
|
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
|
IE_ASSERT(!layer->insData.empty());
|
|
auto inputs = layer->insData.begin()->lock();
|
|
auto inputsOrder = inputs->getTensorDesc().getDims();
|
|
auto outputs = layer->outData.front();
|
|
|
|
// squeeze order vector
|
|
SizeVector squeezedInputOrder;
|
|
for (auto input_shape : inputsOrder) {
|
|
if (input_shape != 1) squeezedInputOrder.push_back(input_shape);
|
|
}
|
|
SizeVector squeezedOutputOrder;
|
|
for (auto output_shape : layerOrder) {
|
|
if (output_shape != 0) squeezedOutputOrder.push_back(output_shape);
|
|
}
|
|
|
|
void* ptr_inputs = nullptr;
|
|
void* ptr_outputs = nullptr;
|
|
|
|
if (squeezedInputOrder.size() > 2) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "unsupported permute (requested transpose is not 2D)";
|
|
}
|
|
|
|
if (std::min(squeezedInputOrder[0], squeezedInputOrder[1]) > 8) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "unsupported permute (minor dimension="
|
|
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
|
|
}
|
|
|
|
// now this can be run on GNA
|
|
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
|
|
if (ALIGN(squeezedInputOrder[1], 8) != squeezedInputOrder[1]) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "unsupported permute (row size not a multiple of 8)";
|
|
} else {
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
|
|
dnn->InitInterleaveComponent(currentComponent,
|
|
squeezedInputOrder[0],
|
|
squeezedInputOrder[1],
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
(quantized == nullptr) ? 1.0f : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs);
|
|
}
|
|
|
|
} else { // deinterleave case
|
|
if (ALIGN(squeezedInputOrder[0], 8) != squeezedInputOrder[0]) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) << "[GNA plugin] unsupported permute (column size not a multiple of 8)";
|
|
} else {
|
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
|
|
dnn->InitDeinterleaveComponent(currentComponent,
|
|
squeezedInputOrder[0],
|
|
squeezedInputOrder[1],
|
|
inputs->getPrecision().size(),
|
|
outputs->getPrecision().size(),
|
|
quantized == nullptr ? 1 : quantized->_dst_quant.scale,
|
|
ptr_inputs,
|
|
ptr_outputs);
|
|
}
|
|
}
|
|
|
|
size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
|
|
begin(outputs->getDims()), end(outputs->getDims())), 8)
|
|
* outputs->getPrecision().size();
|
|
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
|
|
|
|
connectInput(layer, ptr_inputs, num_data_bytes_in);
|
|
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
|
}
|
|
|
|
void SKIP(GNAGraphCompiler*, CNNLayerPtr) {}
|
|
|
|
void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
|
static const LayersBuilder layersBuilder[] = {
|
|
{{"Input"}, [](GNAGraphCompiler*, CNNLayerPtr l) {}}, // skip input layers they are not used in GNA lib, only as a memory blobs
|
|
{{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
|
|
{{"ScaleShift"}, CREATE(DiagonalPrimitive)},
|
|
{{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
|
|
{{"ConcatAlignFilter"}, CREATE(ConcatAlignFilterPrimitive)},
|
|
{{"Const"}, CREATE(ConstPrimitive)},
|
|
{{"Eltwise"}, CREATE(EltwisePrimitive)}, // same as diagonal while weights are not taken from network, rather than from another output
|
|
{{"Split"}, SKIP}, // skip information about which part of prev layer need to consume handle during layer creation
|
|
{{"Slice"}, SKIP},
|
|
{{"link"}, SKIP},
|
|
{{"clamp",
|
|
"sigmoid",
|
|
"relu",
|
|
"tanh",
|
|
"identity",
|
|
"softsign",
|
|
"exp",
|
|
"log",
|
|
"sign",
|
|
"abs",
|
|
"neglog",
|
|
"neghalflog"},
|
|
CREATE(PWLPrimitive)},
|
|
{{"Convolution"}, CREATE(ConvolutionPrimitive)},
|
|
{{"Permute"}, CREATE(PermutePrimitive)}, // permute of certain form (2D transpose) can be assimilated in followed FC layer
|
|
{{"Pooling"}, CREATE(PoolingPrimitive)},
|
|
{{"Power"} , CREATE(PowerPrimitive)},
|
|
{{"Concat"}, CREATE(ConcatPrimitive)},
|
|
{{"Reshape"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
|
|
{{"Squeeze"}, SKIP}, // TODO: handled not in GNA but rather in GNA plugin
|
|
{{"Crop"}, CREATE(CropPrimitive)},
|
|
{{"Copy"}, CREATE(CopyPrimitive)},
|
|
{{"TensorIterator"}, SKIP},
|
|
{{"LSTMCell"}, SKIP}
|
|
};
|
|
auto it = LayersBuilder::getStorage().find(layer->type);
|
|
if (it != LayersBuilder::getStorage().end()) {
|
|
it->second(this, layer);
|
|
} else {
|
|
THROW_GNA_EXCEPTION << "Unsupported layer: " << layer->name << ":" << layer->type;
|
|
}
|
|
}
|
|
|
|
void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, size_t num_data_bytes_out) {
|
|
gnalog() << "Connecting output " << layer->name << " ...\n";
|
|
// in case of Memory Layer it's input allocated in meminput layer
|
|
if (layer->outData.size() == 1) {
|
|
for (int j = 0; j != layer->outData.front()->getInputTo().size(); j++) {
|
|
auto isNonFunctional = [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
};
|
|
|
|
if (!CNNNetHasNextLayerSkipCertain(layer, 0, j, isNonFunctional)) {
|
|
continue;
|
|
}
|
|
auto nextLayer = CNNNetGetNextLayerSkipCertain(layer, 0, j, isNonFunctional);
|
|
|
|
if (!nextLayer.first) {
|
|
gnalog() << "for layer: " << layer->name << "outData[0] has non functional connection at " << j;
|
|
}
|
|
|
|
auto nextMemoryLayerIt =
|
|
std::find_if(begin(memory_connection), end(memory_connection),
|
|
[&](MemoryConnection::value_type &comp) {
|
|
return comp.second.getOutput()->name == nextLayer.first->name;
|
|
});
|
|
if (nextMemoryLayerIt != memory_connection.end()) {
|
|
auto &nextMemoryLayer = nextMemoryLayerIt->second;
|
|
// memory layer not yet initialized
|
|
if (nextMemoryLayer.reserved_size == 0) {
|
|
auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes();
|
|
|
|
gnamem->reserve_ptr(&nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
|
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
|
|
|
|
nextMemoryLayer.reserved_size = ALIGN64(memorySize);
|
|
} else {
|
|
IE_ASSERT(nextMemoryLayer.reserved_size >= ALIGN64(num_data_bytes_out));
|
|
gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
// if one of next direct or via split layers is concat...
|
|
auto concatChild = [](CNNLayerPtr layer) {
|
|
CNNLayerPtr concat;
|
|
for (auto &&outLayer : layer->outData.front()->getInputTo()) {
|
|
auto nextLayer = outLayer.second;
|
|
if (LayerInfo(nextLayer).isConcat()) {
|
|
concat = nextLayer;
|
|
}
|
|
}
|
|
return concat;
|
|
};
|
|
auto splitChild = [](CNNLayerPtr layer) {
|
|
std::list<CNNLayerPtr> split;
|
|
for (auto &&outLayer : layer->outData.front()->getInputTo()) {
|
|
auto nextLayer = outLayer.second;
|
|
if (LayerInfo(nextLayer).isSplit() || LayerInfo(nextLayer).isNonFunctional()) {
|
|
split.push_back(nextLayer);
|
|
}
|
|
}
|
|
return split;
|
|
};
|
|
|
|
std::list<CNNLayerPtr> splits;
|
|
auto concat = concatChild(layer);
|
|
auto concatFather = layer;
|
|
if (!concat) {
|
|
splits = splitChild(layer);
|
|
}
|
|
|
|
while (!concat && !splits.empty()) {
|
|
auto firstSplit = splits.front();
|
|
concat = concatChild(firstSplit);
|
|
// now concat prev layer would be this one
|
|
concatFather = firstSplit;
|
|
if (concat) {
|
|
break;
|
|
}
|
|
// inserting into front of queue alow DFS simulation while searching
|
|
splits.pop_front();
|
|
auto nexSplits = splitChild(firstSplit);
|
|
splits.insert(splits.begin(), nexSplits.begin(), nexSplits.end());
|
|
}
|
|
|
|
if (concat) {
|
|
// concat father might be non functional - in that case lets skip it
|
|
auto concatFatherActual =
|
|
LayerInfo(concatFather).isNonFunctional() ?
|
|
CNNNetPrevLayerSkipCertain(concatFather, 0, [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
}) : concatFather;
|
|
|
|
auto& name = concatFatherActual->name;
|
|
// we look for this concat layer pointer in extra concat map
|
|
auto concatLayerInfo = concat_connection.find(concat->name);
|
|
|
|
if (concatLayerInfo == concat_connection.end()) {
|
|
THROW_GNA_EXCEPTION << "Cannot find corresponding concat layer: " << concat->name;
|
|
}
|
|
auto &concatLayerInfoItem = concatLayerInfo->second;
|
|
|
|
// find this input in vector sum all outputs in primitive
|
|
auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(),
|
|
concatLayerInfoItem.concatInputLayers.end(),
|
|
[&name](GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) {
|
|
return item.name == name;
|
|
});
|
|
if (it != concatLayerInfoItem.concatInputLayers.end()) {
|
|
// reserve full size for concat
|
|
if (!concatLayerInfoItem.output_allocation_flag) {
|
|
// check if this concat is being included by other one
|
|
// by going thru each concat and checking inputs
|
|
auto included =
|
|
std::find_if(concat_connection.begin(),
|
|
concat_connection.end(),
|
|
[&concatLayerInfo]
|
|
(const std::pair<std::string, GNAPluginNS::GNAConcatLayer> &concatItem) -> bool {
|
|
auto it = std::find_if(concatItem.second.concatInputLayers.begin(),
|
|
concatItem.second.concatInputLayers.end(),
|
|
[&concatLayerInfo]
|
|
(const GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
|
|
return item.name == concatLayerInfo->first;
|
|
});
|
|
return it != concatItem.second.concatInputLayers.end();
|
|
});
|
|
if (included == concat_connection.end()) {
|
|
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
|
|
|
|
for (auto &&inputLayer : concatLayerInfoItem.concatInputLayers) {
|
|
if (InferenceEngine::details::CaselessEq<std::string>()
|
|
(inputLayer.name, "input")) {
|
|
inputDesc->bytes_allocated_for_input[inputLayer.name] = inputLayer.tensorSize;
|
|
}
|
|
}
|
|
concatLayerInfoItem.input_allocated = true;
|
|
}
|
|
concatLayerInfo->second.output_allocation_flag = true;
|
|
}
|
|
// output offset precalculated to serve GNAAlignment requirements
|
|
auto output_offset = it->offset;
|
|
if (layer->params.find("output_offset") != layer->params.end()) {
|
|
output_offset = layer->GetParamAsInt("output_offset");
|
|
}
|
|
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, output_offset);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
intel_dnn_component_t * unused_input = nullptr;
|
|
if (gnaFlags->compact_mode) {
|
|
unused_input = find_first_unused_input(layer);
|
|
if (unused_input != nullptr) {
|
|
gnamem->bind_ptr(ptr, &unused_input->ptr_inputs, 0, ALIGN64(num_data_bytes_out));
|
|
}
|
|
}
|
|
// cannot reuse suitable input
|
|
if (unused_input == nullptr) {
|
|
gnamem->reserve_ptr(ptr, ALIGN64(num_data_bytes_out), 64);
|
|
}
|
|
}
|
|
|
|
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx) {
|
|
// selecting particular input layers
|
|
// auto prevLayer = CNNNetPrevLayer(layer, idx);
|
|
auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) {
|
|
return LayerInfo(l).isNonFunctional();
|
|
});
|
|
|
|
|
|
gnalog() << "Connecting input " << layer->name << " to " << prevLayer->name << " ...\n";
|
|
|
|
// real input not a memory input
|
|
if (LayerInfo(prevLayer).isInput()) {
|
|
if (0 == inputDesc->bytes_allocated_for_input[prevLayer->name]) {
|
|
// real allocation pointer will be kept in ptr not in ptf_inputs_global
|
|
if (offset < 0) {
|
|
gnamem->push_value(ptr,
|
|
static_cast<uint8_t>(0),
|
|
num_data_bytes_in,
|
|
64);
|
|
} else {
|
|
gnamem->push_value(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(),
|
|
static_cast<uint8_t>(0),
|
|
num_data_bytes_in,
|
|
64);
|
|
}
|
|
|
|
inputDesc->bytes_allocated_for_input[prevLayer->name] = num_data_bytes_in;
|
|
}
|
|
if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputDesc->bytes_allocated_for_input[prevLayer->name], 64)) {
|
|
THROW_GNA_EXCEPTION
|
|
<< "Layer: " << layer->name
|
|
<< " Cannot bind pointer to already allocated input(" << prevLayer->name
|
|
<< "), due to size_allocated=" << inputDesc->bytes_allocated_for_input[prevLayer->name]
|
|
<< ", and size_requested=" << num_data_bytes_in;
|
|
}
|
|
|
|
if (offset >= 0) {
|
|
gnamem->bind_ptr(ptr, &inputDesc->getPtrInputsGlobal(prevLayer->name).front(), offset);
|
|
} else {
|
|
gnamem->bind_ptr(&inputDesc->getPtrInputsGlobal(prevLayer->name).front(), ptr, -offset);
|
|
}
|
|
|
|
return prevLayer;
|
|
}
|
|
// const input
|
|
if (LayerInfo(prevLayer).isConst()) {
|
|
if (offset >= 0) {
|
|
gnamem->bind_ptr(ptr, const_connections[prevLayer->name], offset);
|
|
} else {
|
|
gnamem->bind_ptr(const_connections[prevLayer->name], ptr, -offset);
|
|
}
|
|
|
|
return prevLayer;
|
|
}
|
|
|
|
LayerInfo layerInfoObj(prevLayer);
|
|
|
|
// connecting to split/slice splitiing layers
|
|
if (layerInfoObj.isSplit() || layerInfoObj.isSlice()) {
|
|
auto& splittingLayer = prevLayer;
|
|
auto& splitName = splittingLayer->name;
|
|
|
|
// we look for this split layer pointer in pre calculated map
|
|
auto splitLayerInfo = split_connection.find(splitName);
|
|
|
|
if (splitLayerInfo != split_connection.end()) {
|
|
auto &splitLayerInfoItem = splitLayerInfo->second;
|
|
// find this input in vector sum all outputs in primitive
|
|
auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(),
|
|
splitLayerInfoItem.splitOutputLayers.end(),
|
|
[&idx, &layer](GNAPluginNS::GNASplitLayer::SplitConnectedLayerInfo &item) {
|
|
return item.connectedTo == layer && item.insDataIdx == idx;
|
|
});
|
|
|
|
if (it != splitLayerInfoItem.splitOutputLayers.end()) {
|
|
gnalog() << "Connecting " << splitName << " input \n";
|
|
auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0);
|
|
gnalog() << "Connected \n";
|
|
return res;
|
|
}
|
|
}
|
|
THROW_GNA_EXCEPTION << prevLayer->type << " layer: " << splitName << " is not included in extra map";
|
|
} else if (layerInfoObj.isConcat()) {
|
|
auto concatLayerInfo = concat_connection.find(
|
|
prevLayer->name);
|
|
if (concatLayerInfo != concat_connection.end()) {
|
|
auto & concatLayerInfoItem = concatLayerInfo->second;
|
|
// dnnLayer that is input for concat layer
|
|
gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
|
|
// return layer over concat
|
|
return CNNNetPrevLayer(prevLayer);
|
|
}
|
|
} else if (layerInfoObj.isCrop()) {
|
|
auto cropLayerInfo = crop_connection.find(
|
|
prevLayer->name);
|
|
if (cropLayerInfo != crop_connection.end()) {
|
|
auto & cropLayerInfoItem = cropLayerInfo->second;
|
|
gnamem->bind_ptr(ptr, &cropLayerInfoItem.gna_ptr, offset);
|
|
return CNNNetPrevLayer(prevLayer);
|
|
}
|
|
}
|
|
auto prevDnnLayer = dnnComponents.findComponent(prevLayer);
|
|
|
|
// check for generic prev layer
|
|
if (prevDnnLayer != nullptr) {
|
|
gnamem->bind_ptr(ptr, &prevDnnLayer->ptr_outputs, offset);
|
|
return prevLayer;
|
|
}
|
|
|
|
auto prevMemoryLayer =
|
|
std::find_if(begin(memory_connection), end(memory_connection), [&](MemoryConnection::value_type &comp) {
|
|
return comp.second.getInput()->name == prevLayer->name;
|
|
});
|
|
if (prevMemoryLayer != memory_connection.end()) {
|
|
// dnnLayer that is input for memory output layer
|
|
// TODO: this is duplicate with connect output
|
|
auto& memoryLayer = prevMemoryLayer->second;
|
|
if (memoryLayer.reserved_size == 0) {
|
|
auto memorySize = InferenceEngine::details::product(memoryLayer.getDims()) * memoryLayer.elementSizeBytes();
|
|
|
|
// negative offset used for indicate that memory layer should be bound to given buffer
|
|
if (offset >= 0) {
|
|
memorySize = std::max(memorySize, num_data_bytes_in);
|
|
gnamem->reserve_ptr(&memoryLayer.gna_ptr, ALIGN64(memorySize), 64);
|
|
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
|
|
} else {
|
|
if (num_data_bytes_in > memorySize) {
|
|
THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of "
|
|
<< num_data_bytes_in << " is more then state tensor size of: " << memorySize;
|
|
}
|
|
gnamem->bind_ptr(&memoryLayer.gna_ptr, ptr, -offset);
|
|
}
|
|
|
|
memoryLayer.reserved_size = ALIGN64(memorySize);
|
|
} else {
|
|
IE_ASSERT(memoryLayer.reserved_size >= ALIGN64(num_data_bytes_in));
|
|
gnamem->bind_ptr(ptr, &memoryLayer.gna_ptr, offset);
|
|
}
|
|
|
|
return prevLayer;
|
|
}
|
|
|
|
// several layers are to be skipped right now
|
|
if (LayerInfo(prevLayer).isNonFunctional()) {
|
|
gnalog() << "Skipping non functional layer: " << prevLayer->name << "\n";
|
|
return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
|
|
}
|
|
|
|
// permute layer resulted in trivial permute
|
|
if (LayerInfo(prevLayer).isPermute()) {
|
|
if (!LayerInfo(prevLayer).isTrivialPermute()) {
|
|
// we should have GNA primitive for it
|
|
THROW_GNA_EXCEPTION << "missed gna primitive for permute: " << prevLayer->name;
|
|
}
|
|
gnalog() << "Skipping trivial permute layer: " << prevLayer->name << "\n";
|
|
return connectInput(prevLayer, ptr, num_data_bytes_in, offset, 0);
|
|
}
|
|
|
|
|
|
THROW_GNA_EXCEPTION << "Cannot connect input for: " << layer->name;
|
|
}
|
|
|
|
void GNAGraphCompiler::Reset() {
|
|
for (auto && memLayer : memory_connection) {
|
|
std::memset(memLayer.second.gna_ptr, 0, memLayer.second.reserved_size);
|
|
}
|
|
for (auto && concatLayer : concat_connection) {
|
|
std::memset(concatLayer.second.gna_ptr, 0, concatLayer.second.reserved_size);
|
|
}
|
|
}
|