1315 lines
55 KiB
C++
1315 lines
55 KiB
C++
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
|
|
|
|
#define NOMINMAX
|
|
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <cstring>
|
|
#include <list>
|
|
#include <algorithm>
|
|
#include <map>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <memory>
|
|
#include <utility>
|
|
#include <limits>
|
|
|
|
#include <legacy/graph_tools.hpp>
|
|
#include <cpp_interfaces/exception2status.hpp>
|
|
#include <legacy/net_pass.h>
|
|
#include <debug.h>
|
|
#include <gna/gna_config.hpp>
|
|
#include "gna_plugin_config.hpp"
|
|
#include <legacy/ie_util_internal.hpp>
|
|
#include "gna_plugin.hpp"
|
|
#include "optimizer/gna_pass_manager.hpp"
|
|
#include "layers/gna_layer_type.hpp"
|
|
#include "preprocessing.hpp"
|
|
#include "frontend/weights_converter.hpp"
|
|
#include "frontend/model_quantizer.hpp"
|
|
#include "gna_fused_iterator.hpp"
|
|
#include "backend/am_intel_dnn.hpp"
|
|
#include "memory/gna_allocator.hpp"
|
|
#include "memory/gna_memory_state.hpp"
|
|
#include "gna_model_serial.hpp"
|
|
#include "runtime/gna_float_runtime.hpp"
|
|
|
|
#if GNA_LIB_VER == 2
|
|
#include <gna2-model-api.h>
|
|
|
|
uint32_t ToByteSize(const Gna2DataType type) {
|
|
switch (type) {
|
|
case Gna2DataTypeInt8:
|
|
case Gna2DataTypeUint8:
|
|
return 1;
|
|
case Gna2DataTypeInt16:
|
|
case Gna2DataTypeUint16:
|
|
return 2;
|
|
case Gna2DataTypeInt32:
|
|
case Gna2DataTypeUint32:
|
|
return 4;
|
|
case Gna2DataTypeInt64:
|
|
case Gna2DataTypeUint64:
|
|
return 8;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Namespace-scope definition (no initializer here) of the static constexpr member of GNAPlugin;
// it is needed because the member is passed by reference (std::make_tuple) later in this file.
constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID;
|
|
#endif
|
|
using namespace InferenceEngine;
|
|
using namespace std;
|
|
using namespace GNAPluginNS;
|
|
using namespace InferenceEngine::details;
|
|
|
|
#ifdef __clang__
|
|
namespace InferenceEngine {
|
|
template<>
|
|
InferenceEngine::TBlob<intel_compound_bias_t, std::enable_if<true, void> >::~TBlob() { free(); }
|
|
}
|
|
#endif // __clang__
|
|
|
|
template <typename T, typename U>
|
|
void GNAPlugin::copyInputData(T *dst,
|
|
const U *src,
|
|
uint32_t num_frames,
|
|
uint32_t num_group,
|
|
uint32_t num_vector_elements,
|
|
uint32_t num_vector_stride,
|
|
intel_dnn_orientation_t orientation,
|
|
float scaleFactor) {
|
|
if (!dst || !src) {
|
|
return;
|
|
}
|
|
if (orientation == kDnnInterleavedOrientation) {
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
for (uint32_t j = 0; j < num_vector_elements; j++) {
|
|
if (!std::is_same<T, U>::value) {
|
|
dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * scaleFactor);
|
|
} else {
|
|
dst[j * num_group + i] = src[i * num_vector_elements + j];
|
|
}
|
|
}
|
|
// pad to meet weight matrix row length requirement
|
|
for (uint32_t j = num_vector_elements; j < num_vector_stride; j++) {
|
|
dst[j * num_group + i] = 0;
|
|
}
|
|
}
|
|
// pad partial group
|
|
for (uint32_t i = num_frames; i < num_group; i++) {
|
|
for (uint32_t j = 0; j < num_vector_stride; j++) {
|
|
dst[j * num_group + i] = 0;
|
|
}
|
|
}
|
|
} else {
|
|
if (!std::is_same<T, U>::value) {
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
T *ptr_dst_vec = reinterpret_cast<T *>(dst) + i * num_vector_stride;
|
|
const U *ptr_src_vec = reinterpret_cast<const U *>(src) + i * num_vector_elements;
|
|
std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
|
|
for (int j=0; j < num_vector_elements; j++) {
|
|
ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * scaleFactor);
|
|
}
|
|
}
|
|
|
|
} else {
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
void *ptr_dst_vec = reinterpret_cast<uint8_t *>(dst) + i * num_vector_stride * sizeof(T);
|
|
const void *ptr_src_vec = reinterpret_cast<const uint8_t *>(src) + i * num_vector_elements * sizeof(U);
|
|
std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
|
|
ie_memcpy(ptr_dst_vec, num_vector_elements * sizeof(T),
|
|
ptr_src_vec, num_vector_elements * sizeof(T));
|
|
}
|
|
}
|
|
|
|
for (uint32_t i = num_frames; i < num_group; i++) {
|
|
void *ptr_dst_vec = reinterpret_cast<uint8_t *>(dst) + i * num_vector_stride * sizeof(T);
|
|
std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename T, typename U>
|
|
void GNAPlugin::copyInputDataWithSplit(T *const dst,
|
|
const U *src,
|
|
const GNASplitLayer& splitInfo,
|
|
size_t precision_size,
|
|
int idx) {
|
|
if (!dst || !src) {
|
|
return;
|
|
}
|
|
T *dst_ptr = dst;
|
|
const U *src_ptr = src;
|
|
precision_size = sizeof(T);
|
|
// we found split/slice layer connected to Input
|
|
for (auto&& outputLayer : splitInfo.splitOutputLayers) {
|
|
uint32_t begin = outputLayer.offset / precision_size;
|
|
uint32_t end = (outputLayer.offset + outputLayer.pure_size)/precision_size;
|
|
if (dst_ptr - dst >= end) {
|
|
// output layer with bind pointer as previous one. Skip
|
|
continue;
|
|
}
|
|
for (uint32_t i = begin; i < end; ++i) {
|
|
if (!std::is_same<T, U>::value) {
|
|
*(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * inputsDesc->getScaleFactor(idx));
|
|
} else {
|
|
*(dst_ptr++) = *(src_ptr++);
|
|
}
|
|
}
|
|
begin = end;
|
|
end = (outputLayer.offset + ALIGN64(outputLayer.pure_size))/precision_size;
|
|
std::memset(dst_ptr, 0, (end - begin )* sizeof(uint16_t));
|
|
dst_ptr += end - begin;
|
|
}
|
|
}
|
|
|
|
void GNAPlugin::ExportScores(void *ptr_dst,
|
|
const void *ptr_src,
|
|
intel_dnn_orientation_t orientation,
|
|
uint32_t num_frames,
|
|
uint32_t num_group,
|
|
uint32_t num_vector_elements,
|
|
uint32_t num_active_elements,
|
|
uint32_t num_vector_stride,
|
|
uint32_t num_bytes_per_element_input,
|
|
uint32_t num_bytes_per_element) {
|
|
// source scores are possibly padded to multiple of 8 and possibly interleaved
|
|
// rotate if necessary and only copy actual scores (not padding)
|
|
if (orientation == kDnnInterleavedOrientation) {
|
|
if (num_bytes_per_element == 2) {
|
|
int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
const int16_t *src = reinterpret_cast<const int16_t *>(ptr_src);
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
for (uint32_t j = 0; j < num_active_elements; j++) {
|
|
dst[i * num_vector_elements + j] = src[j * num_group + i];
|
|
}
|
|
for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
|
|
dst[i * num_vector_elements + j] = 0;
|
|
}
|
|
}
|
|
} else if (num_bytes_per_element == 4) { // should work for both int and float
|
|
int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
|
|
const int8_t *src = reinterpret_cast<const int8_t*>(ptr_src);
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
for (uint32_t j = 0; j < num_active_elements; j++) {
|
|
auto input_ptr = src + (j * num_group + i) * num_bytes_per_element_input;
|
|
auto dst_ptr = dst + (i * num_vector_elements + j);
|
|
|
|
switch (num_bytes_per_element_input) {
|
|
case 2 : {
|
|
*dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int16_t*>(input_ptr));
|
|
break;
|
|
}
|
|
case 4 : {
|
|
*dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
|
|
break;
|
|
}
|
|
default:
|
|
THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << num_bytes_per_element_input << "bytes";
|
|
}
|
|
}
|
|
for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
|
|
dst[i * num_vector_elements + j] = 0;
|
|
}
|
|
}
|
|
} else {
|
|
THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
|
|
}
|
|
} else {
|
|
if (num_bytes_per_element == 2) {
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
auto ptr_dst_vec = reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(int16_t);
|
|
auto ptr_src_vec = reinterpret_cast<const uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(int16_t);
|
|
memset(ptr_dst_vec, 0, num_vector_elements * sizeof(int16_t));
|
|
ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(int16_t),
|
|
ptr_src_vec, num_active_elements * sizeof(int16_t));
|
|
}
|
|
} else if (num_bytes_per_element == 4) { // should work for both int and float
|
|
for (uint32_t i = 0; i < num_frames; i++) {
|
|
void *ptr_dst_vec = reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(float);
|
|
const void *ptr_src_vec = reinterpret_cast<const uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(float);
|
|
memset(ptr_dst_vec, 0, num_vector_elements * sizeof(float));
|
|
ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(float),
|
|
ptr_src_vec, num_active_elements * sizeof(float));
|
|
}
|
|
} else {
|
|
THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
|
|
}
|
|
}
|
|
}
|
|
|
|
void GNAPlugin::ImportFrames(
|
|
void *ptr_dst,
|
|
const void *ptr_src,
|
|
Precision input_precision,
|
|
float scaleFactor,
|
|
intel_dnn_orientation_t orientation,
|
|
uint32_t num_frames,
|
|
uint32_t num_group,
|
|
uint32_t num_vector_elements,
|
|
uint32_t num_vector_stride) {
|
|
if (orientation == kDnnInterleavedOrientation) {
|
|
// TODO : fix that as well
|
|
if (input_precision == Precision::U8) {
|
|
auto src = reinterpret_cast<const uint8_t *>(ptr_src);
|
|
auto dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else if (input_precision.size() == 2) {
|
|
auto dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
auto src = reinterpret_cast<const int16_t *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else if (input_precision.size() == 4) {
|
|
if (!gnadevice) {
|
|
auto dst = reinterpret_cast<float *>(ptr_dst);
|
|
auto src = reinterpret_cast<const float *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else {
|
|
auto dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
auto src = reinterpret_cast<const float *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
}
|
|
}
|
|
} else {
|
|
if (input_precision == Precision::U8) {
|
|
auto src = reinterpret_cast<const uint8_t *>(ptr_src);
|
|
if (!gnadevice) {
|
|
auto dst = reinterpret_cast<float *>(ptr_dst);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else {
|
|
auto dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
}
|
|
|
|
} else if (input_precision.size()== 2) {
|
|
auto dst = reinterpret_cast<int16_t *>(ptr_dst);
|
|
auto src = reinterpret_cast<const int16_t *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else if (input_precision.size() == 4) {
|
|
if (!gnadevice) {
|
|
auto dst = reinterpret_cast<float *>(ptr_dst);
|
|
auto src = reinterpret_cast<const float *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
} else {
|
|
auto dst = reinterpret_cast<uint16_t *>(ptr_dst);
|
|
auto src = reinterpret_cast<const float *>(ptr_src);
|
|
copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation, scaleFactor);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * @brief Default constructor: creates the shared helper objects via Init() and then
 *        calls UpdateFieldsFromConfig().
 */
GNAPlugin::GNAPlugin() {
    Init();
    UpdateFieldsFromConfig();
}
|
|
|
|
/**
 * @brief Constructs the plugin with user configuration: creates the shared helper objects via
 *        Init() and then passes @p configMap to SetConfig().
 * @param configMap key/value configuration entries
 */
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
    Init();
    SetConfig(configMap);
}
|
|
|
|
void GNAPlugin::Init() {
|
|
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
|
|
inputsDesc = std::make_shared<GNAPluginNS::InputDesc>(GNAPluginNS::InputDesc());
|
|
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags());
|
|
|
|
graphCompiler.setDNNPtr(dnn);
|
|
graphCompiler.setInputDescPtr(inputsDesc);
|
|
graphCompiler.setGNAFlagsPtr(gnaFlags);
|
|
}
|
|
|
|
/**
 * @brief Creates the GNA device helper and a GNA memory object backed by a GNAAllocator bound to
 *        that device, then registers the memory object with the graph compiler.
 */
void GNAPlugin::InitGNADevice() {
    // every library version other than 1 takes the device consistency setting as an extra leading argument
    gnadevice = std::make_shared<GNADeviceHelper>(
#if GNA_LIB_VER != 1
                config.pluginGna2DeviceConsistent,
#endif
                gnaFlags->gna_lib_async_threads_num,
                gnaFlags->gna_openmp_multithreading,
                gnaFlags->performance_counting);

    size_t gnaPageBytes = 4096;
    gnamem = std::make_shared<gna_memory_type>(memory::make_polymorph<memory::GNAAllocator>(gnadevice), gnaPageBytes);
    graphCompiler.setGNAMemoryPtr(gnamem);
}
|
|
|
|
/**
 * @brief Compiles a network into GNA primitives and prepares all buffers needed for inference.
 *
 * Steps, in order:
 *  1. wrap the network into a CNNNetworkImpl when getFunction() is set, convert I64/U64/U32 to I32
 *     and verify that all layers are supported;
 *  2. copy the network (fp32 mode) or quantize it (I16 / I8), running the optimisation passes;
 *  3. sort layers, collect memory / concat / split connections and create layer primitives;
 *  4. locate the GNA representation of every network output;
 *  5. reserve and commit memory, initialise the DNN backend and the native GNA model(s),
 *     relocating RW pointers for every additional parallel request;
 *  6. derive the orientation of every input and dump the model when requested.
 *
 * @param _network network to load
 * @throws GNA exception when the topology is not supported or cannot be mapped to GNA
 */
void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
    std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
    if (_network.getFunction()) {
        convertedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(_network);
    }
    InferenceEngine::ICNNNetwork &network = convertedNetwork ? *convertedNetwork : _network;

    NetPass::ConvertPrecision(network, Precision::I64, Precision::I32);
    NetPass::ConvertPrecision(network, Precision::U64, Precision::I32);
    NetPass::ConvertPrecision(network, Precision::U32, Precision::I32);

    // Check the input network
    std::string error;
    if (!AreLayersSupported(network, error)) {
        THROW_GNA_EXCEPTION << error.c_str();
    }

    // network optimisation phases
    int passIdx = 0;
    auto run_passes = [&] (const CNNNetPtr& net, bool runBeforeCopy) {
        auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy}, net);
        passes->registerPass<RemoveConstPass>();
        passes->registerPass<UnrollTIPass>();
        passes->registerPass<RemoveConstPass>();
        passes->registerPass<InsertIdentityToLSTMCellPass>();
        passes->registerPass<UnrollLSTMCellPass>();
        passes->registerPass<RemoveSingleInputConcatPass>();

        passes->registerPass<SubstitutePReluPass>();
        passes->registerPass<SubstituteSoftSignPass>();

        passes->registerPass<ReorderMaxPoolPass>();
        passes->registerPass<EltwiseSplitOverChannelsPass>();
        passes->registerPass<InsertSplitAligningFilterPass>();

        passes->registerPass<InsertConcatAligningFilterPass>();
        passes->registerPass<ReorderConcatInputsPass>();
        if (policy.PermutePolicy != Policy::Permute::DISABLED) {
            passes->registerPass<ReversePermutationsPass>();
        }
        if (policy.NHWCToNCHWPolicy != Policy::NHWCToNCHW::DISABLED) {
            passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
        }
        passes->registerPass<InsertIdentityLayerPass>();
        passes->registerPass<InsertCopyLayerPass>();
        passes->registerPass<InsertDiagonalLayerPass>();
        passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
        passes->registerPass<SubstituteScaleShiftBroadCastPass>();
        passes->registerPass<FuseMultipleIdentitiesPass>();
        passes->registerPass<BroadcastConstPass>();
        // the pass index is carried over between invocations
        passIdx = passes->run(passIdx);
    };

    ICNNNetwork::Ptr newNet;
    if (gnaFlags->sw_fp32) {
        auto visitor = [&](InferenceEngine::CNNLayerPtr lp) {
            transformLayer(lp, WeightsConverter());
            return lp;
        };
        newNet = InferenceEngine::CNNNetCopy(network, visitor);
        // to run all passes need to have two calls to pass manager
        run_passes(newNet, true);
        run_passes(newNet, false);
    } else {
        switch (config.gnaPrecision) {
            // each case gets its own scope so that no case label jumps over a declaration
            case Precision::I16: {
                ModelQuantizer<QuantI16> q16;
                newNet = q16.quantize(network, run_passes, inputsDesc->inputScaleFactors);
                break;
            }
            case Precision::I8: {
                ModelQuantizer<QuantI8> q8;
                newNet = q8.quantize(network, run_passes, inputsDesc->inputScaleFactors);
                break;
            }
            default:
                THROW_GNA_EXCEPTION << "no mans land for GNA precision";
                break;
        }
    }

    auto inputLayers = CNNNetGetAllInputLayers(*newNet);

#ifdef PLOT
    std::ofstream file("gna_passes.dot");
    saveGraphToDot(*newNet, file, [](const CNNLayerPtr layer,
                                     ordered_properties &printed_properties,
                                     ordered_properties &node_properties) {
        // printing quantized params
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
        if (!quantized) {
            return;
        }
        printed_properties.emplace_back(
            "scale factor", std::to_string(quantized->_dst_quant.scale));
    });
#endif

    auto sortedNet = CNNNetSortTopologicallyEx(*newNet, make_fuzed_order);

    // passing policy to compiler
    graphCompiler.setPolicy(policy);

    if (sortedNet.empty()) {
        THROW_GNA_EXCEPTION << "Sorted network is empty";
    }

    std::vector<CNNLayerPtr> sortedNoMem;
    std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
    // find all memory layers pairs and mark which one used as outputs
    for (auto &layer : sortedNet) {
        auto generic = dynamic_cast<GenericLayer *>(layer.get());
        if (generic == nullptr) {
            sortedNoMem.push_back(layer);
            continue;
        }
        LayerInfo layerInfo(layer);
        if (layerInfo.isMemory()) {
            // collect all memory pairs
            const auto id = generic->GetParamAsString("id");
            const int pairSize = generic->GetParamAsInt("size");
            const int pairIndex = generic->GetParamAsInt("index");
            // the index is used to address the vector below, so it must fit into the declared size
            if (pairSize < 0 || pairIndex < 0 || pairIndex >= pairSize) {
                THROW_GNA_LAYER_EXCEPTION(layer) << " has invalid memory index " << pairIndex
                                                 << " for memory size " << pairSize;
            }
            auto &pair = memoryPairs[id];
            pair.resize(pairSize);
            pair[pairIndex] = layer;
            continue;
        } else if (layerInfo.isConcat()) {
            graphCompiler.fillConcatConnections(layer);
        } else if (layerInfo.isSplit() || layerInfo.isSlice()) {
            graphCompiler.fillSplitConnections(layer);
        }
        sortedNoMem.push_back(layer);
    }

    // fill in extra storage with memory layers
    graphCompiler.fillMemoryConnections(memoryPairs);

    // networks with memory connections are limited to a single request at a time
    if (!graphCompiler.memory_connection.empty()) {
        gnaFlags->gna_lib_async_threads_num = 1;
    }

    if (gnaFlags->sw_fp32) {
        gnamem.reset(new gna_memory_type(memory::make_polymorph<std::allocator<uint8_t>>()));
        graphCompiler.setGNAMemoryPtr(gnamem);
    } else {
        InitGNADevice();
    }

    // keep inputs information and create input primitives
    newNet->getInputsInfo(inputsDataMap);
    if (inputsDataMap.empty()) {
        THROW_GNA_EXCEPTION << " No inputs for the topology";
    }

    // keep output dims
    newNet->getOutputsInfo(outputsDataMap);
    if (outputsDataMap.empty()) {
        THROW_GNA_EXCEPTION << "No outputs for the topology";
    }

    for (auto && input : inputsDataMap) {
        inputsDesc->getPtrInputsGlobal(input.first).resize(gnaFlags->gna_lib_async_threads_num);
    }

    // CreatingLayer primitives
    for (auto & layer : sortedNoMem) {
        graphCompiler.CreateLayerPrimitive(layer);
    }
    for (auto& inputLayer : inputLayers) {
        auto layerInfo = LayerInfo(inputLayer);
        if (layerInfo.isInput() && 0 == inputsDesc->bytes_allocated_for_input[inputLayer->name]) {
            graphCompiler.connectOutput(inputLayer, &inputsDesc->getPtrInputsGlobal(inputLayer->name).front(), 0);
        }
    }
    // TODO: graph might be static - should we support that
    if (graphCompiler.dnnComponents.components.empty()) {
        THROW_GNA_EXCEPTION << "No GNA primitives created based on topology. This might indicate trivial topology";
    }

    /// setting-up output layers information
    outputsDesc.resize(outputsDataMap.size());

    auto initOutput = [this]
            (int idx, const intel_dnn_component_t & component, CNNLayerPtr layer) {
        // auto idx = std::distance(outputsDataMap.begin(), outputPort);
        auto & desc = outputsDesc[idx];
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

        desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
        desc.orientation = component.orientation_out;
        desc.num_bytes_per_element = component.num_bytes_per_output;
        desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
        // TODO: this need to be fixed
        desc.num_elements = component.num_rows_out;

        // binding ptr for first infer request - then others will be setup during relocation
        gnamem->bind_ptr(&desc.ptrs.front(), &component.ptr_outputs);
    };

    int portId = 0;
    for (auto && outPort : outputsDataMap) {
        // gets output layer pointer in original topology not in cloned
        auto outLayer = getCreatorLayer(outPort.second).lock();
        // lock() yields an empty pointer when the creator layer is gone; it is dereferenced below
        if (!outLayer) {
            THROW_GNA_EXCEPTION << "cannot find creator layer for output: " << outPort.first;
        }

        // Memory layers are not dnnComponents hence we need to make switch with identity layer
        if (outLayer->type == "Memory") {
            // traverse memory connection to find corresponding output_memory
            for (auto && memConnection : graphCompiler.memory_connection) {
                if (memConnection.second.getInput()->name == outLayer->name) {
                    // if connection is found, replace memory input layer with memory output layer
                    outLayer = memConnection.second.getOutput();
                    break;
                }
            }
        }

        // searching for outData represented in GNA blob
        // using ufs - upper first search
        gnalog() << "[UFS] searching for : "<< outPort.first << " representation in GNA\n";
        bool stopSearching = false;

        CNNNetDFS(outLayer, [this, &outPort, portId, &stopSearching, &initOutput](CNNLayerPtr layer) {
            auto irLayerAvatar = std::find_if(
                graphCompiler.dnnComponents.components.begin(),
                graphCompiler.dnnComponents.components.end(),
                [&layer](const backend::DnnComponents::storage_type::value_type & value) {
                    return value.name == layer->name;
            });

            gnalog() << "[UFS] from : "<< outPort.first <<" reached: " << layer->name << "\n";

            // probing gna_primitives
            if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
                initOutput(portId, irLayerAvatar->dnnComponent, layer);
                stopSearching = true;
            }

            // probing concatInfo
            if (!stopSearching && LayerInfo(layer).isConcat()) {
                auto concatConnection = graphCompiler.concat_connection.find(layer->name);
                if (concatConnection != graphCompiler.concat_connection.end()) {
                    //initOutput(portId, irLayerAvatar->second, layer);

                    auto &desc = outputsDesc[portId];
                    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

                    desc.ptrs.resize(gnaFlags->gna_lib_async_threads_num);
                    // TODO: what is orientation for concat
                    desc.orientation = kDnnInterleavedOrientation;
                    desc.num_bytes_per_element = layer->outData.front()->getPrecision().size();
                    desc.scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
                    desc.num_elements = concatConnection->second.reserved_size / desc.num_bytes_per_element;

                    // binding ptr for first infer request - then others will be setup during relocation
                    gnamem->bind_ptr(&desc.ptrs.front(), &concatConnection->second.gna_ptr);
                    stopSearching = true;
                }
            }
        }, true, [&stopSearching](InferenceEngine::CNNLayer* from) {
            // once a representation is found no further upstream layers are offered to the search
            return make_upstream_order(!stopSearching ? from : nullptr);
        });
        if (!stopSearching) {
            THROW_GNA_EXCEPTION << "unsupported topology: cannot locate " << outPort.first
                                << " after compiling GNA graph";
        }
        portId++;
    }

    // TODO: how active list will work in multioutput case
    // make room for active list
    gnamem->reserve_ptr(nullptr,
        ALIGN64(outputsDesc.front().num_bytes_per_element * outputsDesc.front().num_elements), 64);

    void *pParallelExecutionData = nullptr;

    // reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at least
    rwSegmentSize = gnamem->getRWBytes();
    if (gnaFlags->gna_lib_async_threads_num > 1) {
        gnamem->reserve_ptr(&pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->gna_lib_async_threads_num - 1), 64);
    }

    gnamem->commit();

    dnn->Init(gnamem->getBasePtr(),
             gnamem->getTotalBytes(),
             gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt,
             1);

    // TODO: this copy is unneeded; in fact, we can directly create gna structs from list
    auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
    dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());

    // in fp32 mode last PWL cannot be computed without that
    dnn->InitActiveList(nullptr);

#if GNA_LIB_VER == 2
    gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
#else
    nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
#endif
    if (!gnaFlags->sw_fp32) {
        // number of layer gets calculated inside that InitGNAStruct function
#if GNA_LIB_VER == 2
        dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
#else
        dnn->InitGNAStruct(&std::get<0>(nnets.front())->obj);
#endif
    }

    // creating same gna RW segment for parallel infer requests
    // '<' rather than '!=' so that a thread count of 0 cannot turn this into an endless loop
    for (int i = 1; i < gnaFlags->gna_lib_async_threads_num; i++) {
#if GNA_LIB_VER == 2
        gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
        // this can be improved by just copy all structures, but we are too lazy
        dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj);
#else
        nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
        dnn->InitGNAStruct(&std::get<0>(nnets.back())->obj);
#endif
        // relocate rw pointers to new offset
        auto basePtr = reinterpret_cast<uint8_t*>(pParallelExecutionData) + rwSegmentSize * (i - 1);

        // maps a pointer inside the committed memory onto the same offset from basePtr
        auto relocate = [basePtr, this](void *& ptr_out, void * ptr_in) {
            if (ptr_in == nullptr) {
                ptr_out = nullptr;
            } else {
                auto offset = reinterpret_cast<uint8_t *>(ptr_in) - reinterpret_cast<uint8_t *>(gnamem->getBasePtr());
                ptr_out = basePtr + offset;
            }
        };

        for (auto &&input : inputsDesc->ptr_inputs_global_storage) {
            relocate(input[i], input[0]);
        }

        // relocating all output pointers
        for (auto &outputDesc : outputsDesc) {
            relocate(outputDesc.ptrs[i], outputDesc.ptrs[0]);
        }

#if GNA_LIB_VER == 2
        for (uint32_t j = 0; j != std::get<0>(gnaModels.front())->obj.NumberOfOperations; j++) {
            auto & gnaOperation = std::get<0>(gnaModels[i])->obj.Operations[j];
            relocate(const_cast<Gna2Tensor*>(gnaOperation.Operands[0])->Data, gnaOperation.Operands[0]->Data);
            relocate(const_cast<Gna2Tensor*>(gnaOperation.Operands[1])->Data, gnaOperation.Operands[1]->Data);
        }
#else
        for (uint32_t j = 0; j != std::get<0>(nnets.front())->obj.nLayers; j++) {
            auto & layer = std::get<0>(nnets[i])->obj.pLayers[j];
            relocate(layer.pInputs, layer.pInputs);
            relocate(layer.pOutputs, layer.pOutputs);
            relocate(layer.pOutputsIntermediate, layer.pOutputsIntermediate);
        }
#endif
    }

    // calculating input orientation without memory layers, since their orientation not changed during infer right now
    std::unordered_map<string, std::vector<string>> skippedLayers;

    bool withConv = false;
    for (auto &layer : sortedNet) {
        auto layerInfo = LayerInfo(layer);
        if (layerInfo.isConvolution()) {
            withConv = true;
            break;
        }
    }
    if (withConv) {
        for (auto &inputLayer : sortedNet) {
            if (!LayerInfo(inputLayer).isInput()) {
                continue;
            }
            auto doesntHaveGnaMapping = [this] (CNNLayerPtr l) {
                auto dnnLayer = graphCompiler.dnnComponents.findComponent(l);
                return dnnLayer == nullptr;
            };

            auto nextLayers = CNNNetGetAllNextLayersSkipCertain(inputLayer, -1, doesntHaveGnaMapping);

            for (auto &nextLayer : nextLayers) {
                auto dnnLayer = graphCompiler.dnnComponents.findComponent(nextLayer);
                // non functional layer - skipped by gna
                if (nullptr == dnnLayer) {
                    THROW_GNA_LAYER_EXCEPTION(inputLayer) << " gna mapped layer search connection failed";
                }
                // input orientation might be already initialized, thus verify that it matches
                if (!inputsDesc->orientation_in.count(inputLayer->name)) {
                    inputsDesc->orientation_in[inputLayer->name] = dnnLayer->orientation_in;
                } else {
                    if (inputsDesc->orientation_in[inputLayer->name] != dnnLayer->orientation_in) {
                        THROW_GNA_EXCEPTION << "orientation for input layer: " << inputLayer->name << " cannot be calculated";
                    }
                }
            }
        }
    } else {
        for (auto &inputLayer : inputLayers) {
            inputsDesc->orientation_in[inputLayer->name] = kDnnInterleavedOrientation;
        }
    }

    do_rotate_input = dnn->do_rotate_input;
    num_rotate_rows = dnn->num_rotate_rows;
    num_rotate_columns = dnn->num_rotate_columns;

    DumpXNNToFile();

#ifdef PLOT
    dnn->WriteGraphWizModel("gna-blob.dot");
#endif
#if GNA_LIB_VER == 2
    createRequestConfigsForGnaModels();
#endif
}
|
|
|
|
#if GNA_LIB_VER == 2
|
|
void GNAPlugin::createRequestConfigsForGnaModels() {
|
|
if (!gnadevice) {
|
|
gnaRequestConfigToRequestIdMap.push_back(std::make_tuple(FAKE_REQUEST_CONFIG_ID, -1, InferenceEngine::BlobMap()));
|
|
return;
|
|
}
|
|
for (auto& model : gnaModels) {
|
|
const auto& gnaNnet = std::get<0>(model).get()->obj;
|
|
const auto modelId = gnadevice->createModel(gnaNnet);
|
|
const auto requestConfigId = gnadevice->createRequestConfig(modelId);
|
|
gnaRequestConfigToRequestIdMap.push_back(std::make_tuple(requestConfigId, -1, InferenceEngine::BlobMap()));
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
/**
 * @brief Converts an embedded target name to a numeric device version.
 *
 * The result is (generation << 8) + 0xE, e.g. "GNA1" -> 0x10E.
 *
 * @param deviceString empty string (treated as generation 1) or "GNA" followed by one digit 1..9
 * @return numeric device version
 * @throws GNA exception for any other string
 */
int GNAPlugin::GetDeviceVersionFromString(const std::string deviceString) {
    constexpr uint32_t embeddedSuffix = 0xE;
    if (deviceString.empty())
        return 0x100 + embeddedSuffix;
    if (deviceString.size() == 4 && deviceString.compare(0, 3, "GNA") == 0) {
        const char generation = deviceString[3];
        // accept decimal digits only: a bare "> '0'" test would also let letters and punctuation through
        if (generation >= '1' && generation <= '9') {
            int version = generation - '0';
            version <<= 8;
            version += embeddedSuffix;
            return version;
        }
    }
    THROW_GNA_EXCEPTION << "Wrong GNA generation for embedded model dump: " << deviceString;
}
|
|
|
|
void GNAPlugin::DumpXNNToFile() const {
|
|
// TODO: output precision as well as pointer might be incorrect, LSTM for sure
|
|
// gna looks automatically set layer 0 as output and adjust it's pointer / precision/ size respectively
|
|
if (config.dumpXNNPath.empty()) {
|
|
return;
|
|
}
|
|
|
|
const auto versionInt = GetDeviceVersionFromString(config.dumpXNNGeneration);
|
|
|
|
if (!gnadevice) {
|
|
THROW_GNA_EXCEPTION << "Cannot generate XNNDump for float network";
|
|
}
|
|
std::ofstream dumpStream(config.dumpXNNPath, std::ios::out | std::ios::binary);
|
|
#if GNA_LIB_VER == 1
|
|
if (versionInt != 0x10E)
|
|
THROW_GNA_EXCEPTION << "Wrong GNA version for embedded model dump: " << config.dumpXNNGeneration;
|
|
auto dump = gnadevice->dumpXnn(&std::get<0>(nnets.front())->obj, ptr_active_indices, num_active_indices);
|
|
dump.header.rw_region_size = gnamem->getRWBytes();
|
|
dump.header.input_scaling_factor = inputsDesc->inputScaleFactors.front();
|
|
dump.header.output_scaling_factor = outputsDesc.front().scale_factor;
|
|
dumpStream.write(reinterpret_cast<char*>(&dump.header), sizeof(intel_gna_model_header));
|
|
dumpStream.write(reinterpret_cast<char*>(dump.model.get()), dump.header.model_size);
|
|
#else
|
|
auto const modelId = gnadevice->createModel(std::get<0>(gnaModels.front())->obj);
|
|
if (versionInt == Gna2DeviceVersionEmbedded1_0) {
|
|
auto dump = gnadevice->dumpXnn(modelId);
|
|
dump.header.RwRegionSize = gnamem->getRWBytes();
|
|
dump.header.InputScalingFactor = inputsDesc->inputScaleFactors.front();
|
|
dump.header.OutputScalingFactor = outputsDesc.front().scale_factor;
|
|
dumpStream.write(reinterpret_cast<char*>(&dump.header), sizeof(Gna2ModelSueCreekHeader));
|
|
dumpStream.write(reinterpret_cast<char*>(dump.model.get()), dump.header.ModelSize);
|
|
} else {
|
|
static_assert(sizeof(versionInt) >= sizeof(Gna2DeviceVersion), "");
|
|
gnadevice->dumpXnnForDeviceVersion(modelId, dumpStream,
|
|
*reinterpret_cast<const Gna2DeviceVersion*>(&versionInt));
|
|
}
|
|
gnadevice->releaseModel(modelId);
|
|
#endif
|
|
}
|
|
|
|
void RotateFeatures(uint8_t *ptr_feat,
|
|
size_t element_size,
|
|
uint32_t num_feature_vectors,
|
|
uint32_t num_feature_vector_elements,
|
|
uint32_t num_rotate_rows,
|
|
uint32_t num_rotate_columns) {
|
|
if (num_feature_vector_elements == num_rotate_rows * num_rotate_columns) {
|
|
std::vector<uint8_t> temp(num_feature_vector_elements * element_size);
|
|
for (uint32_t k = 0; k < num_feature_vectors; k++) {
|
|
uint8_t *ptr_in = ptr_feat + k * num_feature_vector_elements * element_size;
|
|
for (uint32_t i = 0; i < num_rotate_rows; i++) {
|
|
for (uint32_t j = 0; j < num_rotate_columns; j++) {
|
|
ie_memcpy(&temp.front() + (j * num_rotate_rows + i)*element_size,
|
|
temp.size() - (i * num_rotate_columns + j)*element_size,
|
|
ptr_in + (i * num_rotate_columns + j)*element_size,
|
|
element_size);
|
|
}
|
|
}
|
|
ie_memcpy(ptr_in, num_feature_vector_elements * element_size,
|
|
&temp.front(), num_feature_vector_elements * element_size);
|
|
}
|
|
} else {
|
|
THROW_GNA_EXCEPTION << "Rotate dimensions (" << num_rotate_rows << "," << num_rotate_columns
|
|
<<") do not match buffer length of "<< num_feature_vector_elements <<" in RotateFeatures()!";
|
|
}
|
|
}
|
|
|
|
/**
 * @brief Copies the user inputs into the plugin input buffers and starts one inference.
 *
 * Picks the first request slot whose request id is -1. When no slot is free and the
 * network has memory connections, request 0 is waited for and the first slot is reused;
 * otherwise a REQUEST_BUSY exception is thrown. Every input blob is validated, imported
 * via ImportFrames and optionally rotated. Without a GNA device the float runtime is run
 * right away; with a device the request is propagated to it.
 *
 * @param inputs input blobs keyed by input name
 * @param result output blobs, stored in the chosen slot (WaitFor reads them from there)
 * @return index of the request slot that was used
 */
uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, InferenceEngine::BlobMap &result) {
#if GNA_LIB_VER == 2
    auto& nnets = gnaRequestConfigToRequestIdMap;
#endif
    // A slot is considered free when its stored request id equals -1.
    auto freeNnet = std::find_if(std::begin(nnets), std::end(nnets), [](decltype(nnets.front()) & slot) {
        return std::get<1>(slot) == -1;
    });

    if (freeNnet == nnets.end()) {
        if (graphCompiler.memory_connection.empty()) {
            THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
                               << "GNA executable network has max of "
                               << static_cast<uint32_t >(gnaFlags->gna_lib_async_threads_num)
                               << " parallel infer requests, please sync one of already running";
        }
        // Networks with memory connections wait for request 0 and reuse the first slot.
        Wait(0);
        freeNnet = nnets.begin();
    }

    auto idx = static_cast<uint32_t>(std::distance(std::begin(nnets), freeNnet));

    int inputNum = 0;
    for (auto &input : inputs) {
        const auto &inputName = input.first;
        const auto &inputBlob = input.second;

        auto inputLayout = inputBlob->getTensorDesc().getLayout();
        if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
            THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: "
                                << inputBlob->getTensorDesc().getLayout();
        }
        // is2D follows the layout the blob really has; inputLayout treats NCHW as NC below.
        auto is2D = inputLayout == Layout::NC || inputLayout == Layout::CN;
        if (inputLayout == NCHW) {
            inputLayout = NC;
        }

        // The three checks below should not happen in user code, however they might happen
        // with any non executable network based integration of a GNAPlugin instance.
        if (!inputsDesc->ptr_inputs_global_id.count(inputName)) {
            THROW_GNA_EXCEPTION << "network not loaded : input pointer for " << inputName << " not set";
        }

        if (inputsDesc->getPtrInputsGlobal(inputName)[idx] == nullptr) {
            THROW_GNA_EXCEPTION << "network not loaded : input pointer for (" << inputName << " at inferRequest #"
                                << idx << " not set";
        }

        if (inputsDesc->getOrientation(inputName) == kDnnUnknownOrientation) {
            THROW_GNA_EXCEPTION << "network not loaded : input orientation for " << inputName << " not set";
        }

        for (auto& outputDesc : outputsDesc) {
            if (outputDesc.orientation == kDnnUnknownOrientation) {
                // same remark as above: not expected for a network loaded through the regular flow
                THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
            }
        }

        auto dims = inputBlob->getTensorDesc().getDims();
        const auto rank = dims.size();

        // 2D blobs contribute their last dimension, other blobs the product of the last three.
        auto importedElements = is2D ? dims[rank - 1] : dims[rank - 1] * dims[rank - 2] * dims[rank - 3];
        auto importedFrames = dims[0];
        auto targetGroups = is2D ? dims[rank - 2] : dims[0];  // TODO: no proper support for groups yet

        auto importedElementSizeBytes = gnaFlags->sw_fp32 ? 4 : 2;
        auto importedBytes = importedElements * importedFrames * importedElementSizeBytes;

        if (inputsDesc->bytes_allocated_for_input[inputName] < importedBytes) {
            THROW_GNA_EXCEPTION << "Cannot import input frames for :" << inputName
                                << ", allocated size: " << inputsDesc->bytes_allocated_for_input[inputName]
                                << ", but input blob size: " << importedBytes;
        }

        ImportFrames(inputsDesc->getPtrInputsGlobal(inputName)[idx],
                     inputBlob->cbuffer().as<float *>(),
                     inputBlob->getTensorDesc().getPrecision(),
                     gnaFlags->sw_fp32 ? 1.0f : inputsDesc->getScaleFactor(inputNum),
                     inputsDesc->getOrientation(inputName),
                     importedFrames,
                     targetGroups,
                     importedElements,
                     importedElements);

        bool isOneChannel = dims[1] == 1;
        if (do_rotate_input && ((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
                                != (inputsDesc->getOrientation(inputName) == kDnnInterleavedOrientation))
            && !isOneChannel) {
            RotateFeatures(reinterpret_cast<uint8_t *>(inputsDesc->getPtrInputsGlobal(inputName)[idx]),
                           gnadevice ? 2 : 4,
                           // TODO: only works for cnn4a and google command so far
                           dims[0],
                           is2D ? dims[rank - 1] : dims[rank - 1] * dims[rank - 3],  // num_feature_vectors looks batch should be there
                           num_rotate_rows,
                           num_rotate_columns);
        }
        ++inputNum;
    }

    if (!gnadevice) {
        // No device: the float runtime executes the network synchronously.
        auto runtime = runtime::FP(dnn);
        runtime.infer();
        if (freeNnet != nnets.end()) {
            std::get<1>(*freeNnet) = 1;
        }
    } else {
#if GNA_LIB_VER == 1
        auto nnet = std::get<0>(*freeNnet).get();
        std::get<1>(*freeNnet) = gnadevice->propagate(&nnet->obj, ptr_active_indices, num_active_indices, config.gna_proc_type);
#else
        const auto reqConfigId = std::get<0>(*freeNnet);
        if (ptr_active_indices != nullptr && num_active_indices > 0 && activeLayerIndex != 0xffffffff)
            gnadevice->setUpActiveList(reqConfigId, activeLayerIndex, ptr_active_indices, num_active_indices);
        std::get<1>(*freeNnet) = gnadevice->propagate(reqConfigId, config.pluginGna2AccMode);
#endif
    }

#ifdef PLOT
    dnn->BeginNewWrite(dnn_dump_write_index);
    if (dnn->num_components() != 0) {
        dnn->WriteDnnText("Net_.txt", kDnnFloat);
    }
    dnn_dump_write_index++;
#endif
    if (freeNnet != nnets.end()) {
        // TODO: GNA2: Substitute properly when using GNA 2.0 Library setting and CPU
        std::get<2>(*freeNnet) = result;
    }
    return idx;
}
|
|
|
|
bool GNAPlugin::Wait(uint32_t request_idx) {
|
|
return WaitFor(request_idx, MAX_TIMEOUT);
|
|
}
|
|
|
|
bool GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
|
|
#if GNA_LIB_VER == 2
|
|
auto& nnets = gnaRequestConfigToRequestIdMap;
|
|
#endif
|
|
if (nnets.size() <= request_idx) return true; // TODO: GNA2: check whether necessary
|
|
// already synced TODO: might be copy required ???
|
|
if (std::get<1>(nnets[request_idx]) == -1) return true;
|
|
|
|
if (gnadevice) {
|
|
if (!gnadevice->wait(std::get<1>(nnets[request_idx]), millisTimeout)) {
|
|
std::get<1>(nnets[request_idx]) = -1;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
std::get<1>(nnets[request_idx]) = -1;
|
|
auto &request = std::get<2>(nnets[request_idx]);
|
|
#ifdef PLOT
|
|
if (dnn->num_components() != 0) {
|
|
dnn->WriteInputAndOutputText();
|
|
}
|
|
#if GNA_LIB_VER == 1
|
|
dnn->WriteInputAndOutputTextGNA(&std::get<0>(nnets[request_idx])->obj);
|
|
#else
|
|
dnn->WriteInputAndOutputTextGNA(std::get<0>(gnaModels[request_idx])->obj);
|
|
#endif
|
|
#endif
|
|
int output_idx = 0;
|
|
for (auto && outputBlobIt : request) {
|
|
auto & outputBlob = outputBlobIt.second;
|
|
auto & outputDesc = outputsDesc[output_idx];
|
|
if (outputBlob->getTensorDesc().getLayout() == Layout::NC || outputBlob->getTensorDesc().getLayout() == Layout::CN
|
|
|| outputBlob->getTensorDesc().getLayout() == Layout::NCHW || outputBlob->getTensorDesc().getLayout() == Layout::NHWC) {
|
|
// TODO: rotate can be incorporated with exporting - used only in unit tests so far
|
|
// TODO: restore:
|
|
// if (orientation_out != kDnnInterleavedOrientation) {
|
|
// if (inputs.size() != 1) {
|
|
// THROW_GNA_EXCEPTION << "Invalid number of inputs for for deinterleave " << inputs.size()
|
|
// << ", only 1 supported";
|
|
// }
|
|
// auto dims = inputs.begin()->second->dims();
|
|
// RotateFeatures(reinterpret_cast<uint8_t*>(ptr_outputs_global),
|
|
// gnadevice ? 2 : 4,
|
|
// dims[dims.size() - 1],
|
|
// dims[0], // num_feature_vectors looks batch should be there
|
|
// dims[0],
|
|
// dims[dims.size() - 1]);
|
|
// }
|
|
auto is2D = outputBlob->getTensorDesc().getLayout() == Layout::NC || outputBlob->getTensorDesc().getLayout() == Layout::CN;
|
|
auto& exportOutputDims = outputBlob->getTensorDesc().getDims();
|
|
auto batchSize = exportOutputDims[0];
|
|
auto elementsPerBatch = is2D ? exportOutputDims[exportOutputDims.size() - 1]
|
|
: exportOutputDims[exportOutputDims.size() - 1]
|
|
* exportOutputDims[exportOutputDims.size() - 2]
|
|
* exportOutputDims[exportOutputDims.size() - 3];
|
|
|
|
ExportScores(outputBlob->buffer(),
|
|
outputDesc.ptrs[request_idx],
|
|
outputDesc.orientation,
|
|
batchSize,
|
|
batchSize,
|
|
elementsPerBatch,
|
|
elementsPerBatch,
|
|
elementsPerBatch,
|
|
outputDesc.num_bytes_per_element,
|
|
sizeof(float));
|
|
|
|
if (gnadevice) {
|
|
#ifdef PLOT
|
|
FILE* f = nullptr;
|
|
static int num_infers = 0;
|
|
{
|
|
f = fopen("ex_scores.txt", "w");
|
|
}
|
|
num_infers++;
|
|
if (f) {
|
|
auto dims = outputBlob->getTensorDesc().getDims();
|
|
for (int i = 0; i < dims[dims.size() - 2]; i++) {
|
|
for (int j = 0; j < dims[dims.size() - 1]; j++) {
|
|
fprintf(f, "%d ", outputBlob->cbuffer().as<int32_t*>()[dims[dims.size() - 1] * i + j]);
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
fprintf(f, "\n\n");
|
|
}
|
|
#endif
|
|
ConvertToFloat(outputBlob->buffer(),
|
|
outputBlob->buffer(),
|
|
elementsPerBatch,
|
|
batchSize,
|
|
outputDesc.scale_factor);
|
|
#ifdef PLOT
|
|
if (f) {
|
|
auto dims = outputBlob->getTensorDesc().getDims();
|
|
for (int i = 0; i < dims[dims.size() - 2]; i++) {
|
|
for (int j = 0; j < dims[dims.size() - 1]; j++) {
|
|
fprintf(f, "%.2f ", outputBlob->cbuffer().as<float*>()[dims[dims.size() - 1] * i + j]);
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
fclose(f);
|
|
}
|
|
#endif
|
|
}
|
|
} else {
|
|
THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC, Layout::CN, Layout::NCHW or Layout::NHWC. But was "
|
|
<< outputBlob->getTensorDesc().getLayout();
|
|
}
|
|
|
|
output_idx++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void GNAPlugin::Reset() {
|
|
graphCompiler.Reset();
|
|
}
|
|
|
|
/**
 * @brief Single-input convenience overload of Infer.
 *
 * Wraps the two blobs into blob maps keyed by the first registered input and output names
 * and forwards to the BlobMap overload. The blobs are wrapped in shared pointers with
 * no-op deleters, so ownership stays with the caller.
 *
 * @param input  input blob
 * @param output blob receiving the result
 * @return the result of Infer(BlobMap, BlobMap)
 * @throws GNA exception when the model does not have exactly one input
 */
bool GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
    BlobMap inputByName;
    BlobMap outputByName;
    if (inputsDataMap.size() != 1) {
        THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&)"<< "model accepts " << inputsDataMap.size() << " inputs";
    }

    // Non-owning wrappers: the deleters intentionally do nothing.
    const auto keepAlive = [](Blob*){};

    IE_ASSERT(!inputsDataMap.empty());
    const auto &inputName = inputsDataMap.begin()->first;
    inputByName[inputName] = std::shared_ptr<Blob>(const_cast<Blob*>(&input), keepAlive);

    IE_ASSERT(!outputsDataMap.empty());
    const auto &outputName = outputsDataMap.begin()->first;
    outputByName[outputName] = std::shared_ptr<Blob>(&output, keepAlive);

    return Infer(inputByName, outputByName);
}
|
|
|
|
/**
 * @brief Runs one inference synchronously: queues it and waits for it.
 * @param input  input blobs keyed by input name
 * @param result output blobs to be filled
 * @return the result of Wait() for the queued request
 */
bool GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
    const auto requestIdx = QueueInference(input, result);
    return Wait(requestIdx);
}
|
|
|
|
/**
 * @brief Creates and allocates a blob shaped like the named network output.
 *
 * The blob is needed as an intermediate buffer for interleave conversion. Its layout is NC
 * for 2-dimensional outputs and NCHW otherwise.
 *
 * @param name      name of the output in outputsDataMap
 * @param precision precision of the blob to create
 * @return the newly allocated blob
 * @throws GNA exception when the output name is unknown
 */
Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) {
    // Use find(): operator[] would insert a null entry for an unknown name and then crash on it.
    const auto outputInfo = outputsDataMap.find(name);
    if (outputInfo == outputsDataMap.end() || !outputInfo->second) {
        THROW_GNA_EXCEPTION << "Cannot create output blob: network has no output named " << name;
    }

    // need to have intermediate blob for interleave conversion
    const auto outputDims = outputInfo->second->getTensorDesc().getDims();
    InferenceEngine::Blob::Ptr outputBlob =
        make_blob_with_precision(TensorDesc(precision, outputDims, outputDims.size() == 2 ? NC : NCHW));
    outputBlob->allocate();
    return outputBlob;
}
|
|
|
|
/**
 * @brief Creates and allocates a blob shaped like the named network input.
 *
 * The blob is needed as an intermediate buffer for interleave conversion. Its layout is NC
 * for 2-dimensional inputs and NCHW otherwise.
 *
 * @param name      name of the input in inputsDataMap
 * @param precision precision of the blob to create
 * @return the newly allocated blob
 * @throws GNA exception when the input name is unknown
 */
Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Precision precision) {
    // Use find(): operator[] would insert a null entry for an unknown name and then crash on it.
    const auto inputInfo = inputsDataMap.find(name);
    if (inputInfo == inputsDataMap.end() || !inputInfo->second) {
        THROW_GNA_EXCEPTION << "Cannot create input blob: network has no input named " << name;
    }

    // need to have intermediate blob for interleave conversion
    // TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
    const auto inputDims = inputInfo->second->getTensorDesc().getDims();
    InferenceEngine::Blob::Ptr inputBlob =
        make_blob_with_precision(TensorDesc(precision, inputDims, inputDims.size() == 2 ? NC : NCHW));
    inputBlob->allocate();
    return inputBlob;
}
|
|
|
|
std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState() {
|
|
if (memoryStates.size() != graphCompiler.memory_connection.size()) {
|
|
memoryStates.clear();
|
|
for (auto& connection : graphCompiler.memory_connection) {
|
|
auto state = std::make_shared<memory::GNAMemoryState>(connection.first, std::make_shared <GNAMemoryLayer>(connection.second));
|
|
memoryStates.emplace_back(state);
|
|
}
|
|
}
|
|
return memoryStates;
|
|
}
|
|
|
|
std::string GNAPlugin::GetName() const noexcept {
|
|
return _pluginName;
|
|
}
|
|
|
|
void GNAPlugin::SetName(const std::string & pluginName) noexcept {
|
|
_pluginName = pluginName;
|
|
}
|
|
|
|
/**
 * @brief Restores a previously exported GNA model from a stream into this plugin.
 *
 * Reads the serialized header, initializes the GNA device, reserves and commits
 * header.gnaMemSize bytes of GNA memory, creates the model container and lets
 * GNAModelSerial fill the memory, the input/output descriptors and the data maps.
 * Rotation settings are taken from the header and every imported memory region is
 * registered as a memory connection named "noname".
 *
 * @param networkModel stream holding the serialized model
 * @return always nullptr; the imported state lives in the plugin itself
 */
InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(std::istream& networkModel) {
    auto header = GNAModelSerial::ReadHeader(networkModel);

    InitGNADevice();

    graphCompiler.setGNAMemoryPtr(gnamem);
    void *gnaMemoryBase = nullptr;
    gnamem->reserve_ptr(&gnaMemoryBase, header.gnaMemSize);
    gnamem->commit();
#if GNA_LIB_VER == 2
    gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>(header.layersCount)));
#else
    nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(header.layersCount), -1, InferenceEngine::BlobMap());
    std::get<0>(nnets.back())->obj.nGroup = header.nGroup;
#endif
    // Filled by the import below with (pointer, size) entries describing memory states.
    GNAModelSerial::MemoryType importedStates;
#if GNA_LIB_VER == 2
    auto modelReader = GNAModelSerial(&std::get<0>(gnaModels.back())->obj, importedStates);
#else
    auto modelReader = GNAModelSerial(&std::get<0>(nnets.back())->obj, importedStates);
#endif

    modelReader.setHeader(header);
    modelReader.Import(gnaMemoryBase,
                       header.gnaMemSize,
                       networkModel,
                       inputsDesc,
                       outputsDesc,
                       inputsDataMap,
                       outputsDataMap);

    // Convolutional layers/operations map to non-interleaved orientation, all others to interleaved.
#if GNA_LIB_VER == 2
    auto getOrientation = [](Gna2Operation & gnaOperation) {
        return gnaOperation.Type == Gna2OperationTypeConvolution ?
            kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
    };
#else
    auto getOrientation = [](intel_nnet_layer_t & layer) {
        return layer.nLayerKind == INTEL_CONVOLUTIONAL ?
            kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
    };
#endif

#if GNA_LIB_VER == 1
    // Orientations are derived from the first and the last layer of the imported model.
    auto &importedNnet = std::get<0>(nnets.back())->obj;
    inputsDesc->orientation_in["input"] = getOrientation(importedNnet.pLayers[0]);
    outputsDesc[0].orientation = getOrientation(importedNnet.pLayers[importedNnet.nLayers - 1]);
#endif

    do_rotate_input = header.doRotateInput;
    num_rotate_rows = header.nRotateRows;
    num_rotate_columns = header.nRotateColumns;

    for (auto && stateRegion : importedStates) {
        GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2);
        memoryLayer.gna_ptr = stateRegion.first;
        memoryLayer.reserved_size = stateRegion.second;

        graphCompiler.memory_connection.emplace_back(make_pair(std::string("noname"), memoryLayer));
    }

    DumpXNNToFile();

#ifdef PLOT
    dnn->WriteGraphWizModel("gna-blob-imported.dot");
#endif
#if GNA_LIB_VER == 2
    createRequestConfigsForGnaModels();
#endif
    return nullptr;
}
|
|
|
|
void GNAPlugin::Export(const std::string &fileName) {
|
|
if (inputsDesc->ptr_inputs_global_id.empty() || outputsDesc.empty()) {
|
|
THROW_GNA_EXCEPTION << " network not loaded";
|
|
}
|
|
|
|
#if GNA_LIB_VER == 1
|
|
if (inputsDesc->ptr_inputs_global_id.size() != 1) {
|
|
THROW_GNA_EXCEPTION << " exporting network with multiple inputs not supported";
|
|
}
|
|
#endif
|
|
|
|
std::fstream outStream(fileName, ios_base::out | ios_base::binary);
|
|
|
|
// TODO: nnet group parameter looks only used in application - so can we move this line into load network.
|
|
IE_ASSERT(!inputsDataMap.empty());
|
|
auto inputDims = inputsDataMap.begin()->second->getTensorDesc().getDims();
|
|
if (inputDims.size() == 2) {
|
|
#if GNA_LIB_VER == 1
|
|
std::get<0>(nnets.front())->obj.nGroup = inputDims[0];
|
|
#endif
|
|
}
|
|
#if GNA_LIB_VER == 2
|
|
Gna2Model* modelToSerial = &std::get<0>(gnaModels.front())->obj;
|
|
#else
|
|
intel_nnet_type_t* modelToSerial = &std::get<0>(nnets.front())->obj;
|
|
#endif
|
|
auto serial = GNAModelSerial(modelToSerial,
|
|
inputsDesc,
|
|
outputsDesc,
|
|
inputsDataMap,
|
|
outputsDataMap)
|
|
.SetInputRotation(dnn->num_rotate_rows, dnn->num_rotate_columns, dnn->do_rotate_input);
|
|
|
|
for (auto && memoryConnection : graphCompiler.memory_connection) {
|
|
serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size);
|
|
}
|
|
|
|
serial.Export(gnamem->getBasePtr(), gnamem->getTotalBytes(), outStream);
|
|
}
|
|
|
|
/**
 * @brief Fills perfMap with the GNA device performance counters.
 *
 * Nothing is written when performance counting is disabled or when there is no GNA
 * device object to query.
 *
 * @param perfMap map receiving the counters
 */
void GNAPlugin::GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) {
    // gnadevice may be null (other code paths of the plugin test for it), so guard the call.
    if (gnaFlags->performance_counting && gnadevice) {
        gnadevice->getGnaPerfCounters(perfMap);
    }
}
|
|
|
|
/**
 * @brief Accepts an extension and ignores it; the body is intentionally empty.
 * @param extension unused
 */
void GNAPlugin::AddExtension(InferenceEngine::IExtensionPtr extension) {
}
|
|
|
|
void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config_map) {
|
|
config.UpdateFromMap(config_map);
|
|
UpdateFieldsFromConfig();
|
|
}
|
|
|
|
/**
 * @brief Copies the values held in config into the plugin fields that mirror them:
 *        the input scale factors and the GNA flags.
 */
void GNAPlugin::UpdateFieldsFromConfig() {
    const auto &source = config;
    inputsDesc->inputScaleFactors = source.inputScaleFactors;
    *gnaFlags = source.gnaFlags;
}
|
|
|
|
/**
 * @brief Reports which layers of a network have a type known to the GNA plugin.
 *
 * Starting from the first consumer of the first network input, the layers are walked with
 * UnorderedDFS; every layer whose type string maps to something other than
 * LayerType::NO_TYPE is added to res.supportedLayersMap under the plugin name.
 *
 * @param network network to inspect
 * @param config  not used by this function
 * @param res     receives the supported layers
 * @throws IE exception when the network carries an ngraph::Function
 * @throws GNA exception when the network has no inputs or only an input layer
 */
void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
                             const std::map<std::string, std::string>& config,
                             InferenceEngine::QueryNetworkResult& res) const {
    if (network.getFunction()) {
        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << " ngraph::Function is not supported natively";
    }

    std::unordered_set<CNNLayer *> visitedLayers;
    InferenceEngine::InputsDataMap networkInputs;

    network.getInputsInfo(networkInputs);
    // NOTE(review): the sorted list is not read afterwards; the call is kept as in the
    // original because any side effects it may have are not visible from here.
    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);

    if (networkInputs.empty()) {
        THROW_GNA_EXCEPTION << "Network is empty (GNA)\n";
    }

    auto const & firstInputConsumers = getInputTo(networkInputs.begin()->second->getInputData());
    if (firstInputConsumers.empty()) {
        THROW_GNA_EXCEPTION << "Network consists of input layer only (GNA)\n";
    }

    const auto recordIfSupported = [this, &res](CNNLayerPtr const& layer) {
        if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
            return;
        }
        res.supportedLayersMap.insert({ layer->name, GetName() });
    };

    InferenceEngine::details::UnorderedDFS(visitedLayers,
                                           firstInputConsumers.begin()->second,
                                           recordIfSupported,
                                           false);
}
|