[CPU] A new transformation that adds a convert layer if there are no reorders that support the data type conversion. (#3498)

This commit is contained in:
Maksim Kutakov
2021-02-08 11:58:48 +03:00
committed by GitHub
parent db065d525e
commit 7387642a98
18 changed files with 681 additions and 198 deletions

View File

@@ -46,11 +46,11 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reduce_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_convert_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder_seq_len.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp

View File

@@ -43,6 +43,7 @@ void BF16Transformer::convertToFloat(InferenceEngine::CNNNetwork &network) {
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
&& !CaselessEq<std::string>()(iter->type, "const")
&& iter->outData[o]->getPrecision() == Precision::BF16) {
iter->outData[o]->setPrecision(Precision::FP32);
}

View File

@@ -23,6 +23,7 @@
#include "mkldnn_infer_request.h"
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_convert_node.h>
#include <legacy/graph_tools.hpp>
#include <ie_algorithm.hpp>
@@ -457,6 +458,21 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
}
}
// Checks whether oneDNN can create a reorder primitive that converts a tensor
// described by 'parentDesc' into one described by 'childDesc' on engine 'eng'.
// Used by InitEdges() to decide whether an explicit Convert node has to be
// inserted instead of (or in front of) a Reorder.
static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, const mkldnn::engine& eng) {
    memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc);
    memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc);
    mkldnn::primitive_attr attr;

    // Probe via the C API so that an unsupported combination is reported as a
    // status code instead of a thrown exception.
    dnnl_primitive_desc_t result = nullptr;
    auto status = dnnl_reorder_primitive_desc_create(&result, &srcMemDesc.data, eng.get(), &dstMemDesc.data, eng.get(),
                                                     attr.get());
    // The descriptor is only probed, never used to build a primitive; release it.
    if (result) {
        mkldnn_primitive_desc_destroy(result);
    }

    return mkldnn_success == status;
}
void MKLDNNGraph::InitEdges() {
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
@@ -470,18 +486,42 @@ void MKLDNNGraph::InitEdges() {
for (auto i = 0; i < numberOfEdges; i++) {
if (graphEdges[i]->needReorder()) {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
auto &edge = graphEdges[i];
std::string basicLayerName = edge->getParent()->getName() + "_" +
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
auto edge = graphEdges[i];
bool insertReorder = true;
// Check if there is a reorder that supports the type conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
std::string convertName = edge->getParent()->getName() + "_" +
edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
InsertNode(edge, convertNode, true);
//Check if reorder is still needed
if (convertNode->getChildEdgeAt(0)->needReorder()) {
edge = convertNode->getChildEdgeAt(0);
} else {
insertReorder = false;
}
}
if (insertReorder) {
std::string basicLayerName = edge->getParent()->getName() + "_" +
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
edge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
}
uniqueLayerNames.insert(layerName);
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
}
uniqueLayerNames.insert(layerName);
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
graphEdges.erase(graphEdges.begin() + i);
i--;
numberOfEdges--;
@@ -1095,44 +1135,17 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa
}
reorderPtr->setDescs(inDesc, outDesc);
reorderPtr->_scales = scales;
auto oIndex = edge->getOutputNum();
auto iIndex = edge->getInputNum();
if (iIndex < 0 || oIndex < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< edge->getParent()->getName() << " and "
<< edge->getChild()->getName() << ".";
edge->drop();
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
// Add edge for beforeNode
beforeNode->getChild()->parentEdges.push_back(beforeNode);
edge->getParent()->childEdges.push_back(beforeNode);
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
edge->getChild()->parentEdges.push_back(afterNode);
reorderPtr->setOptimized(isOptimized);
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();
graphEdges.push_back(beforeNode);
graphEdges.push_back(afterNode);
InsertNode(edge, newReorder, true);
// Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
// Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks.
if (!isOptimized) {
beforeNode->getDesc();
afterNode->getDesc();
newReorder->getParentEdgeAt(0)->getDesc();
newReorder->getChildEdgeAt(0)->getDesc();
}
graphNodes.push_back(newReorder);
return newReorder;
}
@@ -1235,3 +1248,42 @@ void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}
// Inserts 'node' in place of the given edge: the edge is dropped and the node is
// wired between the edge's former parent and child, keeping the original port
// numbers. Delegates to the port-based overload.
bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) {
    const auto childPort = edge->getOutputNum();
    const auto parentPort = edge->getInputNum();
    if (parentPort < 0 || childPort < 0) {
        THROW_IE_EXCEPTION << "Cannot insert node '" << node->getName() << "' between nodes: "
                           << edge->getParent()->getName() << " and "
                           << edge->getChild()->getName() << ".";
    }

    // Detach the original edge before re-wiring through the new node.
    edge->drop();

    return InsertNode(edge->getParent(), edge->getChild(), node, parentPort, childPort, initNode);
}
// Inserts 'node' between 'parent' and 'child': creates the two connecting edges,
// registers them in both nodes' edge lists, optionally runs the node
// initialization pipeline, and adds the edges and the node to the graph.
// parentPort/childPort are the port numbers on the parent/child side.
// initNode=true is required when the insertion happens after the graph has
// already been initialized (e.g. from InitEdges()).
bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode) {
    MKLDNNEdgePtr beforeNode(new MKLDNNEdge(parent, node, parentPort, 0));
    MKLDNNEdgePtr afterNode(new MKLDNNEdge(node, child, 0, childPort));

    // Add edge for beforeNode
    beforeNode->getChild()->parentEdges.push_back(beforeNode);
    parent->childEdges.push_back(beforeNode);

    // Add edge for afterNode
    afterNode->getParent()->childEdges.push_back(afterNode);
    child->parentEdges.push_back(afterNode);

    if (initNode) {
        // Standard node initialization sequence; the call order matters.
        node->getSupportedDescriptors();
        node->initSupportedPrimitiveDescriptors();
        node->filterSupportedPrimitiveDescriptors();
        node->selectOptimalPrimitiveDescriptor();
        node->initOptimalPrimitiveDescriptor();
    }

    graphEdges.push_back(beforeNode);
    graphEdges.push_back(afterNode);
    graphNodes.push_back(node);
    return true;
}

View File

@@ -115,6 +115,41 @@ public:
MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc,
const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
/**
* @brief Insert MKLDNNNode at the edge-specified location.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
* node without initialization.
* @param edge
* pointer to the edge in the graph where the node will be inserted
* @param node
* pointer to the inserted node
* @param initNode
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode = false);
/**
* @brief Insert MKLDNNNode between two specified nodes.
* This procedure creates two edges that link the parent and child nodes to the inserted one and adds all created objects to the graph.
* This method supports two regimes. First, the node is inserted with initialization (i.e. supported descriptors initialization,
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
* node without initialization.
* @param parent
* pointer to the parent node
* @param child
* pointer to the child node
* @param parentPort
* port number of the parent node to which the inserted node should be connected
* @param childPort
* port number of the child node to which the inserted node should be connected
* @param initNode
* parameter that determines whether the node needs to be initialized
* @return true in case of success, false otherwise.
*/
bool InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode = false);
InferenceEngine::CNNNetwork dump() const;
template<typename NET>

View File

@@ -55,9 +55,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
MergeTwoEqualScaleShifts(graph);
graph.RemoveDroppedNodes();
MergeConversions(graph);
graph.RemoveDroppedNodes();
FuseBroadcastAndEltwise(graph);
graph.RemoveDroppedNodes();
@@ -154,51 +151,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedEdges();
}
// Merges duplicate Convert nodes that share the same Input parent: when several
// Converts consume the same data and produce the same precision and shape, all
// but the first are dropped and their consumers are re-wired to the survivor.
void MKLDNNGraphOptimizer::MergeConversions(MKLDNNGraph& graph) {
    for (auto node : graph.GetNodes()) {
        // Candidate parent: an Input with at least 2 children, the first of which is a Convert
        if (!IsOneOf(node->getType(), { Input }) || node->getChildEdges().size() < 2 ||
            !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), { Convert })) {
            continue;
        }

        auto& input = node;

        // Look for Conversions of the same type with matching dims/precision
        for (size_t i = 0; i < input->getChildEdges().size(); i++) {
            auto convInEdge = input->getChildEdgeAt(i);
            auto conv = convInEdge->getChild();
            // Only edge 0 was verified above; skip non-Convert siblings explicitly
            if (!IsOneOf(conv->getType(), { Convert })) {
                continue;
            }

            // A merge candidate must have exactly one output edge (checked below),
            // so the Convert's output is always its child edge 0 — indexing with 'i'
            // would run out of range for i >= 1.
            auto convOutEdge = conv->getChildEdgeAt(0);
            auto convInDims = convInEdge->getDims();
            auto convOutDims = convOutEdge->getDims();
            Precision convOutPrecision = conv->getCnnLayer()->precision;

            for (size_t j = i + 1; j < input->getChildEdges().size();) {
                auto childEdge = input->getChildEdgeAt(j);
                auto child = childEdge->getChild();

                if (!IsOneOf(child->getType(), { Convert }) ||
                    child->getCnnLayer()->precision != convOutPrecision ||
                    child->getChildEdgeAt(0)->getDims() != convOutDims ||
                    childEdge->getDims() != convInDims ||
                    child->getChildEdges().size() != 1) {
                    j++;
                    continue;
                }

                // Re-wire the duplicate Convert's consumer to the surviving Convert
                auto childChildEdge = child->getChildEdgeAt(0);
                auto childChild = childChildEdge->getChild();
                int idxChild = childChildEdge->getOutputNum();

                child->remove();
                graph.DropNode(child);

                MKLDNNEdgePtr newEdge(new MKLDNNEdge(conv, childChild, 0, idxChild));
                graph.GetEdges().push_back(newEdge);
                conv->addEdge(newEdge);
                // 'j' is deliberately not advanced: dropping 'child' shrank the edge list
            }
        }
    }
}
void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();
@@ -1844,6 +1796,10 @@ void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
if (!InferenceEngine::details::CaselessEq<std::string>()(nodeType, "convert")) {
continue;
}
if (convertCandidate->getCnnLayer()->insData.empty() ||
convertCandidate->getCnnLayer()->outData.empty()) {
continue;
}
auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
@@ -2313,4 +2269,4 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
mergePermuteAndReorder(parentNode, childNode);
}
}
}
}

View File

@@ -19,7 +19,6 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void MergeConversions(MKLDNNGraph& graph);
void MergeGroupConvolution(MKLDNNGraph& graph);
void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
void FuseConvolutionAndActivation(MKLDNNGraph &graph);
@@ -41,6 +40,7 @@ private:
void DropDoubleReorders(MKLDNNGraph& graph);
void DropConvertReorder(MKLDNNGraph& graph);
void ChangeConvertToReorder(MKLDNNGraph &graph);
void AddConvertToReorder(MKLDNNGraph &graph);
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);
void FuseEltwiseAndSimple(MKLDNNGraph &graph);

View File

@@ -17,6 +17,7 @@
#include "mkldnn_memory.h"
#include "mkldnn_extension_utils.h"
#include "nodes/common/cpu_memcpy.h"
#include "nodes/common/cpu_convert.h"
#include "ie_mkldnn.h"
using namespace InferenceEngine;
@@ -88,10 +89,54 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo
}
}
// Copies/converts the contents of 'input' into 'output'.
// If the memory descriptors match, a plain byte copy is performed; otherwise an
// mkldnn reorder is used. When no reorder exists for the required precision
// conversion, the data is first converted with cpu_convert and then reordered.
// 'size' (bytes) limits the copy in the memcpy path only; 0 means "whole buffer".
void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) {
    if (size != 0)
        IE_ASSERT(size <= output.GetDescriptor().get_size());

    if (input.GetDesc() == output.GetDesc()) {
        // Identical layouts and data types: raw byte copy is sufficient.
        auto srcPtr = static_cast<uint8_t*>(input.GetPtr());
        auto dstPtr = static_cast<uint8_t*>(output.GetPtr());

        auto copySize = size == 0 ? output.GetSize() : size;
        cpu_memcpy(dstPtr, srcPtr, copySize);
    } else {
        std::unique_ptr<mkldnn::reorder> pReorder;
        std::shared_ptr<memory> srcMemoryPtr;
        // Buffer for the precision-converted data; declared in this scope so it
        // outlives the reorder execution below.
        std::vector<uint8_t> tmpBuff;

        try {
            pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive()));
            srcMemoryPtr = input.prim;
        }
        catch (const mkldnn::error& err) {
            if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType()) {
                //we probably could not make the reorder because there is no one supporting this precision conversion
                //lets try to convert data first using cpu_convert
                auto data = static_cast<const uint8_t *>(input.GetPtr());
                tmpBuff.resize(input.GetSize());

                cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()),
                            MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount());

                // Wrap the converted buffer in a memory object that keeps the source
                // layout but carries the target data type, then retry the reorder.
                MKLDNNMemory tmpMem(output.eng);
                tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data());

                pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive()));
                srcMemoryPtr = tmpMem.prim;
            } else {
                throw;
            }
        }
        if (pReorder) {
            mkldnn::stream loc_stream(output.eng, stream::flags::default_order);
            pReorder->execute(loc_stream, *srcMemoryPtr, *output.prim);
        } else {
            THROW_IE_EXCEPTION << "Could not make mkldnn reorder.";
        }
    }
}
// TODO: It should be done via wrap into Memory;
void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format, const void* data, size_t size, bool ftz) const {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
IE_ASSERT(!one_of(format, memory::format_tag::undef, memory::format_tag::any));
auto dst_desc = GetDescriptor();
@@ -99,25 +144,21 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
IE_ASSERT(size <= dst_desc.get_size());
if (dst_desc != src_desc) {
auto memData = GetDescriptor().data;
memory::dims dims{memData.dims, memData.dims + memData.ndims};
MKLDNNMemory src(eng);
src.Create(dims, dataType, format, data);
std::shared_ptr<mkldnn::reorder> pReorder =
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(src.GetPrimitive(), GetPrimitive()));
mkldnn::stream loc_stream(eng, stream::flags::default_flags);
pReorder->execute(loc_stream, *src.prim, *this->prim);
} else {
if (dst_desc == src_desc) {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
// We cannot support strides for i/o blobs because it affects performance.
dataPtr += itemSize * prim->get_desc().data.offset0;
cpu_memcpy(dataPtr, data, size);
}
} else {
auto memData = this->GetDescriptor().data;
memory::dims dims(memData.dims, memData.dims + memData.ndims);
MKLDNNMemory src(this->eng);
src.Create(dims, dataType, format, data);
reorderData(src, *this);
}
if (ftz
&& dataType == memory::data_type::f32
&& prim->get_desc().data.format_kind != dnnl_format_kind_wino
@@ -130,21 +171,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
}
void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const {
if (size != 0)
IE_ASSERT(size <= GetDescriptor().get_size());
// TODO: Optimization. Reorder perfect is not good enough, so in triviale cases we
// prefer use simple copy.
if (src.GetDesc() == this->GetDesc()) {
auto srcPtr = static_cast<uint8_t*>(src.GetPtr());
auto dstPtr = static_cast<uint8_t*>(this->GetPtr());
auto copySize = size == 0 ? this->GetSize() : size;
cpu_memcpy(dstPtr, srcPtr, copySize);
} else {
mkldnn::reorder reorderPrim(src.GetPrimitive(), GetPrimitive());
mkldnn::stream loc_stream(eng, stream::flags::default_order);
reorderPrim.execute(loc_stream, *src.prim, *this->prim);
}
reorderData(src, *this, size);
if (ftz
&& src.GetDataType() == memory::data_type::f32
@@ -840,5 +867,4 @@ bool MKLDNNMemoryDesc::blocksExtended() const {
}
return false;
}
} // namespace MKLDNNPlugin

View File

@@ -164,6 +164,8 @@ public:
static std::string formatToString(mkldnn::memory::format_tag fmt);
static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0);
private:
std::shared_ptr<mkldnn::memory> prim;
mkldnn::engine eng;

View File

@@ -168,7 +168,8 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
if (!(CaselessEq<std::string>()(layer->type, "memory") ||
CaselessEq<std::string>()(layer->type, "memoryinput") ||
CaselessEq<std::string>()(layer->type, "output") ||
CaselessEq<std::string>()(layer->type, "reorder"))) {
CaselessEq<std::string>()(layer->type, "reorder") ||
CaselessEq<std::string>()(layer->type, "convert"))) {
THROW_IE_EXCEPTION << "Inappropriate layer type: " << layer->type << " name: " << layer->name;
}
}

View File

@@ -0,0 +1,119 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "tensor_desc_creator.h"
#include <numeric>
using namespace InferenceEngine;
using namespace MKLDNNPlugin;
namespace {
constexpr size_t channelsPos = 1lu;
// Produces a plain (row-major) tensor descriptor: the blocked dims equal the
// source dims and the dimension order is the identity permutation.
class PlainFormatCreator : public TensorDescCreator {
public:
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
        SizeVector dimsOrder;
        dimsOrder.reserve(srcDims.size());
        for (size_t dim = 0lu; dim < srcDims.size(); ++dim) {
            dimsOrder.push_back(dim);
        }
        return TensorDesc(precision, srcDims, {srcDims, dimsOrder});
    }
    // A plain layout is valid for any rank, including scalars.
    virtual size_t getMinimalRank() const { return 0lu; }
};
// Produces a "channels last" descriptor (e.g. NHWC): for ranks above 2 the
// channel dimension is rotated to the end of both the order and the blocked dims.
class PerChannelCreator : public TensorDescCreator {
public:
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const {
        const size_t rank = srcDims.size();

        SizeVector order(rank);
        std::iota(order.begin(), order.end(), 0);
        SizeVector blkDims = srcDims;

        if (rank > 2) {
            // Rotate the channel entry past the trailing spatial dims so it ends up last.
            std::rotate(order.begin() + channelsPos, order.begin() + channelsPos + 1, order.end());
            std::rotate(blkDims.begin() + channelsPos, blkDims.begin() + channelsPos + 1, blkDims.end());
        }

        return TensorDesc(precision, srcDims, {blkDims, order});
    }
    virtual size_t getMinimalRank() const { return 3lu; }
};
// Produces a channel-blocked descriptor (e.g. nChw8c / nChw16c): the channel
// dimension is split into ceil(C / blockSize) outer blocks plus an innermost
// dimension of size blockSize appended at the end.
class ChannelBlockedCreator : public TensorDescCreator {
public:
    // 'explicit' prevents an accidental implicit conversion from an integer
    // block size into a creator object.
    explicit ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {}
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
        if (srcDims.size() < 2) {
            THROW_IE_EXCEPTION << "Can't create blocked tensor descriptor!";
        }

        SizeVector order(srcDims.size());
        std::iota(order.begin(), order.end(), 0);
        order.push_back(channelsPos);

        SizeVector blkDims = srcDims;
        // Round the channel count up to a whole number of blocks.
        blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0);
        blkDims.push_back(_blockSize);

        return TensorDesc(precision, srcDims, {blkDims, order});
    }
    virtual size_t getMinimalRank() const { return 3lu; }

private:
    size_t _blockSize;  // number of channels packed into the innermost dimension
};
} // namespace
// Returns the shared, once-constructed map of all generic tensor descriptor
// creators (planar, per-channel and channel-blocked by 8/16).
const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() {
    static const CreatorsMap commonCreators = [] {
        CreatorsMap result;
        result[TensorDescCreatorTypes::nspc] = CreatorConstPtr(new PerChannelCreator);
        result[TensorDescCreatorTypes::nCsp8c] = CreatorConstPtr(new ChannelBlockedCreator(8));
        result[TensorDescCreatorTypes::nCsp16c] = CreatorConstPtr(new ChannelBlockedCreator(16));
        result[TensorDescCreatorTypes::ncsp] = CreatorConstPtr(new PlainFormatCreator);
        return result;
    }();
    return commonCreators;
}
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) {
auto rankFilter = [rank](const CreatorsMap::value_type& item) {
if (item.second->getMinimalRank() > rank) {
return false;
}
return true;
};
auto first = CreatorsMapFilterConstIterator(std::move(rankFilter), map.begin(), map.end());
auto last = first.end();
return std::make_pair(first, last);
}
// Builds a [first, last) filtered view over 'map' that keeps only the creators
// which are both listed in 'supportedTypes' and support tensors of the given rank.
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes) {
    // Collect the requested types into a bit mask for O(1) membership tests.
    // The shifted literal is 'unsigned long' so its width matches the size_t
    // accumulator; a plain 'int' literal is only shifted in int width, which is
    // undefined behavior for enumerator values >= 31.
    size_t bitMask = 0ul;
    for (auto& item : supportedTypes) {
        bitMask |= 1ul << static_cast<unsigned>(item);
    }

    auto rankTypesFilter = [rank, bitMask](const CreatorsMap::value_type& item) {
        if (!(bitMask & (1ul << static_cast<unsigned>(item.first)))) {
            return false;
        }
        if (item.second->getMinimalRank() > rank) {
            return false;
        }
        return true;
    };

    auto first = CreatorsMapFilterConstIterator(std::move(rankTypesFilter), map.begin(), map.end());
    auto last = first.end();
    return std::make_pair(first, last);
}
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) {
auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end());
auto last = first.end();
return std::make_pair(first, last);
}

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_layouts.h>
namespace MKLDNNPlugin {
// Identifiers of the generic tensor layouts that the common creators can produce.
// Values double as bit positions in the filter masks built by makeFilteredRange().
enum class TensorDescCreatorTypes : unsigned {
    nspc, // general per channels format
    ncsp, // general planar
    nCsp8c, // general channels blocked by 8
    nCsp16c // general channels blocked by 16
};
class CreatorsMapFilterConstIterator;
// Factory interface for building InferenceEngine::TensorDesc objects in the
// common generic layouts (see TensorDescCreatorTypes). Concrete creators are
// obtained via getCommonCreators() and can be filtered by rank, by type list,
// or by an arbitrary predicate with the makeFilteredRange() helpers.
class TensorDescCreator {
public:
    typedef std::shared_ptr<TensorDescCreator> CreatorPtr;
    typedef std::shared_ptr<const TensorDescCreator> CreatorConstPtr;
    typedef std::map<TensorDescCreatorTypes, CreatorConstPtr> CreatorsMap;
    typedef std::function<bool(const CreatorsMap::value_type&)> Predicate;

public:
    // Returns the shared map of all generic creators.
    static const CreatorsMap& getCommonCreators();
    // Filters the map by minimal supported rank.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap &map, unsigned rank);
    // Filters the map by rank and an explicit whitelist of creator types.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes);
    // Filters the map with an arbitrary predicate.
    static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
    makeFilteredRange(const CreatorsMap& map, Predicate predicate);
    // Builds a descriptor for the given precision and dims in this creator's layout.
    virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0;
    // Minimal tensor rank this layout is applicable to.
    virtual size_t getMinimalRank() const = 0;
    virtual ~TensorDescCreator() = default;
};
// Forward iterator adapter over a TensorDescCreator::CreatorsMap that skips
// entries rejected by a caller-supplied predicate. Produced by the
// TensorDescCreator::makeFilteredRange() helpers.
class CreatorsMapFilterConstIterator {
public:
    typedef TensorDescCreator::CreatorsMap::const_iterator Iterator;
    typedef std::iterator_traits<Iterator>::value_type value_type;
    typedef std::iterator_traits<Iterator>::reference reference;
    typedef std::iterator_traits<Iterator>::pointer pointer;
    typedef std::iterator_traits<Iterator>::difference_type difference_type;
    typedef std::forward_iterator_tag iterator_category;
    typedef std::function<bool(const value_type&)> predicate_type;

public:
    // Immediately advances to the first element accepted by 'filter' (or to 'end').
    CreatorsMapFilterConstIterator(predicate_type filter, Iterator begin, Iterator end) : _filter(std::move(filter)), _iter(begin), _end(end) {
        while (_iter != _end && !_filter(*_iter)) {
            ++_iter;
        }
    }

    // Pre-increment: step to the next element accepted by the predicate.
    // NOTE(review): must not be called on a past-the-end iterator — the first
    // ++_iter is performed before any bounds check.
    CreatorsMapFilterConstIterator& operator++() {
        do {
            ++_iter;
        } while (_iter != _end && !_filter(*_iter));
        return *this;
    }

    // Returns the matching past-the-end iterator. The empty predicate is safe
    // here because the constructor's skip loop never runs when _iter == _end.
    CreatorsMapFilterConstIterator end() const {
        return CreatorsMapFilterConstIterator(predicate_type(), _end, _end);
    }

    // Post-increment: returns the value before advancing.
    CreatorsMapFilterConstIterator operator++(int) {
        CreatorsMapFilterConstIterator temp(*this);
        ++*this;
        return temp;
    }

    reference operator*() const {
        return *_iter;
    }

    pointer operator->() const {
        return std::addressof(*_iter);
    }

    // Equality compares only the underlying position; the predicate is ignored.
    friend bool operator==(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
        return lhs._iter == rhs._iter;
    }

    friend bool operator!=(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
        return !(lhs == rhs);
    }

private:
    Iterator _iter;           // current position in the underlying map
    Iterator _end;            // underlying past-the-end position
    predicate_type _filter;   // acceptance predicate; empty only for end iterators
};
} // namespace MKLDNNPlugin

View File

@@ -1,72 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "base.hpp"
#include <string>
#include <vector>
#include "ie_precision.hpp"
#include "common/cpu_convert.h"
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
// Legacy extension-layer implementation of the Convert operation: copies the
// input blob into the output blob while converting the element precision via
// cpu_convert. Input and output must contain the same number of elements.
class ConvertImpl: public ExtLayerBase {
public:
    explicit ConvertImpl(const CNNLayer* layer) {
        try {
            logPrefix = "Convert layer with name '" + layer->name + "' ";
            if (layer->insData.size() != 1 || layer->outData.size() != 1)
                THROW_IE_EXCEPTION << logPrefix << "has incorrect number of input/output edges";

            // Target precision string from the layer parameters. Stored only;
            // the actual conversion below uses the tensor descriptors instead.
            precision = layer->GetParamAsString("precision");

            // Mirror the input/output tensor descriptors into a single layer configuration.
            LayerConfig config;
            DataConfig dataIn;
            const SizeVector& ins_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dataIn.desc = TensorDesc(layer->insData[0].lock()->getTensorDesc().getPrecision(), ins_dims,
                                     layer->insData[0].lock()->getTensorDesc().getLayout());
            config.inConfs.push_back(dataIn);

            DataConfig dataConfigOut;
            const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
            dataConfigOut.desc = TensorDesc(layer->outData[0]->getTensorDesc().getPrecision(), out_dims,
                                            layer->outData[0]->getTensorDesc().getLayout());
            config.outConfs.push_back(dataConfigOut);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            // Constructor must not throw: the error message is inspected later.
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        try {
            void *srcPtr = inputs[0]->cbuffer().as<void *>();
            void *dstPtr = outputs[0]->buffer().as<void *>();
            if (inputs[0]->size() != outputs[0]->size())
                THROW_IE_EXCEPTION << logPrefix << "has input and output buffers with different sizes";
            // Element-wise precision conversion; the sizes are equal, so the
            // output element count is used as the conversion count.
            cpu_convert(srcPtr, dstPtr, inputs[0]->getTensorDesc().getPrecision(), outputs[0]->getTensorDesc().getPrecision(), outputs[0]->size());
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
            if (resp)
                errorMsg.copy(resp->msg, sizeof(resp->msg)-1);
            return GENERAL_ERROR;
        } catch(...) {
            return GENERAL_ERROR;
        }
        return OK;
    }

private:
    std::string precision;   // "precision" layer parameter (informational only)
    std::string logPrefix;   // common prefix for this layer's error messages
};
REG_FACTORY_FOR(ConvertImpl, Convert);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

View File

@@ -48,7 +48,6 @@ MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);

View File

@@ -0,0 +1,110 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <mkldnn_extension_utils.h>
#include "mkldnn_convert_node.h"
#include "common/cpu_convert.h"
#include "common/tensor_desc_creator.h"
#define THROW_ERROR THROW_IE_EXCEPTION << getTypeStr() << " layer with name '" << getName() <<"' ERROR: "
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
// Constructed from a CNNLayer like any other node; alternatively the in/out
// tensor descriptors can be provided afterwards via setDescs() (see
// getSupportedDescriptors / initSupportedPrimitiveDescriptors).
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(layer, eng, cache) {}
void MKLDNNConvertNode::getSupportedDescriptors() {
    // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data
    // from correspond tensor descriptors.
    if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY)
        outDims.push_back(MKLDNNDims(output->getDims()));
    if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY)
        inDims.push_back(MKLDNNDims(input->getDims()));
    // Convert takes exactly one input; its output may feed several consumers.
    if (getParentEdges().size() != 1)
        THROW_ERROR << "Incorrect number of input edges";
    if (getChildEdges().empty())
        THROW_ERROR << "Incorrect number of output edges";
}
// Populates supportedPrimitiveDescriptors. Two modes:
//  1) setDescs() was called (input/output set): emit a single descriptor that
//     reuses the supplied layouts, forcing identical in/out blocking.
//  2) Regular CNNLayer construction: enumerate every generic layout applicable
//     to the input rank, with matching in/out layouts and differing precisions.
void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto layer = getCnnLayer();
    if (layer == nullptr) {
        THROW_ERROR << "Cannot get CNN layer";
    }

    LayerConfig config;
    DataConfig dataIn;
    DataConfig dataConfigOut;

    config.dynBatchSupport = false;

    // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so
    // they should be used as the actual descriptors.
    if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) {
        dataIn.desc = *input;
        config.inConfs.push_back(dataIn);

        const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same
        dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc);
        config.outConfs.push_back(dataConfigOut);
        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
    } else if (layer->insData.size() == 1 && layer->outData.size() == 1) {
        auto insData = layer->insData[0].lock();
        if (nullptr == insData) {
            THROW_ERROR << "Input data is empty";
        }

        const SizeVector& insDims = insData->getTensorDesc().getDims();
        auto insPrecision = insData->getTensorDesc().getPrecision();
        const SizeVector& outputDims = layer->outData[0]->getTensorDesc().getDims();
        auto outPrecision = layer->outData[0]->getTensorDesc().getPrecision();

        config.inConfs.push_back(dataIn);
        config.outConfs.push_back(dataConfigOut);

        // Bind by reference: getCommonCreators() returns a const reference, and a
        // plain 'auto' here would deep-copy the whole creators map on every call.
        const auto& creators = TensorDescCreator::getCommonCreators();
        auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size());

        for (auto itr = range.first; itr != range.second; ++itr) {
            config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims);
            config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims);

            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
        }
    } else {
        THROW_ERROR << "Incorrect number of input/output edges";
    }
}
void MKLDNNConvertNode::createPrimitive() {
    // Convert has no MKLDNN primitive to compile; the conversion itself is
    // performed in execute(). Here we only validate that the graph has
    // allocated both edge memories and selected a primitive descriptor.
    const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        THROW_ERROR << "Destination memory didn't allocate.";

    const auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        THROW_ERROR << "Input memory didn't allocate.";

    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_ERROR << "Preferable primitive descriptor is not set.";
}
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
    // Element-wise precision conversion from the parent edge precision to the
    // child edge precision; buffers must hold the same number of elements.
    auto& srcMem = getParentEdgeAt(0)->getMemory();
    auto& dstMem = getChildEdgeAt(0)->getMemory();

    const size_t elemCount = srcMem.GetElementsCount();
    if (elemCount != dstMem.GetElementsCount())
        THROW_ERROR << "Input and output buffers have different elements count";

    cpu_convert(srcMem.GetPtr(),
                dstMem.GetPtr(),
                getParentEdgeAt(0)->getDesc().getPrecision(),
                getChildEdgeAt(0)->getDesc().getPrecision(),
                elemCount);
}
// The node is considered successfully created once its type resolved to Convert.
bool MKLDNNConvertNode::created() const {
    return Convert == getType();
}
REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert);

View File

@@ -0,0 +1,45 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <vector>
namespace MKLDNNPlugin {
// Performs element-wise precision conversion of its single input blob into the
// output precision (the actual work is done by cpu_convert in execute()).
// Besides serving the Convert layer itself, the node can be inserted by
// MKLDNNGraph as an auxiliary operation when no reorder primitive supports the
// required data type conversion.
class MKLDNNConvertNode : public MKLDNNNode {
public:
MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNConvertNode() override = default;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
// The conversion always writes into a separate output buffer, so in-place
// execution is never allowed.
bool canBeInPlace() const override {
return false;
}
// This is the interface extension designed to provide inp and output tensor descriptors without the CNNLayer.
// In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method.
// This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph
// initialization stage.
void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) {
this->input.reset(new InferenceEngine::TensorDesc(input));
this->output.reset(new InferenceEngine::TensorDesc(output));
}
std::shared_ptr<const InferenceEngine::TensorDesc> getInput() const { return input; }
std::shared_ptr<const InferenceEngine::TensorDesc> getOutput() const { return output; }
private:
// Optional descriptors set via setDescs(); when non-null they take priority
// over the CNNLayer information when building the supported configurations.
std::shared_ptr<InferenceEngine::TensorDesc> input;
std::shared_ptr<InferenceEngine::TensorDesc> output;
};
} // namespace MKLDNNPlugin

View File

@@ -0,0 +1,93 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace LayerTestsDefinitions {
class AddConvertToReorderTest : virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Builds a Gather graph whose indices constant is created with the given
    // precision, so the plugin has to insert either a Reorder or a Convert
    // node to bring the indices to a precision the Gather kernel supports.
    void BuildGraph(const ngraph::element::Type& secondInpType) {
        secondConstantType = secondInpType;

        const int gatherAxis = 2;
        const std::vector<int> indicesData = {0, 3, 2, 1};
        const std::vector<size_t> idxShape = {2, 2};
        const std::vector<size_t> dataShape = {10, 20, 30, 40};

        InferenceEngine::Precision netPrecision = inPrc = outPrc = Precision::FP32;
        targetDevice = CommonTestUtils::DEVICE_CPU;

        ASSERT_EQ(ngraph::shape_size(idxShape), indicesData.size())
                      << "Indices vector size and provided indices shape doesn't fit each other";

        const auto ngPrecision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto inputs = ngraph::builder::makeParams(ngPrecision, {dataShape});
        auto inputOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputs));

        auto idxConst = ngraph::opset3::Constant::create(secondConstantType, ngraph::Shape(idxShape), indicesData);
        auto axisConst = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {gatherAxis});
        auto gatherOp = std::make_shared<ngraph::opset3::Gather>(inputOuts[0], idxConst, axisConst);

        ngraph::ResultVector outputs{std::make_shared<ngraph::opset3::Result>(gatherOp)};
        function = std::make_shared<ngraph::Function>(outputs, inputs, "gather");
    }

    // The reference is evaluated with i64 indices, so the second input
    // constant is converted from the tested precision before running it.
    std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
        if (secondConstantType == ngraph::element::Type_t::bf16) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::bf16, ngraph::element::Type_t::i64>().run_on_function(function);
        } else if (secondConstantType == ngraph::element::Type_t::i8) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::i8, ngraph::element::Type_t::i64>().run_on_function(function);
        }
        return LayerTestsUtils::LayerTestsCommon::CalculateRefs();
    }

private:
    ngraph::element::Type secondConstantType;  // precision of the Gather indices constant
};
namespace {
/* Test insertion of the Convert layer if there is no suitable reorder.
Parameter[FP32] Constant[BF16]
\ /
\ /
\ Convert[I32] (Is inserted by the MKLDNNGraph)
\ /
Gather[FP32]
|
|
Output[FP32]
*/
TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) {
// BF16 indices: there is no reorder supporting this conversion, so the graph
// must insert a Convert node (and no Reorder) in front of the Gather input.
BuildGraph(ngraph::element::bf16);
Run();
CheckNodeOfTypeCount(executableNetwork, "Convert", 1);
CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
}
/* Test insertion of the Reorder layer if there is one.
Parameter[FP32] Constant[I8]
\ /
\ /
\ Reorder[I32] (Is inserted by the MKLDNNGraph)
\ /
Gather[FP32]
|
|
Output[FP32]
*/
TEST_F(AddConvertToReorderTest, smoke_TestAddReorder_CPU) {
// I8 indices: a suitable reorder exists, so the graph must use a Reorder
// node and no auxiliary Convert is expected.
BuildGraph(ngraph::element::i8);
Run();
CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
CheckNodeOfTypeCount(executableNetwork, "Reorder", 1);
}
} // namespace
} // namespace LayerTestsDefinitions

View File

@@ -228,6 +228,27 @@ auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
return paramsVector;
}
// Walks the runtime (execution) graph of the compiled network and asserts that
// exactly `expectedCount` nodes carry the given execution-layer type in their
// runtime info.
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount) {
    auto execGraph = execNet.GetExecGraphInfo();
    auto fn = execGraph.getFunction();
    ASSERT_NE(nullptr, fn);

    size_t matched = 0;
    for (const auto &op : fn->get_ops()) {
        const auto &rtInfo = op->get_rt_info();
        // Every node of the execution graph is expected to expose LAYER_TYPE
        // as a string variant in its runtime info.
        auto typeIt = rtInfo.find(ExecGraphInfoSerialization::LAYER_TYPE);
        IE_ASSERT(rtInfo.end() != typeIt);
        auto typeValue = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(typeIt->second);
        IE_ASSERT(nullptr != typeValue);
        if (typeValue->get() == nodeType)
            ++matched;
    }
    ASSERT_EQ(expectedCount, matched) << "Unexpected count of the node type '" << nodeType << "' ";
}
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams) {
std::vector<CPUSpecificParams> resCPUParams;
const int selectedTypeIndex = 3;

View File

@@ -114,4 +114,5 @@ const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_av
// utility functions
std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector);
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams);
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount);
} // namespace CPUTestUtils