[CPU] A new transformation that adds a convert layer if there is no reorders that support the data type conversion. (#3498)
This commit is contained in:
@@ -46,11 +46,11 @@ set(LAYERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reduce_node.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_convert_node.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder_seq_len.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp
|
||||
|
||||
@@ -43,6 +43,7 @@ void BF16Transformer::convertToFloat(InferenceEngine::CNNNetwork &network) {
|
||||
for (size_t o = 0; o < iter->outData.size(); o++) {
|
||||
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
|
||||
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
|
||||
&& !CaselessEq<std::string>()(iter->type, "const")
|
||||
&& iter->outData[o]->getPrecision() == Precision::BF16) {
|
||||
iter->outData[o]->setPrecision(Precision::FP32);
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include "mkldnn_infer_request.h"
|
||||
#include <nodes/mkldnn_input_node.h>
|
||||
#include <nodes/mkldnn_reorder_node.h>
|
||||
#include <nodes/mkldnn_convert_node.h>
|
||||
|
||||
#include <legacy/graph_tools.hpp>
|
||||
#include <ie_algorithm.hpp>
|
||||
@@ -457,6 +458,21 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
|
||||
}
|
||||
}
|
||||
|
||||
static bool isReorderAvailable(const TensorDesc& parentDesc, const TensorDesc& childDesc, const mkldnn::engine& eng) {
|
||||
memory::desc dstMemDesc = MKLDNNMemoryDesc(childDesc);
|
||||
memory::desc srcMemDesc = MKLDNNMemoryDesc(parentDesc);
|
||||
mkldnn::primitive_attr attr;
|
||||
|
||||
dnnl_primitive_desc_t result = nullptr;
|
||||
auto status = dnnl_reorder_primitive_desc_create(&result, &srcMemDesc.data, eng.get(), &dstMemDesc.data, eng.get(),
|
||||
attr.get());
|
||||
if (result) {
|
||||
mkldnn_primitive_desc_destroy(result);
|
||||
}
|
||||
|
||||
return mkldnn_success == status;
|
||||
}
|
||||
|
||||
void MKLDNNGraph::InitEdges() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitEdges");
|
||||
|
||||
@@ -470,18 +486,42 @@ void MKLDNNGraph::InitEdges() {
|
||||
for (auto i = 0; i < numberOfEdges; i++) {
|
||||
if (graphEdges[i]->needReorder()) {
|
||||
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
|
||||
auto &edge = graphEdges[i];
|
||||
std::string basicLayerName = edge->getParent()->getName() + "_" +
|
||||
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
|
||||
edge->getChild()->getName();
|
||||
std::string layerName = basicLayerName;
|
||||
int idx = 0;
|
||||
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
|
||||
idx++;
|
||||
layerName = basicLayerName + "_" + std::to_string(idx);
|
||||
auto edge = graphEdges[i];
|
||||
bool insertReorder = true;
|
||||
|
||||
// Check if there is a reorder that supports the type conversion
|
||||
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
|
||||
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
|
||||
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
|
||||
std::string convertName = edge->getParent()->getName() + "_" +
|
||||
edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
|
||||
|
||||
CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
|
||||
auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
|
||||
convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
|
||||
InsertNode(edge, convertNode, true);
|
||||
|
||||
//Check if reorder is still needed
|
||||
if (convertNode->getChildEdgeAt(0)->needReorder()) {
|
||||
edge = convertNode->getChildEdgeAt(0);
|
||||
} else {
|
||||
insertReorder = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (insertReorder) {
|
||||
std::string basicLayerName = edge->getParent()->getName() + "_" +
|
||||
MKLDNNExtensionUtils::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
|
||||
edge->getChild()->getName();
|
||||
std::string layerName = basicLayerName;
|
||||
int idx = 0;
|
||||
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
|
||||
idx++;
|
||||
layerName = basicLayerName + "_" + std::to_string(idx);
|
||||
}
|
||||
uniqueLayerNames.insert(layerName);
|
||||
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
|
||||
}
|
||||
uniqueLayerNames.insert(layerName);
|
||||
InsertReorder(edge, layerName, edge->getInputDesc(), edge->getOutputDesc());
|
||||
graphEdges.erase(graphEdges.begin() + i);
|
||||
i--;
|
||||
numberOfEdges--;
|
||||
@@ -1095,44 +1135,17 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa
|
||||
}
|
||||
reorderPtr->setDescs(inDesc, outDesc);
|
||||
reorderPtr->_scales = scales;
|
||||
|
||||
auto oIndex = edge->getOutputNum();
|
||||
auto iIndex = edge->getInputNum();
|
||||
if (iIndex < 0 || oIndex < 0)
|
||||
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
|
||||
<< edge->getParent()->getName() << " and "
|
||||
<< edge->getChild()->getName() << ".";
|
||||
|
||||
edge->drop();
|
||||
|
||||
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
|
||||
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
|
||||
|
||||
// Add edge for beforeNode
|
||||
beforeNode->getChild()->parentEdges.push_back(beforeNode);
|
||||
edge->getParent()->childEdges.push_back(beforeNode);
|
||||
|
||||
// Add edge for afterNode
|
||||
afterNode->getParent()->childEdges.push_back(afterNode);
|
||||
edge->getChild()->parentEdges.push_back(afterNode);
|
||||
|
||||
reorderPtr->setOptimized(isOptimized);
|
||||
|
||||
newReorder->getSupportedDescriptors();
|
||||
newReorder->initSupportedPrimitiveDescriptors();
|
||||
newReorder->selectOptimalPrimitiveDescriptor();
|
||||
|
||||
graphEdges.push_back(beforeNode);
|
||||
graphEdges.push_back(afterNode);
|
||||
InsertNode(edge, newReorder, true);
|
||||
|
||||
// Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal.
|
||||
// Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks.
|
||||
if (!isOptimized) {
|
||||
beforeNode->getDesc();
|
||||
afterNode->getDesc();
|
||||
newReorder->getParentEdgeAt(0)->getDesc();
|
||||
newReorder->getChildEdgeAt(0)->getDesc();
|
||||
}
|
||||
|
||||
graphNodes.push_back(newReorder);
|
||||
return newReorder;
|
||||
}
|
||||
|
||||
@@ -1235,3 +1248,42 @@ void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
|
||||
InferenceEngine::CNNNetwork MKLDNNGraph::dump() const {
|
||||
return dump_graph_as_ie_ngraph_net(*this);
|
||||
}
|
||||
|
||||
bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) {
|
||||
auto oIndex = edge->getOutputNum();
|
||||
auto iIndex = edge->getInputNum();
|
||||
if (iIndex < 0 || oIndex < 0)
|
||||
THROW_IE_EXCEPTION << "Cannot insert node '" << node->getName() << "' between nodes: "
|
||||
<< edge->getParent()->getName() << " and "
|
||||
<< edge->getChild()->getName() << ".";
|
||||
|
||||
edge->drop();
|
||||
|
||||
return InsertNode(edge->getParent(), edge->getChild(), node, iIndex, oIndex, initNode);
|
||||
}
|
||||
|
||||
bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode) {
|
||||
MKLDNNEdgePtr beforeNode(new MKLDNNEdge(parent, node, parentPort, 0));
|
||||
MKLDNNEdgePtr afterNode(new MKLDNNEdge(node, child, 0, childPort));
|
||||
|
||||
// Add edge for beforeNode
|
||||
beforeNode->getChild()->parentEdges.push_back(beforeNode);
|
||||
parent->childEdges.push_back(beforeNode);
|
||||
|
||||
// Add edge for afterNode
|
||||
afterNode->getParent()->childEdges.push_back(afterNode);
|
||||
child->parentEdges.push_back(afterNode);
|
||||
|
||||
if (initNode) {
|
||||
node->getSupportedDescriptors();
|
||||
node->initSupportedPrimitiveDescriptors();
|
||||
node->filterSupportedPrimitiveDescriptors();
|
||||
node->selectOptimalPrimitiveDescriptor();
|
||||
node->initOptimalPrimitiveDescriptor();
|
||||
}
|
||||
|
||||
graphEdges.push_back(beforeNode);
|
||||
graphEdges.push_back(afterNode);
|
||||
graphNodes.push_back(node);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -115,6 +115,41 @@ public:
|
||||
MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const InferenceEngine::TensorDesc& inDesc,
|
||||
const InferenceEngine::TensorDesc& outDesc, bool isOptimized = false, InferenceEngine::Blob::Ptr scales = nullptr);
|
||||
|
||||
/**
|
||||
* @brief Insert MKLDNNNode at the edge-specified location.
|
||||
* This method supports two regimes. First, the node is inserted without initialization (i.e. supported descriptors initialization,
|
||||
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
|
||||
* node without initialization.
|
||||
* @param edge
|
||||
* pointer to the edge in the graph where the node will be inserted
|
||||
* @param node
|
||||
* pointer to the inserted node
|
||||
* @param initNode
|
||||
* parameter that determines whether the node needs to be initialized
|
||||
* @return true in case of success, false otherwise.
|
||||
*/
|
||||
bool InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode = false);
|
||||
|
||||
/**
|
||||
* @brief Insert MKLDNNNode between two specified nodes.
|
||||
* This procedure creates two edges that link the parent and child nodes to the inserted one and adds all created objects to the graph.
|
||||
* This method supports two regimes. First, the node is inserted without initialization (i.e. supported descriptors initialization,
|
||||
* supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the
|
||||
* node without initialization.
|
||||
* @param parent
|
||||
* pointer to the parent node
|
||||
* @param child
|
||||
* pointer to the child node
|
||||
* @param parentPort
|
||||
* port number of the parent node to which the inserted node should be connected
|
||||
* @param childPort
|
||||
* port number of the child node to which the inserted node should be connected
|
||||
* @param initNode
|
||||
* parameter that determines whether the node needs to be initialized
|
||||
* @return true in case of success, false otherwise.
|
||||
*/
|
||||
bool InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode = false);
|
||||
|
||||
InferenceEngine::CNNNetwork dump() const;
|
||||
|
||||
template<typename NET>
|
||||
|
||||
@@ -55,9 +55,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
|
||||
MergeTwoEqualScaleShifts(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
MergeConversions(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
FuseBroadcastAndEltwise(graph);
|
||||
graph.RemoveDroppedNodes();
|
||||
|
||||
@@ -154,51 +151,6 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
|
||||
graph.RemoveDroppedEdges();
|
||||
}
|
||||
|
||||
void MKLDNNGraphOptimizer::MergeConversions(MKLDNNGraph& graph) {
|
||||
for (auto node : graph.GetNodes()) {
|
||||
// Input with at least 2 Convertions
|
||||
if (!IsOneOf(node->getType(), { Input }) || node->getChildEdges().size() < 2 ||
|
||||
!IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), { Convert })) {
|
||||
continue;
|
||||
}
|
||||
auto& input = node;
|
||||
|
||||
// Convertions of same the type with Concat as a child
|
||||
for (size_t i = 0; i < input->getChildEdges().size(); i++) {
|
||||
auto convInEdge = input->getChildEdgeAt(i);
|
||||
auto conv = convInEdge->getChild();
|
||||
auto convOutEdge = conv->getChildEdgeAt(i);
|
||||
auto convInDims = convInEdge->getDims();
|
||||
auto convOutDims = convOutEdge->getDims();
|
||||
Precision convOutPrecision = conv->getCnnLayer()->precision;
|
||||
|
||||
for (size_t j = i + 1; j < input->getChildEdges().size();) {
|
||||
auto childEdge = input->getChildEdgeAt(j);
|
||||
auto child = childEdge->getChild();
|
||||
|
||||
if (child->getCnnLayer()->precision != convOutPrecision ||
|
||||
child->getChildEdgeAt(0)->getDims() != convOutDims ||
|
||||
childEdge->getDims() != convInDims ||
|
||||
child->getChildEdges().size() != 1) {
|
||||
j++;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto childChildEdge = child->getChildEdgeAt(0);
|
||||
auto childChild = childChildEdge->getChild();
|
||||
int idxChild = childChildEdge->getOutputNum();
|
||||
|
||||
child->remove();
|
||||
graph.DropNode(child);
|
||||
|
||||
MKLDNNEdgePtr newEdge(new MKLDNNEdge(conv, childChild, 0, idxChild));
|
||||
graph.GetEdges().push_back(newEdge);
|
||||
conv->addEdge(newEdge);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
|
||||
auto& graphNodes = graph.GetNodes();
|
||||
|
||||
@@ -1844,6 +1796,10 @@ void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
|
||||
if (!InferenceEngine::details::CaselessEq<std::string>()(nodeType, "convert")) {
|
||||
continue;
|
||||
}
|
||||
if (convertCandidate->getCnnLayer()->insData.empty() ||
|
||||
convertCandidate->getCnnLayer()->outData.empty()) {
|
||||
continue;
|
||||
}
|
||||
auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
|
||||
auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
|
||||
if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
|
||||
@@ -2313,4 +2269,4 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) {
|
||||
mergePermuteAndReorder(parentNode, childNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,6 @@ public:
|
||||
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
|
||||
|
||||
private:
|
||||
void MergeConversions(MKLDNNGraph& graph);
|
||||
void MergeGroupConvolution(MKLDNNGraph& graph);
|
||||
void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
|
||||
void FuseConvolutionAndActivation(MKLDNNGraph &graph);
|
||||
@@ -41,6 +40,7 @@ private:
|
||||
void DropDoubleReorders(MKLDNNGraph& graph);
|
||||
void DropConvertReorder(MKLDNNGraph& graph);
|
||||
void ChangeConvertToReorder(MKLDNNGraph &graph);
|
||||
void AddConvertToReorder(MKLDNNGraph &graph);
|
||||
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
|
||||
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);
|
||||
void FuseEltwiseAndSimple(MKLDNNGraph &graph);
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "mkldnn_memory.h"
|
||||
#include "mkldnn_extension_utils.h"
|
||||
#include "nodes/common/cpu_memcpy.h"
|
||||
#include "nodes/common/cpu_convert.h"
|
||||
#include "ie_mkldnn.h"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
@@ -88,10 +89,54 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo
|
||||
}
|
||||
}
|
||||
|
||||
void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) {
|
||||
if (size != 0)
|
||||
IE_ASSERT(size <= output.GetDescriptor().get_size());
|
||||
if (input.GetDesc() == output.GetDesc()) {
|
||||
auto srcPtr = static_cast<uint8_t*>(input.GetPtr());
|
||||
auto dstPtr = static_cast<uint8_t*>(output.GetPtr());
|
||||
|
||||
auto copySize = size == 0 ? output.GetSize() : size;
|
||||
cpu_memcpy(dstPtr, srcPtr, copySize);
|
||||
} else {
|
||||
std::unique_ptr<mkldnn::reorder> pReorder;
|
||||
std::shared_ptr<memory> srcMemoryPtr;
|
||||
std::vector<uint8_t> tmpBuff;
|
||||
|
||||
try {
|
||||
pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive()));
|
||||
srcMemoryPtr = input.prim;
|
||||
}
|
||||
catch (const mkldnn::error& err) {
|
||||
if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType()) {
|
||||
//we probably could not make the reorder because there is no one supporting this precision conversion
|
||||
//lets try to convert data first using cpu_convert
|
||||
auto data = static_cast<const uint8_t *>(input.GetPtr());
|
||||
tmpBuff.resize(input.GetSize());
|
||||
|
||||
cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()),
|
||||
MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount());
|
||||
|
||||
MKLDNNMemory tmpMem(output.eng);
|
||||
tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetDesc().getFormat(), tmpBuff.data());
|
||||
|
||||
pReorder = std::unique_ptr<mkldnn::reorder>(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive()));
|
||||
srcMemoryPtr = tmpMem.prim;
|
||||
} else {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
if (pReorder) {
|
||||
mkldnn::stream loc_stream(output.eng, stream::flags::default_order);
|
||||
pReorder->execute(loc_stream, *srcMemoryPtr, *output.prim);
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "Could not make mkldnn reorder.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: It should be done via wrap into Memory;
|
||||
void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format, const void* data, size_t size, bool ftz) const {
|
||||
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
|
||||
|
||||
IE_ASSERT(!one_of(format, memory::format_tag::undef, memory::format_tag::any));
|
||||
|
||||
auto dst_desc = GetDescriptor();
|
||||
@@ -99,25 +144,21 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
|
||||
|
||||
IE_ASSERT(size <= dst_desc.get_size());
|
||||
|
||||
if (dst_desc != src_desc) {
|
||||
auto memData = GetDescriptor().data;
|
||||
memory::dims dims{memData.dims, memData.dims + memData.ndims};
|
||||
|
||||
MKLDNNMemory src(eng);
|
||||
src.Create(dims, dataType, format, data);
|
||||
|
||||
std::shared_ptr<mkldnn::reorder> pReorder =
|
||||
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(src.GetPrimitive(), GetPrimitive()));
|
||||
|
||||
mkldnn::stream loc_stream(eng, stream::flags::default_flags);
|
||||
pReorder->execute(loc_stream, *src.prim, *this->prim);
|
||||
} else {
|
||||
if (dst_desc == src_desc) {
|
||||
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType));
|
||||
uint8_t* dataPtr = static_cast<uint8_t*>(GetData());
|
||||
// We cannot support strides for i/o blobs because it affects performance.
|
||||
dataPtr += itemSize * prim->get_desc().data.offset0;
|
||||
cpu_memcpy(dataPtr, data, size);
|
||||
}
|
||||
} else {
|
||||
auto memData = this->GetDescriptor().data;
|
||||
memory::dims dims(memData.dims, memData.dims + memData.ndims);
|
||||
|
||||
MKLDNNMemory src(this->eng);
|
||||
src.Create(dims, dataType, format, data);
|
||||
|
||||
reorderData(src, *this);
|
||||
}
|
||||
if (ftz
|
||||
&& dataType == memory::data_type::f32
|
||||
&& prim->get_desc().data.format_kind != dnnl_format_kind_wino
|
||||
@@ -130,21 +171,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format
|
||||
}
|
||||
|
||||
void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const {
|
||||
if (size != 0)
|
||||
IE_ASSERT(size <= GetDescriptor().get_size());
|
||||
|
||||
// TODO: Optimization. Reorder perfect is not good enough, so in triviale cases we
|
||||
// prefer use simple copy.
|
||||
if (src.GetDesc() == this->GetDesc()) {
|
||||
auto srcPtr = static_cast<uint8_t*>(src.GetPtr());
|
||||
auto dstPtr = static_cast<uint8_t*>(this->GetPtr());
|
||||
auto copySize = size == 0 ? this->GetSize() : size;
|
||||
cpu_memcpy(dstPtr, srcPtr, copySize);
|
||||
} else {
|
||||
mkldnn::reorder reorderPrim(src.GetPrimitive(), GetPrimitive());
|
||||
mkldnn::stream loc_stream(eng, stream::flags::default_order);
|
||||
reorderPrim.execute(loc_stream, *src.prim, *this->prim);
|
||||
}
|
||||
reorderData(src, *this, size);
|
||||
|
||||
if (ftz
|
||||
&& src.GetDataType() == memory::data_type::f32
|
||||
@@ -840,5 +867,4 @@ bool MKLDNNMemoryDesc::blocksExtended() const {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
@@ -164,6 +164,8 @@ public:
|
||||
|
||||
static std::string formatToString(mkldnn::memory::format_tag fmt);
|
||||
|
||||
static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0);
|
||||
|
||||
private:
|
||||
std::shared_ptr<mkldnn::memory> prim;
|
||||
mkldnn::engine eng;
|
||||
|
||||
@@ -168,7 +168,8 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
|
||||
if (!(CaselessEq<std::string>()(layer->type, "memory") ||
|
||||
CaselessEq<std::string>()(layer->type, "memoryinput") ||
|
||||
CaselessEq<std::string>()(layer->type, "output") ||
|
||||
CaselessEq<std::string>()(layer->type, "reorder"))) {
|
||||
CaselessEq<std::string>()(layer->type, "reorder") ||
|
||||
CaselessEq<std::string>()(layer->type, "convert"))) {
|
||||
THROW_IE_EXCEPTION << "Inappropriate layer type: " << layer->type << " name: " << layer->name;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "tensor_desc_creator.h"
|
||||
#include <numeric>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace MKLDNNPlugin;
|
||||
|
||||
namespace {
|
||||
constexpr size_t channelsPos = 1lu;
|
||||
|
||||
class PlainFormatCreator : public TensorDescCreator {
|
||||
public:
|
||||
virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
|
||||
SizeVector order(srcDims.size());
|
||||
std::iota(order.begin(), order.end(), 0);
|
||||
return TensorDesc(precision, srcDims, {srcDims, order});
|
||||
}
|
||||
virtual size_t getMinimalRank() const { return 0lu; }
|
||||
};
|
||||
|
||||
class PerChannelCreator : public TensorDescCreator {
|
||||
public:
|
||||
virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const {
|
||||
SizeVector order(srcDims.size());
|
||||
std::iota(order.begin(), order.end(), 0);
|
||||
SizeVector blkDims = srcDims;
|
||||
if (srcDims.size() > 2) {
|
||||
auto moveElementBack = [](SizeVector& vector, size_t indx) {
|
||||
auto itr = vector.begin() + indx;
|
||||
std::rotate(itr, itr + 1, vector.end());
|
||||
};
|
||||
|
||||
moveElementBack(order, channelsPos);
|
||||
moveElementBack(blkDims, channelsPos);
|
||||
}
|
||||
|
||||
return TensorDesc(precision, srcDims, {blkDims, order});
|
||||
}
|
||||
virtual size_t getMinimalRank() const { return 3lu; }
|
||||
};
|
||||
|
||||
class ChannelBlockedCreator : public TensorDescCreator {
|
||||
public:
|
||||
ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {}
|
||||
virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const {
|
||||
if (srcDims.size() < 2) {
|
||||
THROW_IE_EXCEPTION << "Can't create blocked tensor descriptor!";
|
||||
}
|
||||
|
||||
SizeVector order(srcDims.size());
|
||||
std::iota(order.begin(), order.end(), 0);
|
||||
order.push_back(channelsPos);
|
||||
|
||||
SizeVector blkDims = srcDims;
|
||||
blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0);
|
||||
blkDims.push_back(_blockSize);
|
||||
|
||||
return TensorDesc(precision, srcDims, {blkDims, order});
|
||||
}
|
||||
virtual size_t getMinimalRank() const { return 3lu; }
|
||||
|
||||
private:
|
||||
size_t _blockSize;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
const TensorDescCreator::CreatorsMap& TensorDescCreator::getCommonCreators() {
|
||||
static const CreatorsMap map{ { TensorDescCreatorTypes::nspc, CreatorConstPtr(new PerChannelCreator) },
|
||||
{ TensorDescCreatorTypes::nCsp8c, CreatorConstPtr(new ChannelBlockedCreator(8)) },
|
||||
{ TensorDescCreatorTypes::nCsp16c, CreatorConstPtr(new ChannelBlockedCreator(16)) },
|
||||
{ TensorDescCreatorTypes::ncsp, CreatorConstPtr(new PlainFormatCreator) } };
|
||||
return map;
|
||||
}
|
||||
|
||||
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, unsigned int rank) {
|
||||
auto rankFilter = [rank](const CreatorsMap::value_type& item) {
|
||||
if (item.second->getMinimalRank() > rank) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto first = CreatorsMapFilterConstIterator(std::move(rankFilter), map.begin(), map.end());
|
||||
auto last = first.end();
|
||||
return std::make_pair(first, last);
|
||||
}
|
||||
|
||||
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
TensorDescCreator::makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes) {
|
||||
size_t bitMask = 0ul;
|
||||
for (auto& item : supportedTypes) {
|
||||
bitMask |= 1 << static_cast<unsigned>(item);
|
||||
}
|
||||
|
||||
auto rankTypesFilter = [rank, bitMask](const CreatorsMap::value_type& item) {
|
||||
if (!(bitMask & (1 << static_cast<unsigned>(item.first)))) {
|
||||
return false;
|
||||
}
|
||||
if (item.second->getMinimalRank() > rank) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto first = CreatorsMapFilterConstIterator(std::move(rankTypesFilter), map.begin(), map.end());
|
||||
auto last = first.end();
|
||||
return std::make_pair(first, last);
|
||||
}
|
||||
|
||||
std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
TensorDescCreator::makeFilteredRange(const CreatorsMap &map, TensorDescCreator::Predicate predicate) {
|
||||
auto first = CreatorsMapFilterConstIterator(std::move(predicate), map.begin(), map.end());
|
||||
auto last = first.end();
|
||||
return std::make_pair(first, last);
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ie_layouts.h>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
enum class TensorDescCreatorTypes : unsigned {
|
||||
nspc, // general per channels format
|
||||
ncsp, // general planar
|
||||
nCsp8c, // general channels blocked by 8
|
||||
nCsp16c // general channels blocked by 16
|
||||
};
|
||||
|
||||
class CreatorsMapFilterConstIterator;
|
||||
|
||||
class TensorDescCreator {
|
||||
public:
|
||||
typedef std::shared_ptr<TensorDescCreator> CreatorPtr;
|
||||
typedef std::shared_ptr<const TensorDescCreator> CreatorConstPtr;
|
||||
typedef std::map<TensorDescCreatorTypes, CreatorConstPtr> CreatorsMap;
|
||||
typedef std::function<bool(const CreatorsMap::value_type&)> Predicate;
|
||||
|
||||
public:
|
||||
static const CreatorsMap& getCommonCreators();
|
||||
static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
makeFilteredRange(const CreatorsMap &map, unsigned rank);
|
||||
static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector<TensorDescCreatorTypes>& supportedTypes);
|
||||
static std::pair<CreatorsMapFilterConstIterator, CreatorsMapFilterConstIterator>
|
||||
makeFilteredRange(const CreatorsMap& map, Predicate predicate);
|
||||
virtual InferenceEngine::TensorDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0;
|
||||
virtual size_t getMinimalRank() const = 0;
|
||||
virtual ~TensorDescCreator() = default;
|
||||
};
|
||||
|
||||
class CreatorsMapFilterConstIterator {
|
||||
public:
|
||||
typedef TensorDescCreator::CreatorsMap::const_iterator Iterator;
|
||||
typedef std::iterator_traits<Iterator>::value_type value_type;
|
||||
typedef std::iterator_traits<Iterator>::reference reference;
|
||||
typedef std::iterator_traits<Iterator>::pointer pointer;
|
||||
typedef std::iterator_traits<Iterator>::difference_type difference_type;
|
||||
typedef std::forward_iterator_tag iterator_category;
|
||||
typedef std::function<bool(const value_type&)> predicate_type;
|
||||
|
||||
public:
|
||||
CreatorsMapFilterConstIterator(predicate_type filter, Iterator begin, Iterator end) : _filter(std::move(filter)), _iter(begin), _end(end) {
|
||||
while (_iter != _end && !_filter(*_iter)) {
|
||||
++_iter;
|
||||
}
|
||||
}
|
||||
CreatorsMapFilterConstIterator& operator++() {
|
||||
do {
|
||||
++_iter;
|
||||
} while (_iter != _end && !_filter(*_iter));
|
||||
return *this;
|
||||
}
|
||||
|
||||
CreatorsMapFilterConstIterator end() const {
|
||||
return CreatorsMapFilterConstIterator(predicate_type(), _end, _end);
|
||||
}
|
||||
|
||||
CreatorsMapFilterConstIterator operator++(int) {
|
||||
CreatorsMapFilterConstIterator temp(*this);
|
||||
++*this;
|
||||
return temp;
|
||||
}
|
||||
|
||||
reference operator*() const {
|
||||
return *_iter;
|
||||
}
|
||||
|
||||
pointer operator->() const {
|
||||
return std::addressof(*_iter);
|
||||
}
|
||||
|
||||
friend bool operator==(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
|
||||
return lhs._iter == rhs._iter;
|
||||
}
|
||||
|
||||
friend bool operator!=(const CreatorsMapFilterConstIterator& lhs, const CreatorsMapFilterConstIterator& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator _iter;
|
||||
Iterator _end;
|
||||
predicate_type _filter;
|
||||
};
|
||||
} // namespace MKLDNNPlugin
|
||||
@@ -1,72 +0,0 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "base.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ie_precision.hpp"
|
||||
#include "common/cpu_convert.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace Extensions {
|
||||
namespace Cpu {
|
||||
|
||||
// Legacy extension-layer implementation of the Convert operation: copies the
// input blob into the output blob element-wise while converting the element
// precision via cpu_convert. (Removed in this change in favor of the native
// MKLDNNConvertNode.)
class ConvertImpl: public ExtLayerBase {
public:
    explicit ConvertImpl(const CNNLayer* layer) {
        try {
            logPrefix = "Convert layer with name '" + layer->name + "' ";
            // Convert is strictly unary: exactly one input and one output edge.
            if (layer->insData.size() != 1 || layer->outData.size() != 1)
                THROW_IE_EXCEPTION << logPrefix << "has incorrect number of input/output edges";

            // Read the "precision" layer parameter. NOTE(review): the stored
            // value is never read again in this class — presumably kept for the
            // side effect of GetParamAsString throwing when the parameter is
            // missing; confirm before removing.
            precision = layer->GetParamAsString("precision");

            // Single supported configuration: input and output each keep the
            // precision/dims/layout given by the layer's tensor descriptors.
            LayerConfig config;
            DataConfig dataIn;
            const SizeVector& ins_dims = layer->insData[0].lock()->getTensorDesc().getDims();
            dataIn.desc = TensorDesc(layer->insData[0].lock()->getTensorDesc().getPrecision(), ins_dims,
                                     layer->insData[0].lock()->getTensorDesc().getLayout());
            config.inConfs.push_back(dataIn);

            DataConfig dataConfigOut;
            const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
            dataConfigOut.desc = TensorDesc(layer->outData[0]->getTensorDesc().getPrecision(), out_dims,
                                            layer->outData[0]->getTensorDesc().getLayout());
            config.outConfs.push_back(dataConfigOut);
            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            // Constructor must not throw past the factory; record the message instead.
            errorMsg = ex.what();
        }
    }

    // Performs the precision conversion. Returns OK on success, GENERAL_ERROR
    // on any failure (message copied into resp when provided).
    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        try {
            void *srcPtr = inputs[0]->cbuffer().as<void *>();
            void *dstPtr = outputs[0]->buffer().as<void *>();
            // Conversion is element-wise, so element counts must match exactly.
            if (inputs[0]->size() != outputs[0]->size())
                THROW_IE_EXCEPTION << logPrefix << "has input and output buffers with different sizes";
            cpu_convert(srcPtr, dstPtr, inputs[0]->getTensorDesc().getPrecision(), outputs[0]->getTensorDesc().getPrecision(), outputs[0]->size());
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
            if (resp)
                errorMsg.copy(resp->msg, sizeof(resp->msg)-1);
            return GENERAL_ERROR;
        } catch(...) {
            return GENERAL_ERROR;
        }
        return OK;
    }

private:
    std::string precision;  // raw value of the layer's "precision" parameter (unused after ctor)
    std::string logPrefix;  // error-message prefix carrying the layer name
};
|
||||
|
||||
REG_FACTORY_FOR(ConvertImpl, Convert);
|
||||
|
||||
} // namespace Cpu
|
||||
} // namespace Extensions
|
||||
} // namespace InferenceEngine
|
||||
@@ -48,7 +48,6 @@ MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
|
||||
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
|
||||
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
|
||||
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
|
||||
MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
|
||||
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
|
||||
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
|
||||
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
|
||||
|
||||
110
inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp
Normal file
110
inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp
Normal file
@@ -0,0 +1,110 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <mkldnn_extension_utils.h>
|
||||
#include "mkldnn_convert_node.h"
|
||||
#include "common/cpu_convert.h"
|
||||
#include "common/tensor_desc_creator.h"
|
||||
|
||||
#define THROW_ERROR THROW_IE_EXCEPTION << getTypeStr() << " layer with name '" << getName() <<"' ERROR: "
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
|
||||
// The node is fully described by the CNNLayer (or by descriptors supplied
// later through setDescs); no additional state is initialized here.
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
        MKLDNNNode(layer, eng, cache) {}
|
||||
|
||||
// Populates inDims/outDims and validates the edge counts.
void MKLDNNConvertNode::getSupportedDescriptors() {
    // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data
    // from correspond tensor descriptors.
    if (outDims.empty() && output && output->getLayout() != InferenceEngine::Layout::ANY)
        outDims.push_back(MKLDNNDims(output->getDims()));
    if (inDims.empty() && input && input->getLayout() != InferenceEngine::Layout::ANY)
        inDims.push_back(MKLDNNDims(input->getDims()));
    // Convert consumes exactly one input and must feed at least one consumer.
    if (getParentEdges().size() != 1)
        THROW_ERROR << "Incorrect number of input edges";
    if (getChildEdges().empty())
        THROW_ERROR << "Incorrect number of output edges";
}
|
||||
|
||||
// Builds the list of supported primitive descriptors. Two paths exist:
//  - descriptors were injected via setDescs (auxiliary node added during graph
//    init): emit exactly one config mirroring those descriptors;
//  - the node comes from a real CNNLayer: emit one config per common tensor
//    descriptor creator matching the input rank, same layout on both sides.
void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto layer = getCnnLayer();
    if (layer == nullptr) {
        THROW_ERROR << "Cannot get CNN layer";
    }

    LayerConfig config;
    DataConfig dataIn;
    DataConfig dataConfigOut;

    config.dynBatchSupport = false;

    // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so
    // they should be used as the actual descriptors.
    if (input && input->getLayout() != InferenceEngine::Layout::ANY && output && output->getLayout() != InferenceEngine::Layout::ANY) {
        dataIn.desc = *input;
        config.inConfs.push_back(dataIn);

        const auto& blockingDesc = config.inConfs[0].desc.getBlockingDesc(); // inp/out layouts must be the same
        // Output reuses the input's dims/blocking, only the precision differs.
        dataConfigOut.desc = TensorDesc(output->getPrecision(), input->getDims(), blockingDesc);
        config.outConfs.push_back(dataConfigOut);
        supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
    } else if (layer->insData.size() == 1 && layer->outData.size() == 1) {
        auto insData = layer->insData[0].lock();
        if (nullptr == insData) {
            THROW_ERROR << "Input data is empty";
        }

        const SizeVector& insDims = insData->getTensorDesc().getDims();
        auto insPrecision = insData->getTensorDesc().getPrecision();
        const SizeVector& outputDims = layer->outData[0]->getTensorDesc().getDims();
        auto outPrecision = layer->outData[0]->getTensorDesc().getPrecision();

        // Placeholders; the descriptors are filled in per creator below.
        config.inConfs.push_back(dataIn);
        config.outConfs.push_back(dataConfigOut);

        auto creators = TensorDescCreator::getCommonCreators();
        auto range = TensorDescCreator::makeFilteredRange(creators, insDims.size());

        // One supported descriptor per applicable creator: identical layout on
        // both sides, precisions taken from the layer's input/output data.
        for (auto itr = range.first; itr != range.second; ++itr) {
            config.inConfs[0].desc = itr->second->createDesc(insPrecision, insDims);
            config.outConfs[0].desc = itr->second->createDesc(outPrecision, outputDims);

            supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemoryDesc(config.outConfs.front().desc).getFormat());
        }
    } else {
        THROW_ERROR << "Incorrect number of input/output edges";
    }
}
|
||||
|
||||
void MKLDNNConvertNode::createPrimitive() {
|
||||
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
|
||||
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
|
||||
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
|
||||
THROW_ERROR << "Destination memory didn't allocate.";
|
||||
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
|
||||
THROW_ERROR << "Input memory didn't allocate.";
|
||||
if (getSelectedPrimitiveDescriptor() == nullptr)
|
||||
THROW_ERROR << "Preferable primitive descriptor is not set.";
|
||||
}
|
||||
|
||||
// Runs the conversion: element-wise precision cast from the parent edge's
// memory into the child edge's memory via cpu_convert.
void MKLDNNConvertNode::execute(mkldnn::stream strm) {
    auto& parentMem = getParentEdgeAt(0)->getMemory();
    auto& childMem = getChildEdgeAt(0)->getMemory();
    // Element-wise cast requires identical element counts on both sides.
    if (parentMem.GetElementsCount() != childMem.GetElementsCount())
        THROW_ERROR << "Input and output buffers have different elements count";

    void* srcPtr = parentMem.GetPtr();
    void* dstPtr = childMem.GetPtr();
    cpu_convert(srcPtr, dstPtr, getParentEdgeAt(0)->getDesc().getPrecision(), getChildEdgeAt(0)->getDesc().getPrecision(), parentMem.GetElementsCount());
}
|
||||
|
||||
bool MKLDNNConvertNode::created() const {
|
||||
return getType() == Convert;
|
||||
}
|
||||
REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert);
|
||||
@@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ie_common.h>
|
||||
#include <mkldnn_node.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace MKLDNNPlugin {
|
||||
|
||||
// CPU-plugin node performing an element-wise precision conversion between its
// single input and its output(s). Can be built either from a CNNLayer or, for
// graph-internal auxiliary conversions, from a pair of tensor descriptors
// supplied through setDescs().
class MKLDNNConvertNode : public MKLDNNNode {
public:
    MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNConvertNode() override = default;

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;
    // Conversion always writes a distinct output buffer; in-place is disallowed.
    bool canBeInPlace() const override {
        return false;
    }

    // This is the interface extension designed to provide inp and output tensor descriptors without the CNNLayer.
    // In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method.
    // This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph
    // initialization stage.
    void setDescs(const InferenceEngine::TensorDesc& input, const InferenceEngine::TensorDesc& output) {
        this->input.reset(new InferenceEngine::TensorDesc(input));
        this->output.reset(new InferenceEngine::TensorDesc(output));
    }

    // Accessors for the injected descriptors; null when the node was built from a CNNLayer.
    std::shared_ptr<const InferenceEngine::TensorDesc> getInput() const { return input; }
    std::shared_ptr<const InferenceEngine::TensorDesc> getOutput() const { return output; }

private:
    std::shared_ptr<InferenceEngine::TensorDesc> input;   // descriptor set via setDescs, if any
    std::shared_ptr<InferenceEngine::TensorDesc> output;  // descriptor set via setDescs, if any
};
|
||||
} // namespace MKLDNNPlugin
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace CPUTestUtils;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Functional test fixture: builds a Gather graph whose indices constant has a
// configurable precision, so the CPU graph must bridge the precision gap with
// either a Reorder or the new Convert node.
class AddConvertToReorderTest : virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Constructs Parameter[FP32] + Constant[secondInpType] -> Gather[FP32].
    void BuildGraph(const ngraph::element::Type& secondInpType) {
        secondConstantType = secondInpType;
        int axis = 2;
        std::vector<int> indices = {0, 3, 2, 1};
        std::vector<size_t> indicesShape = {2, 2};
        std::vector<size_t> inputShape = {10, 20, 30, 40};

        InferenceEngine::Precision netPrecision = inPrc = outPrc = Precision::FP32;
        targetDevice = CommonTestUtils::DEVICE_CPU;

        ASSERT_EQ(ngraph::shape_size(indicesShape), indices.size())
                << "Indices vector size and provided indices shape doesn't fit each other";
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
        auto paramOuts = ngraph::helpers::convert2OutputVector(
                ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
        auto indicesNode = ngraph::opset3::Constant::create(secondConstantType, ngraph::Shape(indicesShape), indices);
        auto axisNode = ngraph::opset3::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis});
        auto gather = std::make_shared<ngraph::opset3::Gather>(paramOuts[0], indicesNode, axisNode);
        ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(gather)};
        function = std::make_shared<ngraph::Function>(results, params, "gather");
    }
    std::vector<std::vector<std::uint8_t>> CalculateRefs() override {
        // Convert the second input constant precision to i64 to run the reference function
        if (ngraph::element::Type_t::i8 == secondConstantType) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::i8, ngraph::element::Type_t::i64>().run_on_function(function);
        } else if (ngraph::element::Type_t::bf16 == secondConstantType) {
            ngraph::pass::ConvertPrecision<ngraph::element::Type_t::bf16, ngraph::element::Type_t::i64>().run_on_function(function);
        }
        return LayerTestsUtils::LayerTestsCommon::CalculateRefs();
    }

private:
    ngraph::element::Type secondConstantType;  // precision of the Gather indices constant
};
|
||||
|
||||
namespace {
|
||||
/* Test insertion of the Convert layer if there is no suitable reorder.
|
||||
|
||||
Parameter[FP32] Constant[BF16]
|
||||
\ /
|
||||
\ /
|
||||
\ Convert[I32] (Is inserted by the MKLDNNGraph)
|
||||
\ /
|
||||
Gather[FP32]
|
||||
|
|
||||
|
|
||||
Output[FP32]
|
||||
*/
|
||||
|
||||
// BF16 indices constant: per the diagram above, no suitable reorder covers
// this conversion, so the graph is expected to insert a Convert node instead.
TEST_F(AddConvertToReorderTest, smoke_TestAddConvert_CPU) {
    BuildGraph(ngraph::element::bf16);
    Run();
    CheckNodeOfTypeCount(executableNetwork, "Convert", 1);
    CheckNodeOfTypeCount(executableNetwork, "Reorder", 0);
}
|
||||
|
||||
/* Test insertion of the Reorder layer if there is one.
|
||||
|
||||
Parameter[FP32] Constant[I8]
|
||||
\ /
|
||||
\ /
|
||||
\ Reorder[I32] (Is inserted by the MKLDNNGraph)
|
||||
\ /
|
||||
Gather[FP32]
|
||||
|
|
||||
|
|
||||
Output[FP32]
|
||||
*/
|
||||
// I8 indices constant: a reorder handling the conversion exists, so the graph
// must use a Reorder node and no Convert node should appear.
TEST_F(AddConvertToReorderTest, smoke_TestAddReorder_CPU) {
    BuildGraph(ngraph::element::i8);
    Run();
    CheckNodeOfTypeCount(executableNetwork, "Convert", 0);
    CheckNodeOfTypeCount(executableNetwork, "Reorder", 1);
}
|
||||
} // namespace
|
||||
} // namespace LayerTestsDefinitions
|
||||
@@ -228,6 +228,27 @@ auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
|
||||
return paramsVector;
|
||||
}
|
||||
|
||||
// Asserts that the compiled network's exec graph contains exactly
// expectedCount nodes whose LAYER_TYPE runtime attribute equals nodeType.
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount) {
    InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo();
    auto fn = execGraphInfo.getFunction();
    ASSERT_NE(nullptr, fn);
    size_t matched = 0;
    for (const auto &op : fn->get_ops()) {
        const auto &rtInfo = op->get_rt_info();
        // Reads a string-valued runtime attribute, asserting it exists and has the expected variant type.
        auto readStringAttr = [&rtInfo](const std::string &attrName) -> std::string {
            auto entry = rtInfo.find(attrName);
            IE_ASSERT(rtInfo.end() != entry);
            auto asString = std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(entry->second);
            IE_ASSERT(nullptr != asString);
            return asString->get();
        };
        if (readStringAttr(ExecGraphInfoSerialization::LAYER_TYPE) == nodeType)
            ++matched;
    }

    ASSERT_EQ(expectedCount, matched) << "Unexpected count of the node type '" << nodeType << "' ";
}
|
||||
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams) {
|
||||
std::vector<CPUSpecificParams> resCPUParams;
|
||||
const int selectedTypeIndex = 3;
|
||||
|
||||
@@ -114,4 +114,5 @@ const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_av
|
||||
// utility functions
|
||||
std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector);
|
||||
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams);
|
||||
void CheckNodeOfTypeCount(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType, size_t expectedCount);
|
||||
} // namespace CPUTestUtils
|
||||
|
||||
Reference in New Issue
Block a user