[CPU] Generic JIT Eltwise implementation (#1464)
Parent: e3ed796b2e
Commit: abb8817cf6
@@ -9,7 +9,6 @@ if (WIN32)
endif()

set(LAYERS
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_activation_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_batchnorm_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_bin_conv_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_concat_node.cpp
@@ -17,7 +16,6 @@ set(LAYERS
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_crop_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_deconv_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_def_conv_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_depthwise_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_eltwise_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fullyconnected_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gemm_node.cpp
@@ -27,7 +25,6 @@ set(LAYERS
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_power_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp
@@ -94,7 +91,10 @@ set(LAYERS
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unique.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/interp.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp

        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax_imp.cpp
@@ -150,19 +150,6 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::softmax_forward::desc>() {
    return typeDesc->getPtr();
}

MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::depthwise_forward::desc> desc) {
    this->desc.reset(new DescFwdImpl<mkldnn::depthwise_forward::desc>(desc));
}

MKLDNNDescriptor::operator std::shared_ptr<mkldnn::depthwise_forward::desc>() {
    DescFwdImpl<mkldnn::depthwise_forward::desc> *typeDesc =
            dynamic_cast<DescFwdImpl<mkldnn::depthwise_forward::desc> *>(desc.get());
    if (typeDesc == nullptr) {
        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
    }
    return typeDesc->getPtr();
}

MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::rnn_forward::desc> desc) {
    this->desc.reset(new DescFwdImpl<mkldnn::rnn_forward::desc>(desc));
}

@@ -37,9 +37,6 @@ public:
    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc);
    operator std::shared_ptr<mkldnn::softmax_forward::desc>();

    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::depthwise_forward::desc> desc);
    operator std::shared_ptr<mkldnn::depthwise_forward::desc>();

    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::rnn_forward::desc> desc);
    operator std::shared_ptr<mkldnn::rnn_forward::desc>();

@@ -30,6 +30,7 @@
#include <unordered_set>
#include <utility>
#include <cstring>
#include <legacy/details/ie_cnn_network_tools.h>

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@@ -57,18 +58,17 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
    if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
#ifdef USE_CNNNETWORK_LPT
        auto params = LayerTransformation::Params(true,  // updatePrecisions
            true,  // quantizeOutputs
            true,  // weightsToConst
            LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
            LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
            true,  // roundQuantizedValues
            true,  // updateBiases
            true);  // supportAsymmetricQuantization
            true,  // quantizeOutputs
            true,  // weightsToConst
            LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
            LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
            true,  // roundQuantizedValues
            true,  // updateBiases
            true);  // supportAsymmetricQuantization
        LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
            add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
            addCleanup<ScaleShiftToConvolutionTransformation>(
                LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
                "ScaleShift"));
            remove("ScaleShift").
            remove("Power"));
        transformer.transform(*_clonedNetwork);
#endif

@@ -102,6 +102,59 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network

    MKLDNNGraph::ApplyUnrollPasses(static_cast<ICNNNetwork&>(*_clonedNetwork));

    auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, std::string name) {
        LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
        auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
        constLayer->blobs["custom"] = blob;

        std::vector<size_t> constDims(layer->insData[0].lock()->getDims().size(), 1);
        if (constDims.size() > 1)
            constDims[1] = blob.get()->size();
        else
            constDims[0] = blob.get()->size();
        const TensorDesc& td = {blob->getTensorDesc().getPrecision(), constDims, TensorDesc::getLayoutByDims(constDims)};

        DataPtr newEdgeAfterLayer(new Data(constLayer->name, td));
        newEdgeAfterLayer->setName(constLayer->name);
        getCreatorLayer(newEdgeAfterLayer) = constLayer;
        getInputTo(newEdgeAfterLayer).clear();

        _clonedNetwork->addData(constLayer->name.c_str(), newEdgeAfterLayer);
        IE_SUPPRESS_DEPRECATED_START
        _clonedNetwork->addLayer(constLayer);
        IE_SUPPRESS_DEPRECATED_END

        constLayer->outData.push_back(newEdgeAfterLayer);
        getInputTo(newEdgeAfterLayer)[layer->name] = layer;
        layer->insData.push_back(newEdgeAfterLayer);
    };

    auto all_layers = details::CNNNetSortTopologically(*_clonedNetwork);
    for (auto &layer : all_layers) {
        if (layer->type == "ScaleShift" && layer->insData.size() == 1) {
            Blob::Ptr scalesBlob = layer->blobs["weights"];
            if (scalesBlob != nullptr)
                createConstInputTo(layer, scalesBlob, "weights");

            Blob::Ptr shiftBlob = layer->blobs["biases"];
            if (shiftBlob != nullptr) {
                createConstInputTo(layer, shiftBlob, "biases");
            } else if (scalesBlob != nullptr) {
                Blob::Ptr biases = make_shared_blob<float>(scalesBlob->getTensorDesc());
                biases->allocate();
                auto biasesPtr = biases->buffer().as<float*>();
                for (size_t i = 0; i < biases->size(); i++)
                    biasesPtr[i] = 0;

                createConstInputTo(layer, biases, "biases");
            }
        } else if (layer->type == "PReLU" && layer->insData.size() == 1) {
            Blob::Ptr scalesBlob = layer->blobs["weights"];
            if (scalesBlob != nullptr)
                createConstInputTo(layer, scalesBlob, "weights");
        }
    }

    if (_cfg.batchLimit > 1) {
        // check topology for applicability
        if (!CanProcessDynBatch(*_clonedNetwork)) {
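The shape chosen for the injected constant broadcasts the per-channel blob against the producer's input. A minimal standalone sketch of that dim logic (the helper name is illustrative, not part of the commit):

#include <cstddef>
#include <vector>

// Hypothetical helper mirroring the constDims logic above: every dim is 1
// except the channel axis (dim 1), which carries the blob size, so the
// constant broadcasts per channel; rank-1 inputs fall back to dim 0.
std::vector<size_t> constInputShape(size_t inputRank, size_t blobSize) {
    std::vector<size_t> dims(inputRank, 1);
    if (dims.size() > 1)
        dims[1] = blobSize;
    else
        dims[0] = blobSize;
    return dims;
}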
@@ -272,7 +325,6 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &n
            type != SoftMax &&
            type != Split &&
            type != Concatenation &&
            type != Power &&
            type != Eltwise &&
            type != Crop &&
            type != BatchNormalization &&

File diff suppressed because it is too large
@@ -5,6 +5,7 @@
#pragma once

#include "mkldnn_graph.h"
#include "nodes/mkldnn_eltwise_node.h"
#include <vector>

namespace MKLDNNPlugin {
@@ -18,18 +19,12 @@ public:
    void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);

private:
    void SLTMTransform(MKLDNNGraph& graph);
    void MergeConversions(MKLDNNGraph& graph);
    void MergeGroupConvolution(MKLDNNGraph& graph);
    void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
    void MergeSigmoidAndMultiplyToSwish(MKLDNNGraph& graph);
#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
    void FuseConvolutionAndActivation(MKLDNNGraph &graph);
    void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
#endif
#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
    void FuseConvolutionAndDepthwise(MKLDNNGraph &graph);
#endif
    void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph);
    void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
#if defined(COMPILED_CPU_MKLDNN_QUANTIZE_NODE)
@@ -59,6 +54,9 @@ private:
    void FuseClampAndQuantize(MKLDNNGraph &graph);

    bool IsOneOf(Type type, std::vector<Type> types);
    bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);

    void removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge);
};

}  // namespace MKLDNNPlugin

@@ -22,12 +22,9 @@
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_lrn_node.h>
#include <nodes/mkldnn_pooling_node.h>
#include <nodes/mkldnn_power_node.h>
#include <nodes/mkldnn_activation_node.h>
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_reshape_node.h>
#include <nodes/mkldnn_roi_pooling_node.h>
#include <nodes/mkldnn_depthwise_node.h>
#include <nodes/mkldnn_softmax_node.h>
#include <nodes/mkldnn_tile_node.h>
#include <nodes/mkldnn_split_node.h>
@@ -63,23 +60,23 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
        { "Output", Output },
        { "Reorder", Reorder },
        { "Convolution", Convolution },
        { "ReLU", Activation },
        { "GELU", Activation },
        { "ELU", Activation },
        { "Sigmoid", Activation },
        { "Logistic", Activation },
        { "TanH", Activation },
        { "ReLU6", Activation },
        { "Exp", Activation },
        { "Not", Activation },
        { "Activation", Activation },
        { "Clamp", Activation },
        { "Swish", Activation },
        { "HSwish", Activation },
        { "Mish", Activation },
        { "HSigmoid", Activation },
        { "ScaleShift", Depthwise },
        { "PReLU", Depthwise },
        { "ReLU", Eltwise },
        { "GELU", Eltwise },
        { "ELU", Eltwise },
        { "Sigmoid", Eltwise },
        { "Logistic", Eltwise },
        { "TanH", Eltwise },
        { "ReLU6", Eltwise },
        { "Exp", Eltwise },
        { "Not", Eltwise },
        { "Activation", Eltwise },
        { "Clamp", Eltwise },
        { "Swish", Eltwise },
        { "HSwish", Eltwise },
        { "Mish", Eltwise },
        { "HSigmoid", Eltwise },
        { "ScaleShift", Eltwise },
        { "PReLU", Eltwise },
        { "Norm", Lrn },
        { "LRN", Lrn },
        { "Pooling", Pooling },
@@ -91,9 +88,10 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
        { "Split", Split },
        { "Slice", Split },
        { "Concat", Concatenation },
        { "Power", Power },
        { "Deconvolution", Deconvolution },
        { "Eltwise", Eltwise },
        { "Mod", Eltwise },
        { "Power", Eltwise },
        { "Crop", Crop },
        { "Reshape", Reshape },
        { "Tile", Tile },

@@ -44,7 +44,6 @@ enum Type {
    SoftMax,
    Split,
    Concatenation,
    Power,
    Eltwise,
    Gemm,
    Crop,
@@ -118,8 +117,6 @@ static std::string NameFromType(Type type) {
        return "Split";
    case Concatenation:
        return "Concatenation";
    case Power:
        return "Power";
    case Depthwise:
        return "Depthwise";
    case Crop:

@@ -43,6 +43,7 @@
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
@@ -145,6 +146,7 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
    pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
    pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
    pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
    pass_config->disable<ngraph::pass::ConvertMod>();

    pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();

inference-engine/src/mkldnn_plugin/nodes/common/emitter.cpp (new file, 200 lines)
@@ -0,0 +1,200 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "emitter.h"
#include <vector>

using namespace mkldnn::impl::cpu;
using namespace mkldnn::impl;
using namespace Xbyak;

namespace MKLDNNPlugin {

template <typename T, typename P>
constexpr bool one_of(T val, P item) { return val == item; }

template <typename T, typename P, typename... Args>
constexpr bool one_of(T val, P item, Args... item_others) {
    return val == item || one_of(val, item_others...);
}

size_t jit_emitter::get_max_vecs_count() const {
    return one_of(host_isa_, cpu::avx512_common, cpu::avx512_core) ? 32 : 16;
}

size_t jit_emitter::get_vec_length() const {
    return one_of(host_isa_, cpu::avx512_common, cpu::avx512_core) ? 64 :
           one_of(host_isa_, cpu::avx2) ? 32 : 16;
}

void jit_emitter::push_vec(const Xbyak::Address &addr, size_t vec_idx) const {
    if (host_isa_ == cpu::sse42) {
        h->uni_vmovups(addr, Xmm(vec_idx));
    } else if (host_isa_ == cpu::avx2) {
        h->uni_vmovups(addr, Ymm(vec_idx));
    } else {
        h->uni_vmovups(addr, Zmm(vec_idx));
    }
}

void jit_emitter::pop_vec(size_t vec_idx, const Xbyak::Address &addr) const {
    if (host_isa_ == cpu::sse42) {
        h->uni_vmovups(Xmm(vec_idx), addr);
    } else if (host_isa_ == cpu::avx2) {
        h->uni_vmovups(Ymm(vec_idx), addr);
    } else {
        h->uni_vmovups(Zmm(vec_idx), addr);
    }
}

size_t jit_emitter::aux_vecs_count() const {
    return 0;
}

size_t jit_emitter::aux_gprs_count() const {
    // We need one gpr to load table address
    return entry_map_.empty() ? 0 : 1;
}

std::set<InferenceEngine::Precision> jit_emitter::get_supported_precisions() {
    return {InferenceEngine::Precision::FP32};
}

void jit_emitter::emitter_preamble(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &pool_vec_idxs,
                                   const std::vector<size_t> &pool_gpr_idxs) {
    using namespace Xbyak::util;

    for (auto idx : pool_vec_idxs)
        aux_vec_idxs.push_back(idx);

    // For sse42 mask register has to be Xmm(0)
    if (host_isa_ == cpu::sse42 && aux_vecs_count() > 0) {
        size_t idx = 0;
        assert(std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) == in_vec_idxs.end());
        if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) == aux_vec_idxs.end()) {
            aux_vec_idxs.push_back(idx);
            preserved_vec_idxs.push_back(idx);
        }

        // moving mask vector at the beginning of aux vectors list to simplify further processing
        for (int i = 0; i < aux_vec_idxs.size(); i++) {
            if (aux_vec_idxs[i] == 0) {
                size_t tmp = aux_vec_idxs[0];
                aux_vec_idxs[0] = aux_vec_idxs[i];
                aux_vec_idxs[i] = tmp;
                break;
            }
        }
    }

    for (size_t idx = 0; idx < get_max_vecs_count(); idx++) {
        if (aux_vec_idxs.size() >= aux_vecs_count()) break;

        if (std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) != in_vec_idxs.end()) continue;
        if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) continue;
        if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) continue;

        aux_vec_idxs.push_back(idx);
        preserved_vec_idxs.push_back(idx);
    }
    assert(aux_vec_idxs.size() >= aux_vecs_count());

    // Same logic but to allocate gprs
    for (auto idx : pool_gpr_idxs)
        aux_gpr_idxs.push_back(idx);

    for (size_t gpr_idx = 0; gpr_idx <= Operand::R15; ++gpr_idx) {
        size_t _idx = Operand::R15 - gpr_idx;  // we allocate from the end

        if (aux_gpr_idxs.size() >= aux_gprs_count()) break;
        if (_idx == Operand::RSP) continue;
        if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) continue;
        if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) continue;

        aux_gpr_idxs.push_back(_idx);
        preserved_gpr_idxs.push_back(_idx);
    }
    assert(aux_gpr_idxs.size() == aux_gprs_count());

    if (!entry_map_.empty()) {
        p_table = Reg64(aux_gpr_idxs[0]);
        aux_gpr_idxs.erase(aux_gpr_idxs.begin());
    }

    for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i)
        h->push(Reg64(preserved_gpr_idxs[i]));

    if (preserved_vec_idxs.size())
        h->sub(h->rsp, preserved_vec_idxs.size() * get_vec_length());

    for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) {
        push_vec(h->ptr[h->rsp + i * get_vec_length()], preserved_vec_idxs[i]);
    }

    if (!entry_map_.empty())
        load_table_addr();
}

void jit_emitter::emitter_postamble() {
    using namespace Xbyak::util;

    for (size_t i = 0; i < preserved_vec_idxs.size(); ++i)
        pop_vec(preserved_vec_idxs[i], h->ptr[h->rsp + i * get_vec_length()]);

    if (preserved_vec_idxs.size())
        h->add(h->rsp, preserved_vec_idxs.size() * get_vec_length());

    for (int i = aux_gprs_count() - 1; i >= 0; --i)
        h->pop(Reg64(preserved_gpr_idxs[i]));

    preserved_vec_idxs.clear();
    preserved_gpr_idxs.clear();

    aux_vec_idxs.clear();
    aux_gpr_idxs.clear();
}

void jit_emitter::emit_table() {
    h->align(64);
    h->L(l_table);

    // Assumption: entries can be inserted with dd, so they should be 4 bytes.
    assert(sizeof(table_entry_val_t) == 4);

    // Run through the map and insert values stored there
    for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) {
        const auto &te = (*it).second;  // get map entry for a given key
        const auto len = te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
        for (size_t d = 0; d < len; d += sizeof(table_entry_val_t))
            h->dd(te.val);
    }
}

void jit_emitter::prepare_table() {
    register_table_entries();

    // Now that we registered the entries, we set the offsets. No
    // entries should be registered after this point. This lets us
    // expect the same order when injecting the table entries in
    // emit_table().
    size_t off = 0;
    for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) {
        auto &te = (*it).second;
        te.off = off;
        off += te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
    }
}

void jit_emitter::emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                       const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {
    emitter_preamble(in_vec_idxs, pool_vec_idxs, pool_gpr_idxs);

    emit_impl(in_vec_idxs, out_vec_idxs, pool_vec_idxs, pool_gpr_idxs);

    emitter_postamble();
}

}  // namespace MKLDNNPlugin
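The aux-register allocation scan in emitter_preamble can be modeled in isolation. A self-contained sketch with illustrative names (not from the commit): caller-donated pool registers are taken first, then free indices are scanned, and anything claimed outside the pool must be saved and restored around the emitter body:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model of the allocation loop above. `in_use` are the kernel's live
// inputs; `pool` are scratch registers the caller donates for free.
std::vector<size_t> allocate_aux(size_t file_size, size_t need,
                                 const std::vector<size_t>& pool,
                                 const std::vector<size_t>& in_use,
                                 std::vector<size_t>& to_preserve) {
    std::vector<size_t> aux(pool);
    auto taken = [](const std::vector<size_t>& v, size_t i) {
        return std::find(v.begin(), v.end(), i) != v.end();
    };
    for (size_t idx = 0; idx < file_size && aux.size() < need; ++idx) {
        if (taken(in_use, idx) || taken(aux, idx)) continue;
        aux.push_back(idx);
        to_preserve.push_back(idx);  // not donated, so push/pop around the body
    }
    assert(aux.size() >= need);
    return aux;
}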
inference-engine/src/mkldnn_plugin/nodes/common/emitter.h (new file, 128 lines)
@@ -0,0 +1,128 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ie_common.h>
#include "jit_generator.hpp"
#include "mkldnn_node.h"
#include <set>

namespace MKLDNNPlugin {

class jit_emitter {
public:
    jit_emitter(mkldnn::impl::cpu::jit_generator* host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32)
        : h(host), host_isa_(host_isa), n(node), exec_prc_(exec_prc) {
        k_mask = Xbyak::Opmask(1);  // FIXME: in the general case we need to preserve k_mask state as well
    }

    virtual void emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                      const std::vector<size_t> &pool_vec_idxs = {}, const std::vector<size_t> &pool_gpr_idxs = {});
    virtual void emit_table();
    virtual size_t get_inputs_num() = 0;
    virtual size_t aux_vecs_count() const;
    static std::set<InferenceEngine::Precision> get_supported_precisions();

protected:
    virtual size_t aux_gprs_count() const;

    size_t get_max_vecs_count() const;
    size_t get_vec_length() const;

    const MKLDNNNode& n;
    mkldnn::impl::cpu::jit_generator* h;
    mkldnn::impl::cpu::cpu_isa_t host_isa_;
    InferenceEngine::Precision exec_prc_;

    Xbyak::Opmask k_mask;

    virtual void prepare_table();
    virtual void register_table_entries() {}

    void load_table_addr() { h->mov(p_table, l_table); }

    // we accept only 32bit hexadecimal table values to avoid any rounding
    using table_entry_val_t = uint32_t;
    using table_entry_offset_t = size_t;  // offsets are in bytes wrt p_table
    using table_entry_bcast_t = bool;  // true => bcast value

    struct table_entry_t {
        table_entry_val_t val;
        table_entry_bcast_t bcast;
    };
    struct mapped_table_entry_t {
        table_entry_offset_t off;
        table_entry_val_t val;
        table_entry_bcast_t bcast;
    };

    Xbyak::Reg64 p_table;
    Xbyak::Label l_table;

    enum {
        _cmp_eq_oq = mkldnn::impl::cpu::jit_generator::_cmp_eq_oq,
        _cmp_neq_uq = mkldnn::impl::cpu::jit_generator::_cmp_neq_uq,
        _cmp_lt_os = mkldnn::impl::cpu::jit_generator::_cmp_lt_os,
        _cmp_le_os = mkldnn::impl::cpu::jit_generator::_cmp_le_os,
        _cmp_ge_os = mkldnn::impl::cpu::jit_generator::_cmp_nlt_us,
        _cmp_gt_os = mkldnn::impl::cpu::jit_generator::_cmp_nle_us,
    };

    virtual void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                           const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {}

    virtual void emitter_preamble(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &pool_vec_idxs,
                                  const std::vector<size_t> &pool_gpr_idxs);
    virtual void emitter_postamble();

    std::vector<size_t> aux_vec_idxs;
    std::vector<size_t> aux_gpr_idxs;

    static constexpr int k_mask_size = 8;

    Xbyak::Address table_val(std::string key, size_t key_off_val_shift = 0) const {
        auto off = table_off(key, key_off_val_shift);
        return h->ptr[p_table + off];
    }

    using table_t = std::multimap<std::string, table_entry_t>;
    using mapped_table_t = std::multimap<std::string, mapped_table_entry_t>;

    mapped_table_t entry_map_;

    void push_arg_entry_of(const std::string key, const table_entry_val_t val, const bool broadcast) {
        mapped_table_entry_t te {0, val, broadcast};
        entry_map_.insert(std::make_pair(key, te));
    }

    void push_entries_of(const table_t &t) {
        for (auto it = t.begin(); it != t.end(); it++) {
            auto key = (*it).first;
            auto te = (*it).second;  // copy values from table
            push_arg_entry_of(key, te.val, te.bcast);
        }
    }

private:
    std::vector<size_t> preserved_vec_idxs;
    std::vector<size_t> preserved_gpr_idxs;

    void push_vec(const Xbyak::Address &addr, size_t vec_idx) const;
    void pop_vec(size_t vec_idx, const Xbyak::Address &addr) const;

    size_t table_off(std::string& key, size_t key_off_val_shift = 0) const {
        // assumption: all table entries sharing the same key also
        // share their broadcast property
        // TODO: enforce through data structure
        const auto it = entry_map_.find(key);  // search an entry for a key
        assert(it != entry_map_.end());
        const auto &te = (*it).second;
        const auto scale = te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
        return te.off + key_off_val_shift * scale;
    }
};

}  // namespace MKLDNNPlugin
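The constants-table bookkeeping declared here (register entries first, then assign byte offsets, then emit) can be illustrated with a small standalone model; struct and function names below are illustrative only:

#include <cstdint>
#include <map>
#include <string>

// Offsets are assigned once, in registration order: a broadcast entry spans a
// full vector register, a scalar entry four bytes. table_val() then resolves a
// key to p_table + off at code-generation time.
struct Entry { std::size_t off; std::uint32_t val; bool bcast; };

std::size_t assign_offsets(std::map<std::string, Entry>& table, std::size_t vlen) {
    std::size_t off = 0;
    for (auto& kv : table) {
        kv.second.off = off;
        off += kv.second.bcast ? vlen : sizeof(std::uint32_t);
    }
    return off;  // total table size in bytes
}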
inference-engine/src/mkldnn_plugin/nodes/jit_eltwise_emitters.cpp (new file, 1417 lines)
File diff suppressed because it is too large
@@ -0,0 +1,417 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "common/emitter.h"
#include "jit_generator.hpp"
#include "mkldnn_node.h"

namespace MKLDNNPlugin {

class jit_add_emitter : public jit_emitter {
public:
    jit_add_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_mul_add_emitter : public jit_emitter {
public:
    jit_mul_add_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    size_t aux_vecs_count() const override;
};

class jit_subtract_emitter : public jit_emitter {
public:
    jit_subtract_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                         InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_multiply_emitter : public jit_emitter {
public:
    jit_multiply_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                         InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_divide_emitter : public jit_emitter {
public:
    jit_divide_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_floor_mod_emitter : public jit_emitter {
public:
    jit_floor_mod_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
    size_t aux_vecs_count() const override;
};

class jit_mod_emitter : public jit_emitter {
public:
    jit_mod_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                    InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
    size_t aux_vecs_count() const override;
};

class jit_maximum_emitter : public jit_emitter {
public:
    jit_maximum_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;
    static std::set<InferenceEngine::Precision> get_supported_precisions();

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_minimum_emitter : public jit_emitter {
public:
    jit_minimum_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;
    static std::set<InferenceEngine::Precision> get_supported_precisions();

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_squared_difference_emitter : public jit_emitter {
public:
    jit_squared_difference_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                                   InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_power_dynamic_emitter : public jit_emitter {
public:
    jit_power_dynamic_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};

class jit_equal_emitter : public jit_emitter {
public:
    jit_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_not_equal_emitter : public jit_emitter {
public:
    jit_not_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                          InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_greater_emitter : public jit_emitter {
public:
    jit_greater_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                        InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_greater_equal_emitter : public jit_emitter {
public:
    jit_greater_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                              InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_less_emitter : public jit_emitter {
public:
    jit_less_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                     InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_less_equal_emitter : public jit_emitter {
public:
    jit_less_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_logical_and_emitter : public jit_emitter {
public:
    jit_logical_and_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_logical_or_emitter : public jit_emitter {
public:
    jit_logical_or_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                           InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_logical_xor_emitter : public jit_emitter {
public:
    jit_logical_xor_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_logical_not_emitter : public jit_emitter {
public:
    jit_logical_not_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                            InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_power_static_emitter : public jit_emitter {
public:
    jit_power_static_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                             InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    void register_table_entries() override;
    size_t aux_vecs_count() const override;
};

class jit_prelu_emitter : public jit_emitter {
public:
    jit_prelu_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

private:
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    template <mkldnn::impl::cpu::cpu_isa_t isa>
    void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;

    size_t aux_vecs_count() const override;
};

}  // namespace MKLDNNPlugin
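The 1417-line implementation file for these classes is suppressed above. As a rough sketch of the pattern the declarations imply (not the actual suppressed code), an emitter like jit_add_emitter typically dispatches on host_isa_ in emit_impl and does the vector work in a templated emit_isa; the conditional3 ISA-to-register mapping is assumed here from oneDNN's utils:

template <mkldnn::impl::cpu::cpu_isa_t isa>
void jit_add_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs,
                               const std::vector<size_t> &out_vec_idxs) const {
    // Pick Xmm/Ymm/Zmm to match the ISA, then emit one packed FP32 add.
    using Vmm = typename mkldnn::impl::utils::conditional3<isa == mkldnn::impl::cpu::sse42, Xbyak::Xmm,
            isa == mkldnn::impl::cpu::avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
    Vmm vmm_src0 = Vmm(in_vec_idxs[0]);
    Vmm vmm_src1 = Vmm(in_vec_idxs[1]);
    Vmm vmm_dst  = Vmm(out_vec_idxs[0]);
    h->uni_vaddps(vmm_dst, vmm_src0, vmm_src1);
}

void jit_add_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                                const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {
    if (host_isa_ == mkldnn::impl::cpu::sse42) {
        emit_isa<mkldnn::impl::cpu::sse42>(in_vec_idxs, out_vec_idxs);
    } else if (host_isa_ == mkldnn::impl::cpu::avx2) {
        emit_isa<mkldnn::impl::cpu::avx2>(in_vec_idxs, out_vec_idxs);
    } else if (host_isa_ == mkldnn::impl::cpu::avx512_common) {
        emit_isa<mkldnn::impl::cpu::avx512_common>(in_vec_idxs, out_vec_idxs);
    }
}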
@@ -0,0 +1,70 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "common/emitter.h"
#include "jit_mkldnn_emitters.hpp"
#include "mkldnn_eltwise_node.h"
#include "legacy/ie_layers.h"

using namespace mkldnn::impl::utils;
using namespace mkldnn::impl::cpu;
using namespace Xbyak;

namespace MKLDNNPlugin {

jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode& node, InferenceEngine::Precision exec_prc)
    : jit_emitter(host, host_isa, node, exec_prc) {
    auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(n);

    auto alg = static_cast<mkldnn_alg_kind_t>(eltwiseNode.getAlgorithm());

    if (host_isa_ == cpu::sse42) {
        eltwise_injector_sse42 = std::make_shared<jit_uni_eltwise_injector_f32<cpu::sse42>>(
                host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
    } else if (host_isa_ == cpu::avx2) {
        eltwise_injector_avx2 = std::make_shared<jit_uni_eltwise_injector_f32<cpu::avx2>>(
                host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
    } else if (host_isa_ == cpu::avx512_common) {
        eltwise_injector_avx512_common = std::make_shared<jit_uni_eltwise_injector_f32<cpu::avx512_common>>(
                host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
    } else {
        assert(!"unsupported isa");
    }
}

size_t jit_mkldnn_emitter::get_inputs_num() { return 1; }

void jit_mkldnn_emitter::emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                              const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {
    if (host_isa_ == cpu::sse42) {
        if (out_vec_idxs[0] != in_vec_idxs[0])
            h->uni_vmovups(Xmm(out_vec_idxs[0]), Xmm(in_vec_idxs[0]));
        eltwise_injector_sse42->compute_vector(out_vec_idxs[0]);
    } else if (host_isa_ == cpu::avx2) {
        if (out_vec_idxs[0] != in_vec_idxs[0])
            h->uni_vmovups(Ymm(out_vec_idxs[0]), Ymm(in_vec_idxs[0]));
        eltwise_injector_avx2->compute_vector(out_vec_idxs[0]);
    } else if (host_isa_ == cpu::avx512_common) {
        if (out_vec_idxs[0] != in_vec_idxs[0])
            h->uni_vmovups(Zmm(out_vec_idxs[0]), Zmm(in_vec_idxs[0]));
        eltwise_injector_avx512_common->compute_vector(out_vec_idxs[0]);
    } else {
        assert(!"unsupported isa");
    }
}

void jit_mkldnn_emitter::emit_table() {
    if (host_isa_ == cpu::sse42) {
        eltwise_injector_sse42->prepare_table();
    } else if (host_isa_ == cpu::avx2) {
        eltwise_injector_avx2->prepare_table();
    } else if (host_isa_ == cpu::avx512_common) {
        eltwise_injector_avx512_common->prepare_table();
    } else {
        assert(!"unsupported isa");
    }
}

}  // namespace MKLDNNPlugin
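How these emitters are driven is not shown in this diff (the eltwise node sources are suppressed), but the intended call sequence follows from the interface declared below; a hedged sketch, with every variable name assumed:

// Hypothetical call site inside a kernel generator:
//
//     std::shared_ptr<jit_emitter> emitter =
//             std::make_shared<jit_mkldnn_emitter>(this, host_isa, eltwiseNode);
//     ...
//     // in the generated loop body, per vector of data:
//     emitter->emit({vmm_src.getIdx()}, {vmm_dst.getIdx()}, aux_vec_pool, aux_gpr_pool);
//     ...
//     // once, after the generated code section, materialize any constants:
//     emitter->emit_table();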
@@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "common/emitter.h"
#include "jit_generator.hpp"
#include "mkldnn_node.h"
#include "jit_uni_eltwise.hpp"

namespace MKLDNNPlugin {

class jit_mkldnn_emitter : public jit_emitter {
public:
    jit_mkldnn_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                       InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);

    size_t get_inputs_num() override;

    void emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
              const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;

    void emit_table() override;

private:
    std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::sse42>> eltwise_injector_sse42;
    std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::avx2>> eltwise_injector_avx2;
    std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::avx512_common>> eltwise_injector_avx512_common;
};

}  // namespace MKLDNNPlugin
@ -1,252 +0,0 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mkldnn_activation_node.h"
|
||||
#include "desc_iterator.hpp"
|
||||
#include <legacy/ie_layers.h>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <mkldnn_extension_utils.h>
|
||||
|
||||
using namespace mkldnn;
|
||||
using namespace MKLDNNPlugin;
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
// TODO: (ichuraev) I don't fully sure that names of types and parameters are correct for square, abs, sqrt, linear, bounded_relu and soft_relu
|
||||
caseless_map<std::string, std::function<void(GenericLayer*, mkldnn::algorithm&, float&, float&)>> MKLDNNActivationNode::initializers = {
|
||||
{"relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("negative_slope", 0.0f);
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_relu;
|
||||
}},
|
||||
{"gelu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_gelu;
|
||||
}},
|
||||
{"elu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_elu;
|
||||
}},
|
||||
{"tanh", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_tanh;
|
||||
}},
|
||||
{"logistic", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_logistic;
|
||||
}},
|
||||
{"square", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_square;
|
||||
}},
|
||||
{"abs", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_abs;
|
||||
}},
|
||||
{"sqrt", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_sqrt;
|
||||
}},
|
||||
{"linear", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
|
||||
beta = activationLayer->GetParamAsFloat("beta", 0.0f);
|
||||
algorithm = eltwise_linear;
|
||||
}},
|
||||
{"bounded_relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("alpha", 0.0f);
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_bounded_relu;
|
||||
}},
|
||||
{"soft_relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_soft_relu;
|
||||
}},
|
||||
{"relu6", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("n", 6.0f);
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_bounded_relu;
|
||||
}},
|
||||
{"clamp", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("max", 1.0f);
|
||||
beta = activationLayer->GetParamAsFloat("min", 0.0f);
|
||||
algorithm = eltwise_clamp;
|
||||
}},
|
||||
{"exp", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_exp;
|
||||
}},
|
||||
{"not", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_not;
|
||||
}},
|
||||
{"swish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_swish;
|
||||
}},
|
||||
{"hswish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_hswish;
|
||||
}},
|
||||
{"mish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_mish;
|
||||
}},
|
||||
{"hsigmoid", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
|
||||
alpha = 0.0f;
|
||||
beta = 0.0f;
|
||||
algorithm = eltwise_hsigmoid;
|
||||
}},
|
||||
};
|
||||
|
||||
MKLDNNActivationNode::MKLDNNActivationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
|
||||
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(layer, eng, cache) {
|
||||
GenericLayer* activationLayer = getCnnLayer().get();
|
||||
if (activationLayer == nullptr)
|
||||
THROW_IE_EXCEPTION << "Cannot get CNNLayer.";
|
||||
|
||||
std::string type = activationLayer->type;
|
||||
CaselessEq<std::string> comparator;
|
||||
if (comparator(type, "activation"))
|
||||
type = activationLayer->GetParamAsString("type");
|
||||
if (comparator(type, "sigmoid"))
|
||||
type = "logistic";
|
||||
|
||||
if (initializers.find(type) != initializers.end())
|
||||
initializers[type](activationLayer, algorithm, alpha, beta);
|
||||
}
|
||||
|
||||
void MKLDNNActivationNode::getSupportedDescriptors() {
|
||||
if (!descs.empty())
|
||||
return;
|
||||
|
||||
if (getParentEdges().size() != 1)
|
||||
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
|
||||
if (!getChildEdges().size())
|
||||
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
|
||||
|
||||
auto parentOutDims = getParentEdgeAt(0)->getDims();
|
||||
|
||||
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
|
||||
|
||||
// FIXME: MKLDNN doesn't support not inputs with number of dimensions less than 4 for activation
|
||||
    while (parentOutDims.ndims() < 4)
        parentOutDims.push_back(1);
    for (auto format : getAvailableFormatsForDims(parentOutDims)) {
        MKLDNNMemoryDesc in_candidate(parentOutDims, MKLDNNExtensionUtils::IEPrecisionToDataType(precision), format);
        createDescriptor({in_candidate}, {});
    }
}

void MKLDNNActivationNode::createPrimitive() {
    if (prim)
        return;

    auto prim_desc = createPrimitiveDescriptor<eltwise_forward::primitive_desc, eltwise_forward::desc>();

    prim.reset(new eltwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                   getChildEdgeAt(0)->getMemory().GetPrimitive()));
}

bool MKLDNNActivationNode::created() const {
    return getType() == Activation;
}

void MKLDNNActivationNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                            const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
    MKLDNNMemoryDesc inDesc(inputDesc[0]);
    MKLDNNDescriptor desc(std::shared_ptr<eltwise_forward::desc>(
            new eltwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), inDesc, getAlpha(), getBeta())));
    descs.push_back(desc);
}

void MKLDNNActivationNode::initOptimalPrimitiveDescriptor() {
    auto config = getSelectedPrimitiveDescriptor()->getConfig();
    if (isInitConfig(config))
        return;

    if (config.inConfs.size() != 1 || config.outConfs.size() != 1 ||
            (!isUninitTensorDesc(config.inConfs[0].desc) &&
                    !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
        THROW_IE_EXCEPTION << "Layer " << getName() << " has incorrect selected config!";

    if (!isUninitTensorDesc(config.inConfs[0].desc)) {
        config.outConfs[0].desc = config.inConfs[0].desc;
    } else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
        config.inConfs[0].desc = config.outConfs[0].desc;
    } else {
        config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
    }

    initDescriptor(config);
}

MKLDNNMemoryDesc MKLDNNActivationNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_primitive_desc(idx).desc());

    auto parentOutDims = getParentEdgeAt(idx)->getDims().ToSizeVector();

    SizeVector blocked_dims, order, dimOffsets, strides;
    size_t offset = desc.getBlockingDesc().getOffsetPadding();

    for (size_t i = 0; i < desc.getBlockingDesc().getStrides().size(); i++) {
        if (desc.getBlockingDesc().getOrder()[i] >= parentOutDims.size())
            continue;

        blocked_dims.push_back(desc.getBlockingDesc().getBlockDims()[i]);
        order.push_back(desc.getBlockingDesc().getOrder()[i]);
        dimOffsets.push_back(desc.getBlockingDesc().getOffsetPaddingToData()[i]);
        strides.push_back(desc.getBlockingDesc().getStrides()[i]);
    }
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            parentOutDims,
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            parentOutDims,
                                                            {blocked_dims, order, offset, dimOffsets, strides}));
}

MKLDNNMemoryDesc MKLDNNActivationNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_primitive_desc(idx).desc());

    auto childInDims = getChildEdgeAt(idx)->getDims().ToSizeVector();

    SizeVector blocked_dims, order, dimOffsets, strides;
    size_t offset = desc.getBlockingDesc().getOffsetPadding();

    for (size_t i = 0; i < desc.getBlockingDesc().getStrides().size(); i++) {
        if (desc.getBlockingDesc().getOrder()[i] >= childInDims.size())
            continue;

        blocked_dims.push_back(desc.getBlockingDesc().getBlockDims()[i]);
        order.push_back(desc.getBlockingDesc().getOrder()[i]);
        dimOffsets.push_back(desc.getBlockingDesc().getOffsetPaddingToData()[i]);
        strides.push_back(desc.getBlockingDesc().getStrides()[i]);
    }
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            childInDims,
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            childInDims,
                                                            {blocked_dims, order, offset, dimOffsets, strides}));
}

REG_MKLDNN_PRIM_FOR(MKLDNNActivationNode, Activation);
@ -1,44 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include "caseless.hpp"
#include <string>
#include <memory>
#include <vector>

namespace MKLDNNPlugin {

class MKLDNNActivationNode : public MKLDNNNode {
public:
    MKLDNNActivationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNActivationNode() override = default;

    void getSupportedDescriptors() override;
    void initOptimalPrimitiveDescriptor() override;
    void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
                          const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
    void createPrimitive() override;
    bool created() const override;

    mkldnn::algorithm getAlgorithm() const { return algorithm; }
    float getAlpha() const { return alpha; }
    float getBeta() const { return beta; }

    MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
    MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;

private:
    float alpha = 0.0f;
    float beta = 0.0f;
    static InferenceEngine::details::caseless_map<std::string,
            std::function<void(InferenceEngine::GenericLayer*, mkldnn::algorithm&, float&, float&)>> initializers;
    mkldnn::algorithm algorithm = mkldnn::algorithm::eltwise_relu;
};

} // namespace MKLDNNPlugin

@ -3,7 +3,6 @@
//

#include "mkldnn_batchnorm_node.h"
#include "mkldnn_depthwise_node.h"
#include <mkldnn_extension_utils.h>
#include "common/cpu_memcpy.h"

@ -25,7 +25,7 @@ public:
                          const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
    void createPrimitive() override;
    bool created() const override;
    bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Depthwise
    bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise
                                 && fusedWith[0]->getCnnLayer()->type == "ScaleShift";}

    MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;

@ -5,10 +5,8 @@
#include "mkldnn_bin_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_conv_node.h"
#include <legacy/ie_layers.h>
@ -116,7 +114,6 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
            paddingR[i] = (dst - calc_dst) * stride[i];
    }

    withSum = isFusedWith(Eltwise);
    withDWConv = isFusedWith(Convolution);
    withBinarization = isFusedWith(Quantize);
    for (auto &node : fusedWith) {
@ -138,12 +135,19 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
#endif
    }

    int expectedInputEdgesNum = baseInputsNumber + isFusedWith(Eltwise);
    withSum = false;
    int expectedInputEdgesNum = baseInputsNumber;
    for (int i = 0; i < fusedWith.size(); i++) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
        if (convolutionNode) {
            expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
        }

        auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
        if (eltwiseNode && eltwiseNode->isSum()) {
            withSum = true;
            expectedInputEdgesNum++;
        }
    }

    if (getParentEdges().size() != expectedInputEdgesNum)
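A hedged illustration of the new edge accounting above: the second operand of a fused Eltwise sum arrives through an extra parent edge, so the expected edge count grows by one per fused sum. The concrete numbers below are assumptions chosen for the example, not values from this commit.

    // e.g. a binary convolution with data + weights inputs and one fused sum
    int baseInputsNumber = 2;                    // assumed base edge count
    int expectedInputEdgesNum = baseInputsNumber;
    bool fusedSumPresent = true;                 // the eltwiseNode->isSum() case above
    if (fusedSumPresent)
        expectedInputEdgesNum++;                 // three parent edges are now expected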
@ -164,88 +168,13 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool
    for (auto &node : fusedWith) {
#if defined (COMPILED_CPU_MKLDNN_ELTWISE_NODE)
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode && eltwiseNode->isSum()) {
            ops.append_sum(1.0);
            continue;
        }

        if (eltwiseNode) {
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is only one scale while we need a scale per channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0]);
                }
            } else {
                ops.append_sum(1.0);
            }
            continue;
        }
#endif

#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
            continue;
        }
#endif

#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx]->FillZero();

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        depthwiseLayer->_weights->buffer(),
                                                        depthwiseLayer->_weights->size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                               memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                depthwiseLayer->_biases->buffer(),
                                                                depthwiseLayer->_biases->size() *
                                                                MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            eltwiseNode->appendPostOps(ops);
            continue;
        }
#endif

@ -20,6 +20,7 @@
#include "mkldnn_conv_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_eltwise_node.h"
#include <limits>
#include "common/cpu_memcpy.h"

@ -93,12 +94,9 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
    MKLDNNDims dstDims = getChildEdgeAt(0)->getDims();
    InferenceEngine::LayerConfig config;
    config.dynBatchSupport = true;
    bool hasEltwise = false;

    for (size_t i = 0; i < getParentEdges().size(); i++) {
        auto parentEdge = getParentEdgeAt(i);
        if (parentEdge->getParent()->getType() == Eltwise)
            hasEltwise = true;

        InferenceEngine::DataConfig dataConfig;
        dataConfig.inPlace = -1;
@ -117,7 +115,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
    config.outConfs.resize(1);
    config.outConfs[0].inPlace = -1;
    config.outConfs[0].constant = false;
    if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1 || hasEltwise) {
    if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) {
        auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format::nc :
                                                                                          dims.ndims() == 4 ? memory::format::nhwc :
                                                                                                              memory::format::ndhwc
@ -155,7 +153,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
        }
    }

    if (axis != 1 || hasEltwise)
    if (axis != 1)
        return;

    auto numOfDim = static_cast<size_t>(dstDims.ndims());

@ -5,10 +5,8 @@
#include "mkldnn_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_concat_node.h"
@ -110,6 +108,21 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
    if (convLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert convolution layer.";

    withSum = false;
    int expectedInputEdgesNum = baseInputsNumber;
    for (int i = 0; i < fusedWith.size(); i++) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
        if (convolutionNode) {
            expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
        }

        auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
        if (eltwiseNode && eltwiseNode->isSum()) {
            withSum = true;
            expectedInputEdgesNum++;
        }
    }

    auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision());
    if (!inputZeroPoints.empty())
        inputDataType = memory::u8;
@ -127,10 +140,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {

    // We need to make sure that convolution output and second input of fused Eltwise operation
    // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32.
    if (outputDataType != memory::f32 && outputDataType != memory::bf16 && isFusedWith(Eltwise)) {
    if (outputDataType != memory::f32 && outputDataType != memory::bf16 && withSum) {
        for (int i = 0; i < fusedWith.size(); i++) {
            auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
            if (eltwiseNode) {
            if (eltwiseNode && eltwiseNode->isSum()) {
                eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
                if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
                    eltwisePrecision = Precision::FP32;
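A sketch of the rule stated in the comment above, with assumed precisions: the fused sum reuses the convolution's own output buffer, so the per-element sizes must agree or both sides fall back to FP32. The byte sizes below are example assumptions.

    // Hedged example mirroring the check above:
    size_t convOutBytes = 1;     // e.g. an I8 convolution output (assumed)
    size_t sumOperandBytes = 2;  // e.g. a BF16 second operand of the sum (assumed)
    if (convOutBytes != sumOperandBytes)
        eltwisePrecision = Precision::FP32;  // upscale so the shared buffer matches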
@ -142,14 +155,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
            }
        }
    }

    int expectedInputEdgesNum = baseInputsNumber + isFusedWith(Eltwise);
    for (int i = 0; i < fusedWith.size(); i++) {
        auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
        if (convolutionNode) {
            expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
        }
    }

    if (getParentEdges().size() != expectedInputEdgesNum)
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
@ -232,7 +237,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {

    MKLDNNDims weightsDims = MKLDNNDims(weightDims);

    withSum = isFusedWith(Eltwise);
    withDWConv = isFusedWith(Convolution);

    for (int i = 0; i < fusedWith.size(); i++) {
@ -287,7 +291,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
        eltwisePrecision = Precision::FP32;
        for (int i = 0; i < fusedWith.size(); i++) {
            auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
            if (eltwiseNode) {
            if (eltwiseNode && eltwiseNode->isSum()) {
                eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
                // TODO(amalyshe): there might be a situation when convolution can be executed in BF16,
                // but the output is required in FP32 while the eltwise in-place tensor would be in BF16
@ -364,93 +368,16 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
        if (node->getType() == Split || node->getType() == Concatenation)
            continue;

#if defined (COMPILED_CPU_MKLDNN_ELTWISE_NODE)
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
                    // currently there is only one scale while we need a scale per channel :(
                    ops.append_sum(it->second->buffer().as<float*>()[0], mkldnn::memory::convert_to_c(precisionToDataType(eltwisePrecision)));
                }
            } else {
            if (eltwiseNode && eltwiseNode->isSum()) {
                ops.append_sum(1.0, mkldnn::memory::convert_to_c(precisionToDataType(eltwisePrecision)));
            }

            continue;
        }
#endif

#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
                               activationNode->getBeta());
        if (eltwiseNode) {
            eltwiseNode->appendPostOps(ops);
            continue;
        }
#endif

#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());

            if (initWeights) {
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
                PostOpsIntBlobMemory[blob_idx]->FillZero();
                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        depthwiseLayer->_weights->buffer(),
                                                        depthwiseLayer->_weights->size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                               memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                depthwiseLayer->_biases->buffer(),
                                                                depthwiseLayer->_biases->size() *
                                                                MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }
#endif

        auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
        if (quantizeNode) {
@ -5,10 +5,8 @@
|
||||
#include "mkldnn_def_conv_node.h"
|
||||
#include "mkldnn_reorder_node.h"
|
||||
#include "mkldnn_input_node.h"
|
||||
#include "mkldnn_activation_node.h"
|
||||
#include "desc_iterator.hpp"
|
||||
#include "mkldnn_eltwise_node.h"
|
||||
#include "mkldnn_depthwise_node.h"
|
||||
#include <legacy/ie_layers.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -1,353 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_depthwise_node.h"
#include "desc_iterator.hpp"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "caseless.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;

MKLDNNDepthwiseNode::MKLDNNDepthwiseNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
        : MKLDNNNode(layer, eng, cache) {
    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
        return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
    });
    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
        if (!isWithBiases())
            return MKLDNNMemoryDesc();
        return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
    });
}

void MKLDNNDepthwiseNode::getSupportedDescriptors() {
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        precision = InferenceEngine::Precision::FP32;
    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    auto parentOutDims = getParentEdgeAt(0)->getDims();

    if (getParentEdges().size() != 1)
        THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect number of inputs!";
    if (parentOutDims != getChildEdgeAt(0)->getDims())
        THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect dimensions!";

    auto size = static_cast<size_t>(parentOutDims.ndims() == 1 ? parentOutDims[0] : parentOutDims[1]);
    SizeVector weightDims = { size };
    MKLDNNDims blocked_weightDims(weightDims);

    auto * wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(getCnnLayer().get());
    if (wLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot get weightable layer for node " << getName() << ".";

    InferenceEngine::Blob::Ptr blb = wLayer->_weights;
    if (blb)
        realWeightSize = blb->size();
    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (isWithBiases()) {
        InferenceEngine::Blob::Ptr blb = wLayer->_biases;
        if (blb)
            realBiasSize = blb->size();
        internalBlobs.push_back(createInternalBlob(weightDims, false));
    }

    for (auto format : getAvailableFormatsForDims(parentOutDims)) {
        MKLDNNMemoryDesc in_candidate{parentOutDims, inputDataType, format};
        createDescriptor({in_candidate}, {});
    }
}

void MKLDNNDepthwiseNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto parentOutDims = getParentEdgeAt(0)->getDims();
    if (parentOutDims.ndims() <= 5) {
        MKLDNNNode::initSupportedPrimitiveDescriptors();
    } else {
        createSpecificDescriptor5D();
        if (specificDesc5DPtr == nullptr)
            THROW_IE_EXCEPTION << "Cannot create specific MKLDNNDescriptor for depthwise node " << getName();
        const auto& desc = *specificDesc5DPtr;
        auto itpd = desc.createPrimitiveDescriptorIterator(getEngine());
        while (itpd.is_not_end()) {
            InferenceEngine::LayerConfig config;
            config.dynBatchSupport = true;
            for (size_t i = 0; i < descInputNumbers(desc); i++) {
                InferenceEngine::DataConfig dataConfig;
                dataConfig.inPlace = -1;
                dataConfig.constant = false;
                dataConfig.desc = MKLDNNMemoryDesc(InferenceEngine::TensorDesc(Precision::FP32, parentOutDims.ToSizeVector(), Layout::ANY));
                config.inConfs.push_back(dataConfig);
            }

            std::vector<mkldnn::memory::format> outFormats;
            for (size_t i = 0; i < descOutputNumbers(desc); i++) {
                InferenceEngine::DataConfig dataConfig;
                dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                dataConfig.constant = false;
                dataConfig.desc = MKLDNNMemoryDesc(InferenceEngine::TensorDesc(Precision::FP32, parentOutDims.ToSizeVector(), Layout::ANY));
                config.outConfs.push_back(dataConfig);

                auto primDesc = itpd.fetch();
                auto dstPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(dst_pd), 0);
                if (dstPrimDesc) {
                    outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
                } else {
                    // This path is needed to correctly handle Deconvolution node
                    auto diffSrcPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(diff_src_pd), 0);
                    if (diffSrcPrimDesc) {
                        outFormats.emplace_back(static_cast<memory::format>(itpd.diff_src_primitive_desc().desc().data.format));
                    }
                }
            }
            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());

            supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
            itpd++;
        }
    }
}

void MKLDNNDepthwiseNode::createPrimitive() {
    if (prim)
        return;

    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Input memory didn't allocate.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";

    auto createRightPrimitiveDescriptor = [&]() -> depthwise_forward::primitive_desc {
        auto parentOutDims = getParentEdgeAt(0)->getDims();
        if (parentOutDims.ndims() <= 5) {
            return createPrimitiveDescriptor<depthwise_forward::primitive_desc, depthwise_forward::desc>();
        } else {
            const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor();
            auto& desc = *specificDesc5DPtr;
            auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), mkldnn::primitive_attr());

            while (itpd.is_not_end()) {
                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
                if (impl_type == getSelectedPrimitiveDescriptor()->getImplementationType()) {
                    specificPrepareMemory5D(itpd);
                    std::shared_ptr<depthwise_forward::desc> selected_desc_ptr = desc;
                    depthwise_forward::primitive_desc prim_desc = depthwise_forward::primitive_desc(*selected_desc_ptr, getEngine());
                    return prim_desc;
                }
                itpd++;
            }
            THROW_IE_EXCEPTION << "Cannot create specific primitive descriptor for depthwise node " << getName() << ".";
        }
    };

    auto prim_desc = createRightPrimitiveDescriptor();

    if (isBroadcast()) {
        float broadcastValue = static_cast<float*>(internalBlobMemory[0]->GetData())[0];
        size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
        for (int i = 1; i < blbSize && realWeightSize != blbSize; i++) {
            static_cast<float*>(internalBlobMemory[0]->GetData())[i] = broadcastValue;
        }

        if (isWithBiases()) {
            blbSize = internalBlobMemory[1]->GetPrimitiveDescriptor().desc().data.dims[0];
            broadcastValue = static_cast<float*>(internalBlobMemory[1]->GetData())[0];
            for (int i = 1; i < blbSize && realBiasSize != blbSize; i++) {
                static_cast<float*>(internalBlobMemory[1]->GetData())[i] = broadcastValue;
            }
        }
    } else {
        size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
        if (realWeightSize != blbSize)
            THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect weights!";
        if (isWithBiases()) {
            blbSize = internalBlobMemory[1]->GetPrimitiveDescriptor().desc().data.dims[0];
            if (realBiasSize != blbSize)
                THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect biases!";
        }
    }

    if (isWithBiases()) {
        prim.reset(new depthwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                         internalBlobMemory[0]->GetPrimitive(),
                                         internalBlobMemory[1]->GetPrimitive(),
                                         getChildEdgeAt(0)->getMemory().GetPrimitive()));
    } else {
        prim.reset(new depthwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                         internalBlobMemory[0]->GetPrimitive(),
                                         getChildEdgeAt(0)->getMemory().GetPrimitive()));
    }
}

bool MKLDNNDepthwiseNode::created() const {
    return getType() == Depthwise;
}

void MKLDNNDepthwiseNode::init() {
    GenericLayer* depthwiseLayer = getCnnLayer().get();
    if (depthwiseLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot get CNNLayer.";

    CaselessEq<std::string> comparator;
    if (comparator(depthwiseLayer->type, "ScaleShift")) {
        auto *scshLayer = dynamic_cast<ScaleShiftLayer*>(getCnnLayer().get());
        if (scshLayer == nullptr)
            THROW_IE_EXCEPTION << "Cannot get scale shift layer " << getName();
        if (scshLayer->_weights == nullptr)
            THROW_IE_EXCEPTION << "ScaleShift without weights is not supported";

        algorithm = depthwise_scale_shift;
        withBiases = scshLayer->_biases != nullptr;
        broadcast = static_cast<bool>(scshLayer->_broadcast);
    } else if (comparator(depthwiseLayer->type, "PReLU")) {
        auto *preluLayer = dynamic_cast<PReLULayer*>(getCnnLayer().get());
        if (preluLayer == nullptr)
            THROW_IE_EXCEPTION << "Cannot get PReLU layer " << getName();
        if (preluLayer->_weights == nullptr)
            THROW_IE_EXCEPTION << "PReLU without weights is not supported";

        algorithm = depthwise_prelu;
        withBiases = false;
        broadcast = preluLayer->_channel_shared;
    } else {
        THROW_IE_EXCEPTION << "Unsupported depthwise operation";
    }
}

void MKLDNNDepthwiseNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                           const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
    MKLDNNMemoryDesc in_candidate(inputDesc[0]);
    MKLDNNMemoryDesc out_candidate(inputDesc[0]);
    MKLDNNDims weightDims({in_candidate.getDims().ndims() == 1 ? in_candidate.getDims()[0] : in_candidate.getDims()[1]});

    MKLDNNMemoryDesc wgh_candidate{weightDims, in_candidate.getDataType(), memory::x};

    if (isWithBiases()) {
        MKLDNNMemoryDesc bias_candidate{weightDims, in_candidate.getDataType(), memory::x};
        MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
                new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate, bias_candidate)));
        descs.push_back(desc);
    } else {
        MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
                new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate)));
        descs.push_back(desc);
    }
}

void MKLDNNDepthwiseNode::initOptimalPrimitiveDescriptor() {
    auto selected_pd = getSelectedPrimitiveDescriptor();
    if (selected_pd == nullptr)
        THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
    auto config = selected_pd->getConfig();
    if (isInitConfig(config))
        return;

    if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (!isUninitTensorDesc(config.inConfs[0].desc) &&
            !isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
        THROW_IE_EXCEPTION << "Layer " << getName() << " has incorrect selected config!";

    if (getParentEdgeAt(0)->getDims().ndims() > 5)
        return;

    if (!isUninitTensorDesc(config.inConfs[0].desc)) {
        config.outConfs[0].desc = config.inConfs[0].desc;
    } else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
        config.inConfs[0].desc = config.outConfs[0].desc;
    } else {
        config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
    }

    initDescriptor(config);
}

void MKLDNNDepthwiseNode::createSpecificDescriptor5D() {
    auto parentOutDims = getParentEdgeAt(0)->getDims();
    MKLDNNDims newDims;
    for (int i = 0; i < 4; i++)
        newDims.push_back(parentOutDims[i]);
    int lastDim = 1;
    for (int i = 4; i < parentOutDims.ndims(); i++) {
        lastDim *= parentOutDims[i];
    }
    newDims.push_back(lastDim);

    MKLDNNMemoryDesc in_candidate{newDims, MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32), mkldnn::memory::ncdhw};
    MKLDNNMemoryDesc out_candidate(in_candidate);
    MKLDNNDims weightDims({in_candidate.getDims()[1]});

    MKLDNNMemoryDesc wgh_candidate{weightDims, in_candidate.getDataType(), memory::x};

    if (isWithBiases()) {
        MKLDNNMemoryDesc bias_candidate{weightDims, in_candidate.getDataType(), memory::x};
        MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
                new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate, bias_candidate)));
        specificDesc5DPtr = std::make_shared<MKLDNNDescriptor>(desc);
    } else {
        MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
                new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate)));
        specificDesc5DPtr = std::make_shared<MKLDNNDescriptor>(desc);
    }
}

void MKLDNNDepthwiseNode::specificPrepareMemory5D(mkldnn::primitive_desc_iterator& itpd) {
    std::vector<MKLDNNMemoryDesc> intDescs;
    for (auto &it : internalBlobDesc)
        intDescs.push_back(it(itpd, 0));

    internalBlobMemory.clear();
    for (size_t i = 0; i < internalBlobs.size(); i++) {
        const auto &internalBlob = internalBlobs[i];

        auto create = [&] () {
            auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
            auto newFormat = newDesc.getFormat();
            if (newFormat == mkldnn::memory::ncdhw) {
                newFormat = mkldnn::memory::goihw;
            }
            if (newFormat == mkldnn::memory::nchw) {
                newFormat = mkldnn::memory::oihw;
            }

            MKLDNNMemory memory{ getEngine() };
            memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());

            MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()));
            _ptr->Create(intDescs[i]);
            _ptr->SetData(memory);

            return _ptr;
        };

        MKLDNNMemoryPtr ptr;
        if (weightCache != nullptr) {
            const uint64_t data_hash = weightCache->GetHashFunc().hash(
                    internalBlob->buffer(), internalBlob->byteSize());

            const std::string string_hash = getName() + "_" + std::to_string(i)
                                            + "_" + std::to_string(internalBlob->byteSize())
                                            + "_" + std::to_string(data_hash);

            ptr = weightCache->findOrCreate(string_hash, create);
        } else {
            ptr = create();
        }

        internalBlobMemory.push_back(ptr);
    }
}

REG_MKLDNN_PRIM_FOR(MKLDNNDepthwiseNode, Depthwise);
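Worth noting as this file is deleted: the two depthwise flavours survive as entries of the generic eltwise op space, as the mkldnn_fullyconnected_node.cpp hunk further down shows. A hedged summary of the mapping, inferred from the call sites in this commit:

    // ScaleShift -> EltwiseOpType::MulAdd  (weights act as per-channel scales, biases as shifts)
    // PReLU      -> EltwiseOpType::Prelu   (weights act as per-channel slopes)
    // Both still lower onto ops.append_depthwise(...) when fused as post-ops.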
@ -1,46 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <memory>
#include <vector>

namespace MKLDNNPlugin {

class MKLDNNDepthwiseNode : public MKLDNNNode {
public:
    MKLDNNDepthwiseNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
    ~MKLDNNDepthwiseNode() override = default;

    void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
                          const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
    void initOptimalPrimitiveDescriptor() override;
    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void createPrimitive() override;
    bool created() const override;

    mkldnn::algorithm getAlgorithm() const { return algorithm; }
    bool isWithBiases() const { return withBiases; }
    bool isBroadcast() const { return broadcast; }

private:
    void init() override;

    mkldnn::algorithm algorithm = mkldnn::algorithm::depthwise_scale_shift;
    size_t realWeightSize = 0;
    size_t realBiasSize = 0;
    bool withBiases = false;
    bool broadcast = false;

    std::shared_ptr<MKLDNNDescriptor> specificDesc5DPtr;
    void createSpecificDescriptor5D();
    void specificPrepareMemory5D(mkldnn::primitive_desc_iterator& itpd);
};

} // namespace MKLDNNPlugin
File diff suppressed because it is too large
@ -8,45 +8,98 @@
#include <mkldnn_node.h>
#include <string>
#include <vector>
#include <c_types_map.hpp>
#include <memory>
#include <caseless.hpp>

namespace MKLDNNPlugin {

struct jit_eltwise_fq_params {
    int src0_step;
    int src1_step;
    int dst_step;
    mkldnn::memory::data_type src0_dt;
    mkldnn::memory::data_type src1_dt;
    mkldnn::memory::data_type dst_dt;
    int src0_data_size;
    int src1_data_size;
    int dst_data_size;
#define MAX_ELTWISE_INPUTS 7

    InferenceEngine::EltwiseLayer::eOperation eltwise_op;
enum EltwiseOpType {
    Add = 0,
    Multiply,
    Subtract,
    Divide,
    FloorMod,
    Mod,
    Maximum,
    Minimum,
    SquaredDifference,
    PowerDynamic,
    PowerStatic,
    MulAdd,

    Equal,
    NotEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    LogicalAnd,
    LogicalOr,
    LogicalXor,
    LogicalNot,

    Relu,
    Gelu,
    Elu,
    Tanh,
    Logistic,
    Square,
    Abs,
    Sqrt,
    Linear,
    BoundedRelu,
    SoftRelu,
    Relu6,
    Exp,
    Clamp,
    Swish,
    Prelu,
    Mish,
    Hswish,
    Hsigmoid
};
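The enum above folds the former activation and depthwise algorithms into one op space. For one of the less obvious entries, a scalar reference of FloorMod might look like the sketch below; the floored-modulo convention (result takes the sign of the divisor) is an assumption here, and the real per-element code is produced by the JIT emitters.

    #include <cmath>

    // floor_mod(a, b) = a - floor(a / b) * b, so the result follows the
    // sign of the divisor b (floored-modulo convention, assumed).
    static inline float floor_mod(float a, float b) {
        return a - std::floor(a / b) * b;
    }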

struct jit_eltwise_fq_call_args {
    const void *src0;
    const void *src1;
struct jit_eltwise_params {
    size_t inputs_number;
    size_t input_size;

    InferenceEngine::Precision src_prc[MAX_ELTWISE_INPUTS];
    InferenceEngine::Precision dst_prc;

    std::vector<size_t> src_offsets[MAX_ELTWISE_INPUTS];
    std::vector<size_t> dst_offsets;

    size_t src_size[MAX_ELTWISE_INPUTS];
    size_t dst_size;
    size_t oc_size;
};

struct jit_eltwise_call_args {
    const void *src_ptr[MAX_ELTWISE_INPUTS];
    void *dst;

    size_t work_amount;
    size_t oc_off;
};

struct jit_uni_eltwise_fq_kernel {
    void (*ker_)(const jit_eltwise_fq_call_args *);
class MKLDNNEltwiseNode;

    void operator()(const jit_eltwise_fq_call_args *args) {
struct jit_uni_eltwise_kernel {
    void (*ker_)(const jit_eltwise_call_args *);

    void operator()(const jit_eltwise_call_args *args) {
        assert(ker_);
        ker_(args);
    }

    explicit jit_uni_eltwise_fq_kernel(jit_eltwise_fq_params jep, const mkldnn_primitive_attr &attr) : ker_(nullptr), jep_(jep), attr_(attr) {}
    virtual ~jit_uni_eltwise_fq_kernel() {}
    explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
    virtual ~jit_uni_eltwise_kernel() {}

    jit_eltwise_fq_params jep_;
    const mkldnn_primitive_attr &attr_;
    jit_eltwise_params jep_;
    MKLDNNEltwiseNode& eltwiseNode;
};
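A hedged sketch of how the new kernel functor is expected to be driven per worker thread: only jit_eltwise_call_args, jit_eltwise_params (jep), and the functor itself come from the header above, while the chunk bookkeeping names are assumptions for illustration.

    jit_eltwise_call_args args = {};
    for (size_t i = 0; i < jep.inputs_number; i++)
        args.src_ptr[i] = src_chunk_ptrs[i];  // per-input base pointer for this chunk (assumed)
    args.dst = dst_chunk_ptr;                 // output base pointer for this chunk (assumed)
    args.work_amount = chunk_elements;        // elements this thread processes (assumed)
    args.oc_off = channel_offset;             // offset into per-channel data (assumed)
    (*eltwise_kernel)(&args);                 // jumps into the JIT-generated code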

class MKLDNNEltwiseNode : public MKLDNNNode {
@ -56,54 +109,66 @@ public:

    void getSupportedDescriptors() override;
    void initSupportedPrimitiveDescriptors() override;
    void selectOptimalPrimitiveDescriptor() override;
    void createPrimitive() override;
    void execute(mkldnn::stream strm) override;
    bool created() const override;
    bool canBeInPlace() const override;

    bool isSum();
    bool isUnitScales();
    bool isWithBroadcast();
    void initOptimalPrimitiveDescriptor() override;

    bool canFuse(const MKLDNNNodePtr& node) const;

    size_t getOpInputsNum() const;
    EltwiseOpType getOpType() const { return eltwiseOp; }
    mkldnn::algorithm getAlgorithm() const { return eltwiseAlgorithm; }

    float getAlpha() const { return alpha; }
    float getBeta() const { return beta; }

    void appendPostOps(mkldnn::post_ops& ops) override;

private:
    InferenceEngine::EltwiseLayer::eOperation op;
    std::vector<float> sum_scales;
    bool broadcast = false;
    int batch_dim = 5;
    mkldnn::primitive_attr attr;
    void init() override;

    std::shared_ptr<jit_uni_eltwise_fq_kernel> eltiwse_fq_kernel;
    jit_eltwise_fq_params jep;
    EltwiseOpType eltwiseOp = Add;
    mkldnn::algorithm eltwiseAlgorithm = mkldnn::algorithm_undef;

    void jit_eltwise_fq();
    void setPostOps(mkldnn::primitive_attr &attr, bool initWeights);
    std::shared_ptr<jit_uni_eltwise_kernel> eltwise_kernel = nullptr;
    jit_eltwise_params jep = {};

    template <typename T0, typename T1> void ref_eltwise(int in0, int in1);
    template <typename T0, typename T1, typename T2> void ref_eltwise2(int in0, int in1);
    void dims_calc(int *dims, const MKLDNNDims &edge_dims, bool channels_first);
    void offset_out_calc(int *offset, int *dims);
    void offset_in_calc(int *offset, int *dims_in, int *dims_out);
    int optimalTensorRank = 6;
    bool canUseOptimizedImpl = false;
    bool isDynBatchEnabled = false;
    size_t batchDimIdx = 0;
    size_t tensorRank = 0;
    size_t fullWorkAmount = 0;
    size_t schedulerWorkAmount = 0;
    std::vector<std::vector<size_t>> dims_in = {};
    std::vector<std::vector<size_t>> offsets_in = {};
    std::vector<size_t> dims_out = {};
    std::vector<size_t> offsets_out = {};
    std::vector<ptrdiff_t> start_offset_in = {};
    ptrdiff_t start_offset_out = 0;
    std::vector<size_t> offsets_oc = {};

    template <typename T0, typename T1> void eltwise_add(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_prod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_max(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_sub(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_min(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_div(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_squared_diff(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_floor_mod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_pow(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_logical_and(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_logical_or(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1> void eltwise_logical_xor(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
    float alpha = 0;
    float beta = 0;
    float gamma = 0;

    template <typename T0, typename T1, typename T2> void eltwise_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1, typename T2> void eltwise_not_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1, typename T2> void eltwise_less(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1, typename T2> void eltwise_less_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1, typename T2> void eltwise_greater(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    template <typename T0, typename T1, typename T2> void eltwise_greater_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
    std::vector<float> scales = {};
    std::vector<float> shifts = {};

    inline void executeOptimized6D(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);
    inline void executeOptimizedGeneric(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);
    inline void executeReference(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);

    void offset_out_calc(std::vector<size_t>& offset, std::vector<size_t>& dims);
    void offset_in_calc(std::vector<size_t>& offset, std::vector<size_t>& dims_in, std::vector<size_t>& dims_out);

    static InferenceEngine::details::caseless_map<std::string,
            std::function<void(InferenceEngine::GenericLayer*, EltwiseOpType&, mkldnn::algorithm&, float&, float&)>> initializers;
};

} // namespace MKLDNNPlugin

@ -3,8 +3,7 @@
//

#include "mkldnn_fullyconnected_node.h"
#include "mkldnn_activation_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_quantize_node.h"
#include "desc_iterator.hpp"
#include <legacy/ie_layers.h>
@ -199,10 +198,10 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
            continue;
        }

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode && (eltwiseNode->getOpType() == MulAdd || eltwiseNode->getOpType() == Prelu)) {
            if (initWeights) {
                auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
                auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(eltwiseNode->getCnnLayer().get());
                int ndims = getParentEdgeAt(0)->getDims().ndims();
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(ndims == 3 ? getChildEdgeAt(0)->getDims()[2] : getChildEdgeAt(0)->getDims()[1], 16))});

@ -211,7 +210,7 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
                PostOpsIntBlobMemory[blob_idx]->FillZero();

                // In case ndims == 3 graph optimizer allows fusing only if all weights values are the same
                if (depthwiseNode->isBroadcast() || ndims == 3) {
                if (depthwiseLayer->blobs["weights"]->size() == 1 || ndims == 3) {
                    float broadcastValue = static_cast<float *>(depthwiseLayer->_weights->buffer())[0];
                    for (int i = 0; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
@ -223,13 +222,13 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
                                                            MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                if (eltwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->FillZero();

                    // In case ndims == 3 graph optimizer allows fusing only if all biases values are the same
                    if (depthwiseNode->isBroadcast() || ndims == 3) {
                    if (depthwiseLayer->blobs["biases"]->size() == 1 || ndims == 3) {
                        float broadcastValue = static_cast<float *>(depthwiseLayer->_biases->buffer())[0];
                        for (int i = 0; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
@ -241,20 +240,20 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
                                                                MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                    ops.append_depthwise(eltwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                } else {
                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                    ops.append_depthwise(eltwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         nullptr);

                    blob_idx += 1;
                }
            } else {
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                ops.append_depthwise(eltwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }
@ -262,11 +261,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
            continue;
        }

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());

            continue;
        if (eltwiseNode) {
            eltwiseNode->appendPostOps(ops);
        }
    }

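Across these hunks the per-node-type branches collapse into a single eltwiseNode->appendPostOps(ops) call. A hedged sketch of the shape that method plausibly has, inferred only from the call sites in this commit; the member names for the scale and shift data are assumptions.

    void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
        switch (getOpType()) {
        case MulAdd:
        case Prelu:
            // scale/shift-style ops lower onto depthwise post-ops
            ops.append_depthwise(getAlgorithm(), scalesData, shiftsData);  // assumed members
            break;
        default:
            // unary activations lower onto eltwise post-ops
            ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta());
            break;
        }
    }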
@ -5,9 +5,8 @@
#include "mkldnn_interpolate_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -1480,62 +1479,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
            continue;
        }

        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
        if (depthwiseNode) {
            if (initWeights) {
                auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});

                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);

                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
                                                        depthwiseLayer->_weights->buffer(),
                                                        depthwiseLayer->_weights->size() *
                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                if (depthwiseNode->isBroadcast()) {
                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
                    }
                }

                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
                                                               memory::format::x);
                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
                                                                depthwiseLayer->_biases->buffer(),
                                                                depthwiseLayer->_biases->size() *
                                                                MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

                    if (depthwiseNode->isBroadcast()) {
                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
                        }
                    }

                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

                    blob_idx += 2;
                }
            } else {
                ops.append_depthwise(depthwiseNode->getAlgorithm(),
                                     nullptr,
                                     nullptr);
            }

            continue;
        }

        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
        if (activationNode) {
            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());

        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
        if (eltwiseNode) {
            eltwiseNode->appendPostOps(ops);
            continue;
        }

@ -2153,7 +2099,7 @@ inline int MKLDNNInterpolateNode::nearestRound(float originCoord, bool isDownsam
}

bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
auto isOneOf = [](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
auto isOneOf = [&](EltwiseOpType alg, std::vector<EltwiseOpType> algs) {
for (auto a : algs) {
if (alg == a) {
return true;
@ -2170,22 +2116,16 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
if (node->getType() == Quantize) {
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
if (quantizeNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get quantize layer " << node->getName();
THROW_IE_EXCEPTION << "Cannot get quantize node " << node->getName();
return !quantizeNode->isBinarization();
} else if (node->getType() == Depthwise) {
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode*>(node.get());
if (depthwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get depthwise layer " << node->getName();
return ((depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_scale_shift && depthwiseNode->isWithBiases()) ||
(depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_prelu));
} else if (node->getType() == Activation) {
auto* activationNode = dynamic_cast<MKLDNNActivationNode*>(node.get());
if (activationNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get activation layer " << node->getName();
return isOneOf(activationNode->getAlgorithm(), {eltwise_relu, eltwise_gelu, eltwise_elu, eltwise_logistic,
eltwise_bounded_relu, eltwise_clamp, eltwise_tanh, eltwise_swish, eltwise_hswish, eltwise_mish, eltwise_hsigmoid,
eltwise_linear, eltwise_abs, eltwise_square, eltwise_sqrt});
} else if (node->getType() == Eltwise) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName();
return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp,
Tanh, Swish, Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt});
}

return false;
}

@ -5,9 +5,8 @@
#include "mkldnn_mvn_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -597,64 +596,9 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
continue;
}

auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});

PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();

PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}

if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}

ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

blob_idx += 2;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}

continue;
}

auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}

@ -3,8 +3,7 @@
//

#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include "mkldnn_eltwise_node.h"
#include <mkldnn_extension_utils.h>
#include <legacy/ie_layers_internal.hpp>
#include "ie_parallel.hpp"
@ -808,70 +807,9 @@ void MKLDNNNormalizeNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeig
continue;
}

auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getParentEdgeAt(0)->getDims()[1], 16))});

PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();

PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}

if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}

ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

blob_idx += 2;
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
nullptr);

blob_idx += 1;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}

continue;
}

auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}

@ -1,133 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_power_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <cmath>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <limits>
#include "ie_parallel.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;

MKLDNNPowerNode::MKLDNNPowerNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache), scale(1.0f), shift(1.0f), power(1.0f) {}

void MKLDNNPowerNode::getSupportedDescriptors() {
auto * powerLayer = dynamic_cast<PowerLayer*>(getCnnLayer().get());

if (powerLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert power layer.";
scale = powerLayer->scale;
power = powerLayer->power;
shift = powerLayer->offset;

if (getParentEdges().size() != 1)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
}

void MKLDNNPowerNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;

InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
precision = getCnnLayer()->outData[0]->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
config.inConfs.resize(1);
config.outConfs.resize(1);
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = false;
config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false;
for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) {
config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format);
config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, format);
if (format != memory::any) {
config.inConfs[0].desc = InferenceEngine::TensorDesc(config.inConfs[0].desc.getPrecision(),
config.inConfs[0].desc.getDims(), {
config.inConfs[0].desc.getBlockingDesc().getBlockDims(),
config.inConfs[0].desc.getBlockingDesc().getOrder(),
(std::numeric_limits<size_t>::max)()
});
config.outConfs[0].desc = InferenceEngine::TensorDesc(config.outConfs[0].desc.getPrecision(),
config.outConfs[0].desc.getDims(), {
config.outConfs[0].desc.getBlockingDesc().getBlockDims(),
config.outConfs[0].desc.getBlockingDesc().getOrder(),
(std::numeric_limits<size_t>::max)()
});
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, format);
}
}

void MKLDNNPowerNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Input memory didn't allocate.";
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
}

void MKLDNNPowerNode::execute(mkldnn::stream strm) {
auto& srcMemory = getParentEdgeAt(0)->getMemory();
auto& dstMemory = getChildEdgeAt(0)->getMemory();
const size_t data_size = srcMemory.GetSize() / sizeof(float) / srcMemory.GetDims()[0] * batchToProcess();

const auto *src_ptr = reinterpret_cast<const float*>(srcMemory.GetData()) +
srcMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
float *dst_ptr = reinterpret_cast<float*>(dstMemory.GetData()) +
dstMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;

if (power == -1.f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = 1 / val;
});
} else if (power == 0.5f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = sqrtf(val);
});
} else if (power == 1.0f) {
parallel_for(data_size, [&](size_t i) {
dst_ptr[i] = src_ptr[i] * scale + shift;
});
} else if (power == 2.0f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = val * val;
});
} else if (power == 3.0f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = val * val * val;
});
} else {
parallel_for(data_size, [&](size_t i) {
dst_ptr[i] = pow(src_ptr[i] * scale + shift, power);
});
}
}

bool MKLDNNPowerNode::created() const {
return getType() == Power;
}
REG_MKLDNN_PRIM_FOR(MKLDNNPowerNode, Power);
@ -1,31 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>

namespace MKLDNNPlugin {

class MKLDNNPowerNode : public MKLDNNNode {
public:
MKLDNNPowerNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNPowerNode() override = default;

void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;

private:
float scale;
float shift;
float power;
};

} // namespace MKLDNNPlugin

@ -43,10 +43,6 @@ void MKLDNNQuantizeNode::init() {
THROW_IE_EXCEPTION << "Quantize layer " << getName() << " has unsupported number of parent edges at port " << i;
}

if (getParentEdgesAtPort(0)[0]->getDims().ndims() < 1ul || getParentEdgesAtPort(0)[0]->getDims().ndims() > 5ul) {
THROW_IE_EXCEPTION << "Unsupported number of dimensions for input at edge 0 in Quantize layer " << getName();
}

auto initAxisIdx = [&](size_t edgeIdx) {
auto edge = getParentEdgesAtPort(edgeIdx)[0];

@ -319,6 +315,10 @@ std::vector<mkldnn::memory::format> MKLDNNQuantizeNode::getDataFormats() const {
}

void MKLDNNQuantizeNode::getSupportedDescriptors() {
if (getParentEdgesAtPort(0)[0]->getDims().ndims() < 1ul || getParentEdgesAtPort(0)[0]->getDims().ndims() > 5ul) {
THROW_IE_EXCEPTION << "Unsupported number of dimensions for input at edge 0 in Quantize layer " << getName();
}

mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(getInputPrecision());
mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOutputPrecision());
@ -5,12 +5,11 @@
#include "mkldnn_reduce_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include <mkldnn.hpp>
#include <string>
#include <vector>
#include <set>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"
@ -5,9 +5,8 @@
#include "mkldnn_resample_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -438,64 +437,9 @@ void MKLDNNResampleNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeigh
continue;
}

auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});

PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();

PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}

if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));

if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}

ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());

blob_idx += 2;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}

continue;
}

auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}

@ -5,8 +5,6 @@
#include "mkldnn_scatter_update_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include <mkldnn.hpp>
#include <string>
@ -39,9 +39,14 @@ std::vector<CommonTestUtils::OpType> opTypes = {
};

std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
ngraph::helpers::EltwiseTypes::SUBTRACT,
ngraph::helpers::EltwiseTypes::ADD
ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::FLOOR_MOD,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
ngraph::helpers::EltwiseTypes::POWER,
ngraph::helpers::EltwiseTypes::MOD
};

std::map<std::string, std::string> additional_config = {};
@ -22,7 +22,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*(QuantGroupConv3D).*)",
// TODO: Issue 31845
R"(.*(FakeQuantizeLayerTest).*)",
R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
// TODO: failed to downgrade to opset v0 in interpreter backend
R"(.*Gather.*axis=-1.*)",
// TODO: Issue 33151
@ -0,0 +1,327 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <single_layer_tests/eltwise.hpp>
#include <ngraph_functions/builders.hpp>
#include "test_utils/cpu_test_utils.hpp"

using namespace InferenceEngine;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
LayerTestsDefinitions::EltwiseTestParams,
CPUSpecificParams> EltwiseLayerCPUTestParamsSet;

class EltwiseLayerCPUTest : public testing::WithParamInterface<EltwiseLayerCPUTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<EltwiseLayerCPUTestParamsSet> obj) {
LayerTestsDefinitions::EltwiseTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = obj.param;

std::ostringstream result;
result << LayerTestsDefinitions::EltwiseLayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::EltwiseTestParams>(
basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);

return result.str();
}

protected:
void SetUp() {
LayerTestsDefinitions::EltwiseTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = this->GetParam();

std::vector<std::vector<size_t>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::helpers::InputLayerType secondaryInputType;
CommonTestUtils::OpType opType;
ngraph::helpers::EltwiseTypes eltwiseType;
std::map<std::string, std::string> additional_config;
std::tie(inputShapes, eltwiseType, secondaryInputType, opType, netPrecision, inPrc, outPrc, inLayout, targetDevice, additional_config) = basicParamsSet;
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

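// Pick the expected implementation suffix from the widest SIMD extension available on the host; fall back to "ref".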
std::string isaType;
if (with_cpu_x86_avx512f()) {
isaType = "jit_avx512";
} else if (with_cpu_x86_avx2()) {
isaType = "jit_avx2";
} else if (with_cpu_x86_sse42()) {
isaType = "jit_sse42";
} else {
isaType = "ref";
}
selectedType = isaType + "_" + "FP32";

std::vector<size_t> inputShape1, inputShape2;
if (inputShapes.size() == 1) {
inputShape1 = inputShape2 = inputShapes.front();
} else if (inputShapes.size() == 2) {
inputShape1 = inputShapes.front();
inputShape2 = inputShapes.back();
} else {
THROW_IE_EXCEPTION << "Incorrect number of input shapes";
}

configuration.insert(additional_config.begin(), additional_config.end());
auto input = ngraph::builder::makeParams(ngPrc, {inputShape1});

std::vector<size_t> shape_input_secondary;
switch (opType) {
case CommonTestUtils::OpType::SCALAR: {
shape_input_secondary = std::vector<size_t>({1});
break;
}
case CommonTestUtils::OpType::VECTOR:
shape_input_secondary = inputShape2;
break;
default:
FAIL() << "Unsupported Secondary operation type";
}

std::shared_ptr<ngraph::Node> secondaryInput;
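// For division-like ops the secondary input is a constant whose zeros are replaced by ones, so the test never divides by zero.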
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD ||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
std::vector<float> data(ngraph::shape_size(shape_input_secondary));
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape_input_secondary));
for (float &i : data) {
if (i == 0) {
i = 1;
}
}
secondaryInput = ngraph::builder::makeConstant(ngPrc, shape_input_secondary, data);
} else {
secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
}
}

auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
eltwise->get_rt_info() = CPUTestsBase::setCPUInfo(inFmts, outFmts, priority);
function = std::make_shared<ngraph::Function>(eltwise, input, "Eltwise");
}
};

TEST_P(EltwiseLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()

Run();
CheckCPUImpl(executableNetwork, "Eltwise", inFmts, outFmts, selectedType);
}

namespace {

std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
ngraph::helpers::InputLayerType::CONSTANT,
ngraph::helpers::InputLayerType::PARAMETER,
};

std::vector<CommonTestUtils::OpType> opTypes = {
CommonTestUtils::OpType::VECTOR,
};

std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
// TODO: Disabled because memory formats filter is not propagated through ngraph transformations
// ngraph::helpers::EltwiseTypes::SUBTRACT,
// ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::FLOOR_MOD,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
};

std::map<std::string, std::string> additional_config = {};

std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector) {
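// On hosts without AVX512, 16-channel blocked layouts are unavailable, so downgrade them to their 8-channel counterparts.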
auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
for (int i = 0; i < formats.size(); i++) {
if (formats[i] == nChw16c)
formats[i] = nChw8c;
if (formats[i] == nCdhw16c)
formats[i] = nCdhw8c;
}
};

if (!with_cpu_x86_avx512f()) {
for (auto& param : paramsVector) {
adjustBlockedFormatByIsa(std::get<0>(param));
adjustBlockedFormatByIsa(std::get<1>(param));
}
}

return paramsVector;
}

std::vector<std::vector<std::vector<size_t>>> inShapes_4D = {
{{2, 4, 4, 1}},
{{2, 17, 5, 4}},
{{2, 17, 5, 4}, {1, 17, 1, 1}},
{{2, 17, 5, 1}, {1, 17, 1, 4}},
};

std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nChw16c, nChw16c}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, nchw}, {nchw}, {}, {})
};

const auto params_4D_FP32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(eltwiseOpTypes),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32, EltwiseLayerCPUTest, params_4D_FP32, EltwiseLayerCPUTest::getTestCaseName);


std::vector<std::vector<std::vector<size_t>>> inShapes_5D = {
{{2, 4, 3, 4, 1}},
{{2, 17, 7, 5, 4}},
{{2, 17, 6, 5, 4}, {1, 17, 6, 1, 1}},
{{2, 17, 6, 5, 1}, {1, 17, 1, 1, 4}},
};

std::vector<CPUSpecificParams> cpuParams_5D = {
CPUSpecificParams({nCdhw16c, nCdhw16c}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, ndhwc}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, ncdhw}, {ncdhw}, {}, {})
};

const auto params_5D_FP32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D),
::testing::ValuesIn(eltwiseOpTypes),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32, EltwiseLayerCPUTest, params_5D_FP32, EltwiseLayerCPUTest::getTestCaseName);


std::vector<std::vector<std::vector<size_t>>> inShapes_4D_Blocked_Planar = {
{{2, 17, 31, 3}, {2, 1, 31, 3}},
{{2, 17, 5, 1}, {2, 1, 1, 4}},
};

std::vector<CPUSpecificParams> cpuParams_4D_Blocked_Planar = {
CPUSpecificParams({nChw16c, nchw}, {nChw16c}, {}, {}),
};

const auto params_4D_FP32_Blocked_Planar = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D_Blocked_Planar),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Planar)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName);


std::vector<std::vector<std::vector<size_t>>> inShapes_4D_Planar_Blocked = {
{{2, 1, 31, 3}, {2, 17, 31, 3}},
{{2, 1, 1, 4}, {2, 17, 5, 1}},
};

std::vector<CPUSpecificParams> cpuParams_4D_Planar_Blocked = {
CPUSpecificParams({nchw, nChw16c}, {nChw16c}, {}, {}),
};

const auto params_4D_FP32_Planar_Blocked = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D_Planar_Blocked),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Planar_Blocked)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName);


std::vector<std::vector<std::vector<size_t>>> inShapes_5D_Blocked_Planar = {
{{2, 17, 31, 4, 3}, {2, 1, 31, 1, 3}},
{{2, 17, 5, 3, 1}, {2, 1, 1, 3, 4}},
};

std::vector<CPUSpecificParams> cpuParams_5D_Blocked_Planar = {
CPUSpecificParams({nCdhw16c, ncdhw}, {nCdhw16c}, {}, {}),
};

const auto params_5D_FP32_Blocked_Planar = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D_Blocked_Planar),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Blocked_Planar)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName);


std::vector<std::vector<std::vector<size_t>>> inShapes_5D_Planar_Blocked = {
{{2, 1, 31, 1, 3}, {2, 17, 31, 4, 3}},
{{2, 1, 1, 3, 4}, {2, 17, 5, 3, 1}},
};

std::vector<CPUSpecificParams> cpuParams_5D_Planar_Blocked = {
CPUSpecificParams({ncdhw, nCdhw16c}, {nCdhw16c}, {}, {}),
};

const auto params_5D_FP32_Planar_Blocked = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D_Planar_Blocked),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Planar_Blocked)));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions
@ -0,0 +1,184 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <debug.h>
#include <functional_test_utils/layer_test_utils.hpp>
#include <ngraph_functions/builders.hpp>
#include <ie_precision.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ie_system_conf.h"

using namespace CPUTestUtils;
using InferenceEngine::Precision;
using ngraph::helpers::EltwiseTypes;
using FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc;

namespace CPULayerTestsDefinitions {

typedef std::tuple<
std::vector<std::vector<size_t>>, // Input shapes
std::vector<InferenceEngine::Precision>, // Input precisions
std::vector<EltwiseTypes>, // Eltwise operations
bool, // With quantization
std::string // Device name
> EltwiseChainTuple;

class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj) {
std::vector<std::vector<size_t>> inputShapes;
std::vector<InferenceEngine::Precision> inputPrecisions;
std::vector<EltwiseTypes> eltwiseOpTypes;
bool withQuantization;
std::string targetName;
std::tie(inputShapes, inputPrecisions, eltwiseOpTypes, withQuantization, targetName) = obj.param;
std::ostringstream results;

for (int i = 0; i < inputShapes.size(); i++) {
results << "IS" << std::to_string(i) << "=" << CommonTestUtils::vec2str(inputShapes[i]) << "_";
}
for (int i = 0; i < inputPrecisions.size(); i++) {
results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i].name() << "_";
}
for (int i = 0; i < eltwiseOpTypes.size(); i++) {
results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_";
}

results << "WithQuant=" << withQuantization << "_";
results << "targetDevice=" << targetName;

return results.str();
}

protected:
void SetUp() {
threshold = 0.1f;

std::vector<std::vector<size_t>> inputShapes;
std::vector<InferenceEngine::Precision> inputPrecisions;
std::vector<EltwiseTypes> eltwiseOpTypes;
bool withQuantization;
std::tie(inputShapes, inputPrecisions, eltwiseOpTypes, withQuantization, targetDevice) = this->GetParam();

auto ngraphParam = ngraph::builder::makeParams(convertIE2nGraphPrc(inputPrecisions[0]), {inputShapes[0]});

std::vector<std::shared_ptr<ngraph::Node>> ngraphInputs;
for (int i = 1; i < inputPrecisions.size(); i++) {
std::vector<float> ngraphInput1Data(ngraph::shape_size(ngraph::Shape{inputShapes[i]}));
ngraphInputs.push_back(ngraph::builder::makeConstant(convertIE2nGraphPrc(inputPrecisions[i]), ngraph::Shape{inputShapes[i]},
ngraphInput1Data, true));
}

if (withQuantization) {
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
for (int i = 1; i < eltwiseOpTypes.size() - 1; i++) {
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
}

std::vector<size_t> constShape(inputShapes[0].size(), 1);
constShape[1] = inputShapes[0][1];
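// Insert a per-channel FakeQuantize (256 levels, channel-shaped constants) before the last eltwise op in the chain.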
auto fq = ngraph::builder::makeFakeQuantize(eltwiseOps[eltwiseOps.size() - 1],
::ngraph::element::Type(::ngraph::element::Type_t::f32),
256, constShape);

eltwiseOps.push_back(ngraph::builder::makeEltwise(fq, ngraphInputs[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1]));

ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain_fq");
} else {
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
for (int i = 1; i < eltwiseOpTypes.size(); i++) {
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
}

ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain");
}
}
};

TEST_P(EltwiseChainTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()

Run();
}

namespace {

std::vector<std::vector<std::vector<size_t>>> inputShapes {
{
{{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}},
{{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}},
{{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}},
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
{{1, 2, 3}, {3}, {3}, {3}},
{{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}},
{{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}},
{{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}},
{{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}}
}
};

std::vector<std::vector<InferenceEngine::Precision>> inputPrecisions = {
{ Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 },
{ Precision::I32, Precision::I32, Precision::I32, Precision::I32 }
};

std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
{ EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
{ EltwiseTypes::DIVIDE, EltwiseTypes::POWER, EltwiseTypes::ADD },
};

INSTANTIATE_TEST_CASE_P(smoke_EltwiseChain, EltwiseChainTest,
::testing::Combine(
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(eltwiseOps),
::testing::Values(false),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);

std::vector<std::vector<std::vector<size_t>>> inputShapesFQ {
{
{{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
{{2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}},
{{2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}},
{{2, 5, 7, 5}, {2, 5, 1, 5}, {2, 5, 7, 5}, {2, 5, 7, 5}},
{{2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}},
{{2, 256, 7, 5}, {2, 256, 7, 5}, {2, 256, 1, 5}, {2, 256, 7, 5}},
{{1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}},
{{1, 12, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}},
{{1, 12, 1, 1, 6}, {1, 12, 5, 5, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 5, 1}},
{{1, 12, 1, 1, 1}, {1, 12, 5, 1, 7}, {3, 12, 1, 5, 7}, {3, 12, 5, 1, 7}},
{{1, 7, 1, 1, 12}, {1, 7, 5, 1, 12}, {3, 7, 1, 5, 12}, {3, 7, 5, 1, 12}},
{{1, 7, 1, 1, 12, 3, 7}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 3, 7}, {3, 7, 5, 1, 12, 3, 7}},
{{1, 7, 1, 1, 12, 3, 1}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 1, 7}, {3, 7, 5, 1, 12, 3, 1}}
}
};

std::vector<std::vector<InferenceEngine::Precision>> inputPrecisionsFQ {
{ Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 }
};

INSTANTIATE_TEST_CASE_P(smoke_EltwiseChainWithFQ, EltwiseChainTest,
::testing::Combine(
::testing::ValuesIn(inputShapesFQ),
::testing::ValuesIn(inputPrecisionsFQ),
::testing::ValuesIn(eltwiseOps),
::testing::Values(true),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions
@ -93,10 +93,25 @@ void EltwiseLayerTest::SetUp() {
FAIL() << "Unsupported Secondary operation type";
}

auto secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
std::shared_ptr<ngraph::Node> secondaryInput;
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD ||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
std::vector<float> data(ngraph::shape_size(shape_input_secondary));
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape_input_secondary));
for (float &i : data) {
if (i == 0) {
i = 1;
}
}
secondaryInput = ngraph::builder::makeConstant(ngPrc, shape_input_secondary, data);
} else {
secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
}
}

auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
function = std::make_shared<ngraph::Function>(eltwise, input, "Eltwise");
}
@ -564,6 +564,9 @@ std::ostream& operator<<(std::ostream & os, ngraph::helpers::EltwiseTypes type)
case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
os << "FloorMod";
break;
case ngraph::helpers::EltwiseTypes::MOD:
os << "Mod";
break;
default:
throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
}
@ -261,655 +261,6 @@ std::string select_op(eltwise_test_params::opType op) {
|
||||
return str_op;
|
||||
}
|
||||
|
||||
class MKLDNNGraphEltwise3InputsTests: public TestsCommon,
|
||||
public WithParamInterface<eltwise_test_params> {
|
||||
std::string model_t = R"V0G0N(
|
||||
<net name="EltwiseOnly" version="3" precision="FP32" batch="1">
|
||||
<layers>
|
||||
<layer name="in1" type="Input" precision="FP32" id="1">
|
||||
<output>
|
||||
<port id="1">__SRC_DIMS_1__
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer name="in2" type="Input" precision="FP32" id="2">
|
||||
<output>
|
||||
<port id="2">__SRC_DIMS_2__
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer name="in3" type="Input" precision="FP32" id="3">
|
||||
<output>
|
||||
<port id="3">__SRC_DIMS_3__
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
<layer name="con" id="4" type="Eltwise" precision="FP32">
|
||||
<data operation="_OP_" _COEFF_/>
|
||||
<input>
|
||||
<port id="1">__SRC_DIMS_1__
|
||||
</port>
|
||||
<port id="2">__SRC_DIMS_2__
|
||||
</port>
|
||||
<port id="3">__SRC_DIMS_3__
|
||||
</port>
|
||||
</input>
|
||||
<output>
|
||||
<port id="4">__SRC_DIMS__
|
||||
</port>
|
||||
</output>
|
||||
</layer>
|
||||
</layers>
|
||||
<edges>
|
||||
<edge from-layer="1" from-port="1" to-layer="4" to-port="1"/>
|
||||
<edge from-layer="2" from-port="2" to-layer="4" to-port="2"/>
|
||||
<edge from-layer="3" from-port="3" to-layer="4" to-port="3"/>
|
||||
</edges>
|
||||
</net>
|
||||
)V0G0N";
|
||||
|
||||
protected:
|
||||
std::string getModel(eltwise_test_params p) {
|
||||
std::string model = model_t;
|
||||
std::string op = select_op(p.op);
|
||||
|
||||
std::string src_dims1;
|
||||
for (auto &dim : p.dims1) {
|
||||
src_dims1 += "\n <dim>";
|
||||
src_dims1 += std::to_string(dim) + "</dim>";
|
||||
}
|
||||
REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
|
||||
|
||||
std::string src_dims2;
|
||||
for (auto &dim : p.dims2) {
|
||||
src_dims2 += "\n <dim>";
|
||||
src_dims2 += std::to_string(dim) + "</dim>";
|
||||
}
|
||||
REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);
|
||||
|
||||
std::string src_dims3;
|
||||
for (auto &dim : p.dims3) {
|
||||
src_dims3 += "\n <dim>";
|
||||
src_dims3 += std::to_string(dim) + "</dim>";
|
||||
}
|
||||
REPLACE_WITH_STR(model, "__SRC_DIMS_3__", src_dims3);
|
||||
|
||||
std::string src_dims;
|
||||
std::vector<size_t> dims = p.dims1;
|
||||
for (int i = 0; i < dims.size(); i++) {
|
||||
dims[i] = std::max(p.dims1[i], p.dims2[i]);
|
||||
dims[i] = std::max(dims[i], p.dims3[i]);
|
||||
}
|
||||
for (auto &dim : dims) {
|
||||
src_dims += "\n <dim>";
|
||||
src_dims += std::to_string(dim) + "</dim>";
|
||||
}
|
||||
REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
|
||||
|
||||
std::string scale;
|
||||
if (!p.scales.empty()) {
|
||||
scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
|
||||
}
|
||||
REPLACE_WITH_STR(model, "_OP_", op);
|
||||
REPLACE_WITH_STR(model, "_COEFF_", scale);
|
||||
|
||||
return model;
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
try {
|
||||
TestsCommon::SetUp();
|
||||
eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
|
||||
std::string model = getModel(p);
|
||||
|
||||
InferenceEngine::Core core;
|
||||
InferenceEngine::CNNNetwork network;
|
||||
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
|
||||
|
||||
MKLDNNGraphTestClass graph;
|
||||
graph.CreateGraph(network);
|
||||
|
||||
auto& nodes = graph.getNodes();
|
||||
for (int i = 0; i < nodes.size(); i++) {
|
||||
if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
|
||||
ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
|
||||
for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
|
||||
p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
|
||||
}
|
||||
ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
|
||||
ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
|
||||
}
|
||||
}
|
||||
InferenceEngine::SizeVector dims_src1 = p.dims1;
|
||||
InferenceEngine::Layout layout1 = InferenceEngine::ANY;
|
||||
switch (p.dims1.size()) {
|
||||
case 4:
|
||||
layout1 = InferenceEngine::NCHW;
|
||||
break;
|
||||
case 5:
|
||||
layout1 = InferenceEngine::NCDHW;
|
||||
break;
|
||||
}
|
||||
InferenceEngine::SizeVector dims_src2 = p.dims2;
|
||||
InferenceEngine::Layout layout2 = InferenceEngine::ANY;
|
||||
switch (p.dims2.size()) {
|
||||
case 4:
|
||||
layout2 = InferenceEngine::NCHW;
|
||||
break;
|
||||
case 5:
|
||||
layout2 = InferenceEngine::NCDHW;
|
||||
break;
|
||||
}
|
||||
InferenceEngine::SizeVector dims_src3 = p.dims3;
|
||||
InferenceEngine::Layout layout3 = InferenceEngine::ANY;
|
||||
switch (p.dims3.size()) {
|
||||
case 4:
|
||||
layout3 = InferenceEngine::NCHW;
|
||||
break;
|
||||
case 5:
|
||||
layout3 = InferenceEngine::NCDHW;
|
||||
break;
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, layout1});
|
||||
src1->allocate();
|
||||
|
||||
InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
|
||||
|
||||
if (srcPtr1 == nullptr)
|
||||
FAIL() << "Cannot cast blob to TBlob<float>.";
|
||||
CommonTestUtils::fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
|
||||
InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, layout2});
|
||||
src2->allocate();
|
||||
|
||||
InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
|
||||
|
||||
if (srcPtr2 == nullptr)
|
||||
FAIL() << "Cannot cast blob to TBlob<float>.";
|
||||
CommonTestUtils::fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);
|
||||
InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src3, layout3});
|
||||
src3->allocate();
|
||||
|
||||
InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());
|
||||
|
||||
if (srcPtr3 == nullptr)
|
||||
FAIL() << "Cannot cast blob to TBlob<float>.";
|
||||
CommonTestUtils::fill_data_sine(src3->buffer(), src3->size(), 0.1, 0.9, 3);
|
||||
InferenceEngine::BlobMap srcs;
|
||||
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
|
||||
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
|
||||
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in3", src3));
|
||||
|
||||
InferenceEngine::OutputsDataMap out;
|
||||
out = network.getOutputsInfo();
|
||||
InferenceEngine::BlobMap outputBlobs;
|
||||
|
||||
std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
|
||||
|
||||
InferenceEngine::TBlob<float>::Ptr output;
|
||||
output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
|
||||
output->allocate();
|
||||
outputBlobs[item.first] = output;
|
||||
|
||||
graph.Infer(srcs, outputBlobs);
|
||||
|
||||
InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
|
||||
dst_ref.allocate();
|
||||
|
||||
std::vector<InferenceEngine::TBlob<float>> src_vec = {*srcPtr1, *srcPtr2, *srcPtr3};
|
||||
|
||||
ref_eltwise(src_vec, dst_ref, p);
|
||||
|
||||
compare(*output, dst_ref, 0.0005f);
|
||||
} catch (const InferenceEngine::details::InferenceEngineException &e) {
|
||||
FAIL() << e.what();
|
||||
}
|
||||
}
|
||||
};

TEST_P(MKLDNNGraphEltwise3InputsTests, TestsEltwise) {}

INSTANTIATE_TEST_CASE_P(
        TestsEltwise, MKLDNNGraphEltwise3InputsTests,
        ::testing::Values(
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 32, 16, 16, 16},{1, 32, 16, 16, 16},{1, 32, 16, 16, 16}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                            ASSERT_EQ(3, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(1).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }
                } },
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
        ));
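
// Illustration only, not part of this commit: ref_eltwise above is the shared test-utility
// reference implementation. For the Sum cases with a non-empty coeff string, the expected
// output is a coefficient-weighted sum of the inputs. A minimal sketch (hypothetical helper
// name; assumes <vector> is already included by this file and, when coeffs is non-empty,
// one coefficient per input):
//
// static void ref_weighted_sum_sketch(const std::vector<std::vector<float>> &srcs,
//                                     const std::vector<float> &coeffs,
//                                     std::vector<float> &dst) {
//     for (size_t i = 0; i < dst.size(); ++i) {
//         float acc = 0.0f;
//         for (size_t n = 0; n < srcs.size(); ++n) {
//             // An empty coeff attribute ("") means every coefficient defaults to 1.0f.
//             acc += (coeffs.empty() ? 1.0f : coeffs[n]) * srcs[n][i];
//         }
//         dst[i] = acc;
//     }
// }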

class MKLDNNGraphEltwise2InputsTests: public TestsCommon,
                                      public WithParamInterface<eltwise_test_params> {
    std::string model_t = R"V0G0N(
<net name="EltwiseOnly" version="2" precision="FP32">
    <layers>
        <layer name="in1" type="Input" precision="FP32" id="1">
            <output>
                <port id="1">__SRC_DIMS_1__
                </port>
            </output>
        </layer>
        <layer name="in2" type="Input" precision="FP32" id="2">
            <output>
                <port id="2">__SRC_DIMS_2__
                </port>
            </output>
        </layer>
        <layer name="con" id="3" type="Eltwise" precision="FP32">
            <data operation="_OP_" _COEFF_/>
            <input>
                <port id="1">__SRC_DIMS_1__
                </port>
                <port id="2">__SRC_DIMS_2__
                </port>
            </input>
            <output>
                <port id="3">__SRC_DIMS__
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="1" from-port="1" to-layer="3" to-port="1"/>
        <edge from-layer="2" from-port="2" to-layer="3" to-port="2"/>
    </edges>
</net>
)V0G0N";

protected:
    std::string getModel(eltwise_test_params p) {
        std::string model = model_t;
        std::string op = select_op(p.op);

        std::string src_dims1 = "";
        for (auto &dim : p.dims1) {
            src_dims1 += "\n <dim>";
            src_dims1 += std::to_string(dim) + "</dim>";
        }
        REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);

        std::string src_dims2 = "";
        for (auto &dim : p.dims2) {
            src_dims2 += "\n <dim>";
            src_dims2 += std::to_string(dim) + "</dim>";
        }
        REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);

        std::string src_dims;
        std::vector<size_t> dims = (p.dims1.size() >= p.dims2.size()) ? p.dims1 : p.dims2;
        int i = dims.size() - 1, j = p.dims1.size() - 1, k = p.dims2.size() - 1;
        for (; j >= 0 && k >= 0; i--, j--, k--) {
            dims[i] = std::max(p.dims1[j], p.dims2[k]);
        }

        for (auto &dim : dims) {
            src_dims += "\n <dim>";
            src_dims += std::to_string(dim) + "</dim>";
        }
        REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);

        std::string scale;
        if (!p.scales.empty()) {
            scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
        }
        REPLACE_WITH_STR(model, "_OP_", op);
        REPLACE_WITH_STR(model, "_COEFF_", scale);

        return model;
    }
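
    // Note, not part of this commit: the output-dims loop in getModel above implements
    // numpy-style broadcasting over the trailing dimensions. For example, dims1 = {1, 3, 1, 3}
    // and dims2 = {1, 1, 3, 3} (the TestsBroadcasting cases below) give pairwise maxima of
    // {1, 3, 3, 3} for the output port.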

    virtual void TearDown() {
    }

    virtual void SetUp() {
        try {
            TestsCommon::SetUp();
            eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
            std::string model = getModel(p);

            InferenceEngine::Core core;
            InferenceEngine::CNNNetwork network;
            ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));

            MKLDNNGraphTestClass graph;
            graph.CreateGraph(network);

            auto& nodes = graph.getNodes();
            for (int i = 0; i < nodes.size(); i++) {
                if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
                    ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
                    for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
                        p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
                    }
                    ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
                    ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
                }
            }
            InferenceEngine::SizeVector dims_src1 = p.dims1;
            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::TensorDesc::getLayoutByDims(p.dims1) });
            src1->allocate();

            InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());

            if (srcPtr1 == nullptr)
                FAIL() << "Cannot cast blob to TBlob<float>.";

            CommonTestUtils::fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);

            InferenceEngine::SizeVector dims_src2 = p.dims2;
            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::TensorDesc::getLayoutByDims(p.dims2) });
            src2->allocate();

            InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());

            if (srcPtr2 == nullptr)
                FAIL() << "Cannot cast blob to TBlob<float>.";

            CommonTestUtils::fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);

            InferenceEngine::BlobMap srcs;
            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));

            InferenceEngine::OutputsDataMap out;
            out = network.getOutputsInfo();
            InferenceEngine::BlobMap outputBlobs;

            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();

            InferenceEngine::TBlob<float>::Ptr output;
            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
            output->allocate();
            outputBlobs[item.first] = output;

            graph.Infer(srcs, outputBlobs);

            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
            dst_ref.allocate();

            std::vector<InferenceEngine::TBlob<float>> src_vec = {*srcPtr1, *srcPtr2};

            ref_eltwise(src_vec, dst_ref, p);

            compare(*output, dst_ref, 0.0005f);
        } catch (const InferenceEngine::details::InferenceEngineException &e) {
            FAIL() << e.what();
        }
    }
};

TEST_P(MKLDNNGraphEltwise2InputsTests, TestsEltwise) {}

INSTANTIATE_TEST_CASE_P(
        TestsEltwise, MKLDNNGraphEltwise2InputsTests,
        ::testing::Values(
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Less, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Less_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Greater, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Greater_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Not_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref}
        ));

INSTANTIATE_TEST_CASE_P(
        TestsBroadcasting, MKLDNNGraphEltwise2InputsTests,
        ::testing::Values(
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Prod, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Max, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Min, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sub, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Div, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                // batch broadcasting
                eltwise_test_params{{1, 3, 224},{224, 3, 1},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{2, 3, 1, 2},{1, 3, 2, 1},{}, eltwise_test_params::opType::Sub, "", 1, MKLDNNPlugin::impl_desc_type::ref}
        ));

INSTANTIATE_TEST_CASE_P(
        TestsDiffDims, MKLDNNGraphEltwise2InputsTests,
        ::testing::Values(
                eltwise_test_params{{},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3},{3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3},{3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref}
        ));
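
// Note, not part of this commit: the TestsDiffDims cases above exercise rank-difference
// broadcasting, where an empty dims list denotes a scalar input; e.g. a scalar broadcast
// against {1, 3, 3, 3} produces an output of shape {1, 3, 3, 3}.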

class MKLDNNGraphEltwiseDynBatchTests: public MKLDNNGraphEltwise3InputsTests {
protected:
    virtual void SetUp() {
        try {
            TestsCommon::SetUp();
            eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
            std::string model = getModel(p);
            size_t MB = p.dims1[0];
            if (MB < 2)
                MB = 2;

            InferenceEngine::Core core;
            InferenceEngine::CNNNetwork network;
            ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));

            auto implNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(&((InferenceEngine::ICNNNetwork&)network));
            ASSERT_NE(nullptr, implNet) << "Failed to cast ICNNNetwork to CNNNetworkImpl";
            InferenceEngine::ResponseDesc resp;
            InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp);
            ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg;

            MKLDNNGraphTestClass graph;
            graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}});
            graph.CreateGraph(network);

            InferenceEngine::SizeVector dims_src1 = p.dims1;
            InferenceEngine::Layout layout1 = InferenceEngine::ANY;
            switch (p.dims1.size()) {
                case 4:
                    layout1 = InferenceEngine::NCHW;
                    break;
                case 5:
                    layout1 = InferenceEngine::NCDHW;
                    break;
            }
            InferenceEngine::SizeVector dims_src2 = p.dims2;
            InferenceEngine::Layout layout2 = InferenceEngine::ANY;
            switch (p.dims2.size()) {
                case 4:
                    layout2 = InferenceEngine::NCHW;
                    break;
                case 5:
                    layout2 = InferenceEngine::NCDHW;
                    break;
            }
            InferenceEngine::SizeVector dims_src3 = p.dims3;
            InferenceEngine::Layout layout3 = InferenceEngine::ANY;
            switch (p.dims3.size()) {
                case 4:
                    layout3 = InferenceEngine::NCHW;
                    break;
                case 5:
                    layout3 = InferenceEngine::NCDHW;
                    break;
            }

            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, layout1});
            src1->allocate();

            InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());

            if (srcPtr1 == nullptr)
                FAIL() << "Cannot cast blob to TBlob<float>.";

            fill_data(src1->buffer(), src1->size());
            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, layout2});
            src2->allocate();

            InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());

            if (srcPtr2 == nullptr)
                FAIL() << "Cannot cast blob to TBlob<float>.";
            fill_data(src2->buffer(), src2->size());
            InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src3, layout3});
            src3->allocate();

            InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());

            if (srcPtr3 == nullptr)
                FAIL() << "Cannot cast blob to TBlob<float>.";
            fill_data(src3->buffer(), src3->size());
            InferenceEngine::BlobMap srcs;
            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in3", src3));

            InferenceEngine::OutputsDataMap out;
            out = network.getOutputsInfo();
            InferenceEngine::BlobMap outputBlobs;

            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();

            InferenceEngine::TBlob<float>::Ptr output;
            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
            output->allocate();
            outputBlobs[item.first] = output;

            auto checkDepthwise = [](const MKLDNNPlugin::MKLDNNNodePtr& node) {
                return node->getType() == MKLDNNPlugin::Eltwise;
            };

            graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkDepthwise);
            graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkDepthwise);
        } catch (const InferenceEngine::details::InferenceEngineException &e) {
            FAIL() << e.what();
        }
    }
};

TEST_P(MKLDNNGraphEltwiseDynBatchTests, TestsDynBatchEltwise) {}

// TODO: rewrite to ngraph to have reshape functionality
INSTANTIATE_TEST_CASE_P(
        DISABLED_TestsDynBatchEltwise, MKLDNNGraphEltwiseDynBatchTests,
        ::testing::Values(
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Pow, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
        ));

struct precisions_test_2params {
    struct {
        std::string precision0;
@ -1022,7 +373,7 @@ INSTANTIATE_TEST_CASE_P(
        TestsEltwise2Precisions, MKLDNNGraphEltwise2PrecisionsTests,
        ::testing::Values(
                precisions_test_2params{ {"FP32", "FP32"}, 4, 0 },
                precisions_test_2params{ { "U8", "FP32"}, 5, 1 },
                precisions_test_2params{ {"FP32",   "U8"}, 5, 1 },
                precisions_test_2params{ { "U8",   "U8"}, 6, 2 }
                precisions_test_2params{ { "U8", "FP32"}, 4, 0 },
                precisions_test_2params{ {"FP32",   "U8"}, 4, 0 },
                precisions_test_2params{ { "U8",   "U8"}, 4, 0 }
        ));
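
// Note, not part of this commit: judging from the values above, the two integers after the
// precision pair appear to be the expected node and reorder counts for the built graph;
// with the generic JIT eltwise the mixed U8/FP32 cases drop from 5 nodes/1 reorder (and
// 6/2 for U8/U8) to a uniform 4 nodes/0 reorders.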

@ -116,13 +116,12 @@ protected:
            graph.CreateGraph(network);
            auto& nodes = graph.getNodes();
            for (int i = 0; i < nodes.size(); i++) {
                if (nodes[i]->getType() == MKLDNNPlugin::Power) {
                if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
                    ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
                    for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
                        p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
                    }
                    ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
                    ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
                }
            }

@ -174,25 +173,16 @@ INSTANTIATE_TEST_CASE_P(
        power_test_params{
                {1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        },
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
                        },
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }}},
        power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 },
        power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 },

@ -306,7 +296,7 @@ protected:
            outputBlobs[item.first] = output;

            auto checkPower = [](const MKLDNNPlugin::MKLDNNNodePtr& node) {
                return node->getType() == MKLDNNPlugin::Power;
                return node->getType() == MKLDNNPlugin::Eltwise;
            };
            graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkPower);
            graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkPower);

@ -325,25 +315,16 @@ INSTANTIATE_TEST_CASE_P(
        power_test_params{
                {1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, {
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                        },
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
                        },
                        [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                            ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
                            ASSERT_EQ(1, impl.getConfig().inConfs.size());
                            ASSERT_EQ(1, impl.getConfig().outConfs.size());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
                            ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
                        }}},
        power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 },
        power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 },

@ -257,14 +257,14 @@ protected:
        ASSERT_EQ(nodes.size(), 3);
        ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
        ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
        ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
        ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
        ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
    } else {
        ASSERT_EQ(nodes.size(), 5);
        ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
        ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
        ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution);
        ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
        ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
        ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
        ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
    }

@ -186,10 +186,9 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReorders) {
    for (auto &node : nodes) {
        if (node->getType() == MKLDNNPlugin::Reorder) {
            reorders_num++;
            ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
        }
    }
    ASSERT_EQ(reorders_num, 1);
    ASSERT_EQ(reorders_num, 3);
}

TEST_F(MKLDNNGraphStructureTests, TestRedundantReorderBeforeConvWithC_3) {

@ -3781,7 +3780,7 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) {
    weights->allocate();
    fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
    InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);

    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network;
    ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));

@ -4020,7 +4019,7 @@ TEST_F(MKLDNNGraphStructureTests, TestFailedPartPlateRecognitionBarrier0001) {
    fill_data((float *) weights->buffer(), weights->size() / sizeof(float));

    InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);

    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network;
    ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));

@ -4629,7 +4628,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
    memset((float *) weights->buffer(), 0, weights->size());

    InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);

    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network;
    network = core.ReadNetwork(model, weights_ptr);

@ -5127,7 +5126,7 @@ TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWithConcat) {
    weights->allocate();
    fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
    InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);

    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network;
    ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));

@ -5412,7 +5411,7 @@ TEST_F(MKLDNNGraphStructureTests, TestRefPoolingWithConcat) {
    weights->allocate();
    fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
    InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);

    InferenceEngine::Core core;
    InferenceEngine::CNNNetwork network;
    ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));

@ -5566,7 +5565,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2DepthwiseOpFusing) {
    ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
    ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
    ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution);
    ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
    ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
    ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
    ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);

@ -5704,7 +5703,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2EltwiseOpFusing) {
    ASSERT_EQ(nodes.size(), 4);
    ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
    ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
    ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Activation));
    ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
    ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder);
    ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output);

@ -5846,7 +5845,7 @@ TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWith2DepthwiseOpFusing) {
    ASSERT_EQ(nodes.size(), 3);
    ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
    ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
    ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
    ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
    ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);

    InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 8, 300, 600}, InferenceEngine::NCHW);

@ -27,6 +27,7 @@
#include <nodes/mkldnn_input_node.h>
#include <functional>
#include <cmath>
#include <legacy/details/ie_cnn_network_tools.h>

#define GARB_VAL(x) ((x + 100.0f + sin(x)) / (x + 150.f))

@ -212,13 +213,66 @@ public:
        return graphNodes;
    }

    void MoveInternalBlobsToConstLayers(InferenceEngine::details::CNNNetworkImpl* netImpl) {
        auto createConstInputTo = [&](InferenceEngine::CNNLayerPtr layer, InferenceEngine::Blob::Ptr blob, std::string name) {
            InferenceEngine::LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", InferenceEngine::Precision::FP32};
            auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
            constLayer->blobs["custom"] = blob;

            std::vector<size_t> constDims(layer->insData[0].lock()->getDims().size(), 1);
            if (constDims.size() > 1)
                constDims[1] = blob.get()->size();
            else
                constDims[0] = blob.get()->size();
            const InferenceEngine::TensorDesc& td = {InferenceEngine::Precision::FP32, constDims, InferenceEngine::TensorDesc::getLayoutByDims(constDims)};

            InferenceEngine::DataPtr newEdgeAfterLayer(new InferenceEngine::Data(constLayer->name, td));
            newEdgeAfterLayer->setName(constLayer->name);
            getCreatorLayer(newEdgeAfterLayer) = constLayer;
            getInputTo(newEdgeAfterLayer).clear();

            netImpl->addData(constLayer->name.c_str(), newEdgeAfterLayer);
            IE_SUPPRESS_DEPRECATED_START
            netImpl->addLayer(constLayer);
            IE_SUPPRESS_DEPRECATED_END

            constLayer->outData.push_back(newEdgeAfterLayer);
            getInputTo(newEdgeAfterLayer)[layer->name] = layer;
            layer->insData.push_back(newEdgeAfterLayer);
        };

        auto all_layers = InferenceEngine::details::CNNNetSortTopologically(*netImpl);
        for (auto &layer : all_layers) {
            if (layer->type == "ScaleShift" && layer->insData.size() == 1) {
                InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"];
                if (scalesBlob != nullptr)
                    createConstInputTo(layer, scalesBlob, "weights");

                InferenceEngine::Blob::Ptr shiftBlob = layer->blobs["biases"];
                if (shiftBlob != nullptr)
                    createConstInputTo(layer, shiftBlob, "biases");
            } else if (layer->type == "PReLU" && layer->insData.size() == 1) {
                InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"];
                if (scalesBlob != nullptr)
                    createConstInputTo(layer, scalesBlob, "weights");
            }
        }
    }
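
    // Note, not part of this commit: the helper above rewrites legacy ScaleShift/PReLU
    // layers so their internal "weights"/"biases" blobs become explicit Const input layers,
    // presumably so the converted network can express these ops as Eltwise nodes that read
    // their scales and shifts from regular graph inputs.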

    void CreateGraph(InferenceEngine::ICNNNetwork &network, const MKLDNNPlugin::MKLDNNExtensionManager::Ptr& extMgr,
                     MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache = {}) {
        if (network.getFunction()) {
            auto convertedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(network);
            MoveInternalBlobsToConstLayers(convertedNetwork.get());
            MKLDNNGraph::CreateGraph(static_cast<InferenceEngine::ICNNNetwork&>(*convertedNetwork),
                extMgr, cache);
                                     extMgr, cache);
        } else {
            InferenceEngine::details::CNNNetworkImpl* netImpl = dynamic_cast<InferenceEngine::details::CNNNetworkImpl*>(&network);
            if (netImpl == nullptr) {
                THROW_IE_EXCEPTION << "unexpected network type";
            }
            MoveInternalBlobsToConstLayers(netImpl);
            MKLDNNGraph::CreateGraph(network, extMgr, cache);
        }
    }
@ -227,9 +281,15 @@ public:
        MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache;
        if (network.getFunction()) {
            auto convertedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(network);
            MoveInternalBlobsToConstLayers(convertedNetwork.get());
            MKLDNNGraph::CreateGraph(static_cast<InferenceEngine::ICNNNetwork&>(*convertedNetwork),
                                     extensionManager, cache);
        } else {
            InferenceEngine::details::CNNNetworkImpl* netImpl = dynamic_cast<InferenceEngine::details::CNNNetworkImpl*>(&network);
            if (netImpl == nullptr) {
                THROW_IE_EXCEPTION << "unexpected network type";
            }
            MoveInternalBlobsToConstLayers(netImpl);
            MKLDNNGraph::CreateGraph(network, extensionManager, cache);
        }
    }
inference-engine/thirdparty/mkl-dnn (vendored submodule)
@ -1 +1 @@
Subproject commit 4b239023043318899e1c0a3b79158a68b7efe6e4
Subproject commit d7d8ed46078b637794bc91215e1a982bb0f1683a

@ -115,11 +115,6 @@ xfail_issue_38084 = xfail_test(reason="RuntimeError: AssertionFailed: layer->get
xfail_issue_38085 = xfail_test(reason="RuntimeError: Interpolate operation should be converted to Interp")
xfail_issue_38086 = xfail_test(reason="RuntimeError: Quantize layer input '<value>' doesn't have blobs")
xfail_issue_38087 = xfail_test(reason="RuntimeError: Cannot cast to tensor desc. Format is unsupported!")
xfail_issue_38088 = xfail_test(reason="RuntimeError: Check '((axis >= axis_range_min) && "
                                      "(axis <= axis_range_max))' failed at "
                                      "/openvino/ngraph/core/src/validation_util.cpp:913: "
                                      "Split Parameter axis <value> out of the tensor rank range <value>.")
xfail_issue_38089 = xfail_test(reason="RuntimeError: Node 2 contains empty child edge for index 0")
xfail_issue_38090 = xfail_test(reason="AssertionError: Items types are not equal")
xfail_issue_38091 = xfail_test(reason="AssertionError: Mismatched elements")
xfail_issue_38699 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:"

@ -22,7 +22,6 @@ from tests import (xfail_issue_34323,
                   skip_segfault,
                   xfail_issue_34327,
                   xfail_issue_36485,
                   xfail_issue_35923,
                   xfail_issue_36486,
                   xfail_issue_34314,
                   xfail_issue_36487)
@ -418,7 +417,6 @@ def test_grn_operator():
    assert np.allclose(result, expected)


@xfail_issue_35923
def test_prelu_operator():
    runtime = get_runtime()

@ -38,7 +38,6 @@ from tests import (BACKEND_NAME,
                   xfail_issue_33616,
                   xfail_issue_38086,
                   xfail_issue_38087,
                   xfail_issue_35923,
                   xfail_issue_36483,
                   xfail_issue_34323,
                   xfail_issue_35915,
@ -46,8 +45,6 @@ from tests import (BACKEND_NAME,
                   xfail_issue_36476,
                   xfail_issue_36478,
                   xfail_issue_36437,
                   xfail_issue_38088,
                   xfail_issue_38089,
                   xfail_issue_38090,
                   xfail_issue_38091,
                   xfail_issue_35929,
@ -220,9 +217,6 @@ tests_expected_to_fail = [
     "OnnxBackendNodeModelTest.test_quantizelinear_cpu"),
    (xfail_issue_38087,
     "OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"),
    (xfail_issue_35923,
     "OnnxBackendNodeModelTest.test_prelu_broadcast_cpu",
     "OnnxBackendNodeModelTest.test_prelu_example_cpu"),
    (xfail_issue_36483,
     "OnnxBackendNodeModelTest.test_ceil_cpu",
     "OnnxBackendNodeModelTest.test_ceil_example_cpu"),
@ -286,10 +280,6 @@ tests_expected_to_fail = [
     "OnnxBackendNodeModelTest.test_argmin_keepdims_example_select_last_index_cpu",
     "OnnxBackendNodeModelTest.test_argmin_keepdims_random_select_last_index_cpu",
     "OnnxBackendNodeModelTest.test_pow_types_float32_uint32_cpu"),
    (xfail_issue_38088,
     "OnnxBackendPyTorchConvertedModelTest.test_GLU_cpu"),
    (xfail_issue_38089,
     "OnnxBackendPyTorchConvertedModelTest.test_GLU_dim_cpu"),
    (xfail_issue_38090,
     "OnnxBackendNodeModelTest.test_where_long_example_cpu",
     "OnnxBackendNodeModelTest.test_mod_int64_fmod_cpu",

@ -18,7 +18,6 @@ import onnx
import pytest

from tests.test_onnx.utils import run_node
from tests import xfail_issue_35915


@pytest.mark.parametrize(
@ -27,9 +26,9 @@ from tests import xfail_issue_35915
        pytest.param("And", np.logical_and, np.bool),
        pytest.param("Or", np.logical_or, np.bool),
        pytest.param("Xor", np.logical_xor, np.bool),
        pytest.param("Equal", np.equal, np.int32, marks=xfail_issue_35915),
        pytest.param("Greater", np.greater, np.int32, marks=xfail_issue_35915),
        pytest.param("Less", np.less, np.int32, marks=xfail_issue_35915),
        pytest.param("Equal", np.equal, np.int32),
        pytest.param("Greater", np.greater, np.int32),
        pytest.param("Less", np.less, np.int32),
    ],
)
def test_logical(onnx_op, numpy_func, data_type):

@ -18,7 +18,7 @@ import onnx
import pytest

from tests.test_onnx.utils import run_node
from tests import xfail_issue_35918, xfail_issue_35923, xfail_issue_35924
from tests import xfail_issue_35918, xfail_issue_35924


def import_and_compute(op_type, input_data, **node_attrs):
@ -71,7 +71,6 @@ def test_leaky_relu():
    assert_onnx_import_equals_callable("LeakyRelu", leaky_relu, [[-3, -2, -1], [1, 2, 3]])


@xfail_issue_35923
@pytest.mark.parametrize(
    "x, slope",
    [