[CPU] Generic JIT Eltwise implementation (#1464)

Gorokhov Dmitriy 2020-10-28 09:16:28 +03:00 committed by GitHub
parent e3ed796b2e
commit abb8817cf6
54 changed files with 4855 additions and 5096 deletions


@ -9,7 +9,6 @@ if (WIN32)
endif()
set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_activation_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_batchnorm_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_bin_conv_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_concat_node.cpp
@ -17,7 +16,6 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_crop_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_deconv_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_def_conv_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_depthwise_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_eltwise_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fullyconnected_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gemm_node.cpp
@ -27,7 +25,6 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_power_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp
@ -94,7 +91,10 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/unique.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/interp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax_imp.cpp


@ -150,19 +150,6 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::softmax_forward::desc>() {
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::depthwise_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::depthwise_forward::desc>(desc));
}
MKLDNNDescriptor::operator std::shared_ptr<mkldnn::depthwise_forward::desc>() {
DescFwdImpl<mkldnn::depthwise_forward::desc> *typeDesc =
dynamic_cast<DescFwdImpl<mkldnn::depthwise_forward::desc> *>(desc.get());
if (typeDesc == nullptr) {
THROW_IE_EXCEPTION << "Cannot cast descriptor!";
}
return typeDesc->getPtr();
}
MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::rnn_forward::desc> desc) {
this->desc.reset(new DescFwdImpl<mkldnn::rnn_forward::desc>(desc));
}


@ -37,9 +37,6 @@ public:
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::softmax_forward::desc> desc);
operator std::shared_ptr<mkldnn::softmax_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::depthwise_forward::desc> desc);
operator std::shared_ptr<mkldnn::depthwise_forward::desc>();
explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::rnn_forward::desc> desc);
operator std::shared_ptr<mkldnn::rnn_forward::desc>();


@ -30,6 +30,7 @@
#include <unordered_set>
#include <utility>
#include <cstring>
#include <legacy/details/ie_cnn_network_tools.h>
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@ -57,18 +58,17 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) {
#ifdef USE_CNNNETWORK_LPT
auto params = LayerTransformation::Params(true, // updatePrecisions
true, // quantizeOutputs
true, // weightsToConst
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true, // roundQuantizedValues
true, // updateBiases
true); // supportAsymmetricQuantization
true, // quantizeOutputs
true, // weightsToConst
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true, // roundQuantizedValues
true, // updateBiases
true); // supportAsymmetricQuantization
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
addCleanup<ScaleShiftToConvolutionTransformation>(
LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
"ScaleShift"));
remove("ScaleShift").
remove("Power"));
transformer.transform(*_clonedNetwork);
#endif
@ -102,6 +102,59 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
MKLDNNGraph::ApplyUnrollPasses(static_cast<ICNNNetwork&>(*_clonedNetwork));
auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, std::string name) {
LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()};
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
constLayer->blobs["custom"] = blob;
std::vector<size_t> constDims(layer->insData[0].lock()->getDims().size(), 1);
if (constDims.size() > 1)
constDims[1] = blob.get()->size();
else
constDims[0] = blob.get()->size();
const TensorDesc& td = {blob->getTensorDesc().getPrecision(), constDims, TensorDesc::getLayoutByDims(constDims)};
DataPtr newEdgeAfterLayer(new Data(constLayer->name, td));
newEdgeAfterLayer->setName(constLayer->name);
getCreatorLayer(newEdgeAfterLayer) = constLayer;
getInputTo(newEdgeAfterLayer).clear();
_clonedNetwork->addData(constLayer->name.c_str(), newEdgeAfterLayer);
IE_SUPPRESS_DEPRECATED_START
_clonedNetwork->addLayer(constLayer);
IE_SUPPRESS_DEPRECATED_END
constLayer->outData.push_back(newEdgeAfterLayer);
getInputTo(newEdgeAfterLayer)[layer->name] = layer;
layer->insData.push_back(newEdgeAfterLayer);
};
auto all_layers = details::CNNNetSortTopologically(*_clonedNetwork);
for (auto &layer : all_layers) {
if (layer->type == "ScaleShift" && layer->insData.size() == 1) {
Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr)
createConstInputTo(layer, scalesBlob, "weights");
Blob::Ptr shiftBlob = layer->blobs["biases"];
if (shiftBlob != nullptr) {
createConstInputTo(layer, shiftBlob, "biases");
} else if (scalesBlob != nullptr) {
Blob::Ptr biases = make_shared_blob<float>(scalesBlob->getTensorDesc());
biases->allocate();
auto biasesPtr = biases->buffer().as<float*>();
for (size_t i = 0; i < biases->size(); i++)
biasesPtr[i] = 0;
createConstInputTo(layer, biases, "biases");
}
} else if (layer->type == "PReLU" && layer->insData.size() == 1) {
Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr)
createConstInputTo(layer, scalesBlob, "weights");
}
}
if (_cfg.batchLimit > 1) {
// check topology for applicability
if (!CanProcessDynBatch(*_clonedNetwork)) {
@ -272,7 +325,6 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &n
type != SoftMax &&
type != Split &&
type != Concatenation &&
type != Power &&
type != Eltwise &&
type != Crop &&
type != BatchNormalization &&

File diff suppressed because it is too large.


@ -5,6 +5,7 @@
#pragma once
#include "mkldnn_graph.h"
#include "nodes/mkldnn_eltwise_node.h"
#include <vector>
namespace MKLDNNPlugin {
@ -18,18 +19,12 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
private:
void SLTMTransform(MKLDNNGraph& graph);
void MergeConversions(MKLDNNGraph& graph);
void MergeGroupConvolution(MKLDNNGraph& graph);
void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
void MergeSigmoidAndMultiplyToSwish(MKLDNNGraph& graph);
#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
void FuseConvolutionAndActivation(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
#endif
#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
void FuseConvolutionAndDepthwise(MKLDNNGraph &graph);
#endif
void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
#if defined(COMPILED_CPU_MKLDNN_QUANTIZE_NODE)
@ -59,6 +54,9 @@ private:
void FuseClampAndQuantize(MKLDNNGraph &graph);
bool IsOneOf(Type type, std::vector<Type> types);
bool IsOneOf(EltwiseOpType alg, std::vector<EltwiseOpType> algs);
void removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge);
};
} // namespace MKLDNNPlugin


@ -22,12 +22,9 @@
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_lrn_node.h>
#include <nodes/mkldnn_pooling_node.h>
#include <nodes/mkldnn_power_node.h>
#include <nodes/mkldnn_activation_node.h>
#include <nodes/mkldnn_reorder_node.h>
#include <nodes/mkldnn_reshape_node.h>
#include <nodes/mkldnn_roi_pooling_node.h>
#include <nodes/mkldnn_depthwise_node.h>
#include <nodes/mkldnn_softmax_node.h>
#include <nodes/mkldnn_tile_node.h>
#include <nodes/mkldnn_split_node.h>
@ -63,23 +60,23 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "Output", Output },
{ "Reorder", Reorder },
{ "Convolution", Convolution },
{ "ReLU", Activation },
{ "GELU", Activation },
{ "ELU", Activation },
{ "Sigmoid", Activation },
{ "Logistic", Activation },
{ "TanH", Activation },
{ "ReLU6", Activation },
{ "Exp", Activation },
{ "Not", Activation },
{ "Activation", Activation },
{ "Clamp", Activation },
{ "Swish", Activation },
{ "HSwish", Activation },
{ "Mish", Activation },
{ "HSigmoid", Activation },
{ "ScaleShift", Depthwise },
{ "PReLU", Depthwise },
{ "ReLU", Eltwise },
{ "GELU", Eltwise },
{ "ELU", Eltwise },
{ "Sigmoid", Eltwise },
{ "Logistic", Eltwise },
{ "TanH", Eltwise },
{ "ReLU6", Eltwise },
{ "Exp", Eltwise },
{ "Not", Eltwise },
{ "Activation", Eltwise },
{ "Clamp", Eltwise },
{ "Swish", Eltwise },
{ "HSwish", Eltwise },
{ "Mish", Eltwise },
{ "HSigmoid", Eltwise },
{ "ScaleShift", Eltwise },
{ "PReLU", Eltwise },
{ "Norm", Lrn },
{ "LRN", Lrn },
{ "Pooling", Pooling },
@ -91,9 +88,10 @@ static const InferenceEngine::details::caseless_unordered_map<std::string, Type>
{ "Split", Split },
{ "Slice", Split },
{ "Concat", Concatenation },
{ "Power", Power },
{ "Deconvolution", Deconvolution },
{ "Eltwise", Eltwise },
{ "Mod", Eltwise },
{ "Power", Eltwise },
{ "Crop", Crop },
{ "Reshape", Reshape },
{ "Tile", Tile },


@ -44,7 +44,6 @@ enum Type {
SoftMax,
Split,
Concatenation,
Power,
Eltwise,
Gemm,
Crop,
@ -118,8 +117,6 @@ static std::string NameFromType(Type type) {
return "Split";
case Concatenation:
return "Concatenation";
case Power:
return "Power";
case Depthwise:
return "Depthwise";
case Crop:


@ -43,6 +43,7 @@
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
@ -145,6 +146,7 @@ static void Transformation(ICNNNetwork::Ptr& clonedNetwork, const Config& conf)
pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
pass_config->disable<ngraph::pass::ConvertMod>();
pass_config->enable<ngraph::pass::ConvertPadToGroupConvolution>();


@ -0,0 +1,200 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "emitter.h"
#include <vector>
using namespace mkldnn::impl::cpu;
using namespace mkldnn::impl;
using namespace Xbyak;
namespace MKLDNNPlugin {
template <typename T, typename P>
constexpr bool one_of(T val, P item) { return val == item; }
template <typename T, typename P, typename... Args>
constexpr bool one_of(T val, P item, Args... item_others) {
return val == item || one_of(val, item_others...);
}
size_t jit_emitter::get_max_vecs_count() const {
return one_of(host_isa_, cpu::avx512_common, cpu::avx512_core) ? 32 : 16;
}
size_t jit_emitter::get_vec_length() const {
return one_of(host_isa_, cpu::avx512_common, cpu::avx512_core) ? 64 :
one_of(host_isa_, cpu::avx2) ? 32 : 16;
}
void jit_emitter::push_vec(const Xbyak::Address &addr, size_t vec_idx) const {
if (host_isa_ == cpu::sse42) {
h->uni_vmovups(addr, Xmm(vec_idx));
} else if (host_isa_ == cpu::avx2) {
h->uni_vmovups(addr, Ymm(vec_idx));
} else {
h->uni_vmovups(addr, Zmm(vec_idx));
}
}
void jit_emitter::pop_vec(size_t vec_idx, const Xbyak::Address &addr) const {
if (host_isa_ == cpu::sse42) {
h->uni_vmovups(Xmm(vec_idx), addr);
} else if (host_isa_ == cpu::avx2) {
h->uni_vmovups(Ymm(vec_idx), addr);
} else {
h->uni_vmovups(Zmm(vec_idx), addr);
}
}
size_t jit_emitter::aux_vecs_count() const {
return 0;
}
size_t jit_emitter::aux_gprs_count() const {
// We need one gpr to load table address
return entry_map_.empty() ? 0 : 1;
}
std::set<InferenceEngine::Precision> jit_emitter::get_supported_precisions() {
return {InferenceEngine::Precision::FP32};
}
void jit_emitter::emitter_preamble(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &pool_vec_idxs,
const std::vector<size_t> &pool_gpr_idxs) {
using namespace Xbyak::util;
for (auto idx : pool_vec_idxs)
aux_vec_idxs.push_back(idx);
// For sse42 mask register has to be Xmm(0)
if (host_isa_ == cpu::sse42 && aux_vecs_count() > 0) {
size_t idx = 0;
assert(std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) == in_vec_idxs.end());
if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) == aux_vec_idxs.end()) {
aux_vec_idxs.push_back(idx);
preserved_vec_idxs.push_back(idx);
}
// moving mask vector at the beginning of aux vectors list to simplify further processing
for (int i = 0; i < aux_vec_idxs.size(); i++) {
if (aux_vec_idxs[i] == 0) {
size_t tmp = aux_vec_idxs[0];
aux_vec_idxs[0] = aux_vec_idxs[i];
aux_vec_idxs[i] = tmp;
break;
}
}
}
for (size_t idx = 0; idx < get_max_vecs_count(); idx++) {
if (aux_vec_idxs.size() >= aux_vecs_count()) break;
if (std::find(in_vec_idxs.begin(), in_vec_idxs.end(), idx) != in_vec_idxs.end()) continue;
if (std::find(aux_vec_idxs.begin(), aux_vec_idxs.end(), idx) != aux_vec_idxs.end()) continue;
if (std::find(preserved_vec_idxs.begin(), preserved_vec_idxs.end(), idx) != preserved_vec_idxs.end()) continue;
aux_vec_idxs.push_back(idx);
preserved_vec_idxs.push_back(idx);
}
assert(aux_vec_idxs.size() >= aux_vecs_count());
// Same logic but to allocate gprs
for (auto idx : pool_gpr_idxs)
aux_gpr_idxs.push_back(idx);
for (size_t gpr_idx = 0; gpr_idx <= Operand::R15; ++gpr_idx) {
size_t _idx = Operand::R15 - gpr_idx; // we allocate from the end
if (aux_gpr_idxs.size() >= aux_gprs_count()) break;
if (_idx == Operand::RSP) continue;
if (std::find(aux_gpr_idxs.begin(), aux_gpr_idxs.end(), _idx) != aux_gpr_idxs.end()) continue;
if (std::find(preserved_gpr_idxs.begin(), preserved_gpr_idxs.end(), _idx) != preserved_gpr_idxs.end()) continue;
aux_gpr_idxs.push_back(_idx);
preserved_gpr_idxs.push_back(_idx);
}
assert(aux_gpr_idxs.size() == aux_gprs_count());
if (!entry_map_.empty()) {
p_table = Reg64(aux_gpr_idxs[0]);
aux_gpr_idxs.erase(aux_gpr_idxs.begin());
}
for (size_t i = 0; i < preserved_gpr_idxs.size(); ++i)
h->push(Reg64(preserved_gpr_idxs[i]));
if (preserved_vec_idxs.size())
h->sub(h->rsp, preserved_vec_idxs.size() * get_vec_length());
for (size_t i = 0; i < preserved_vec_idxs.size(); ++i) {
push_vec(h->ptr[h->rsp + i * get_vec_length()], preserved_vec_idxs[i]);
}
if (!entry_map_.empty())
load_table_addr();
}
void jit_emitter::emitter_postamble() {
using namespace Xbyak::util;
for (size_t i = 0; i < preserved_vec_idxs.size(); ++i)
pop_vec(preserved_vec_idxs[i], h->ptr[h->rsp + i * get_vec_length()]);
if (preserved_vec_idxs.size())
h->add(h->rsp, preserved_vec_idxs.size() * get_vec_length());
for (int i = aux_gprs_count() - 1; i >= 0; --i)
h->pop(Reg64(preserved_gpr_idxs[i]));
preserved_vec_idxs.clear();
preserved_gpr_idxs.clear();
aux_vec_idxs.clear();
aux_gpr_idxs.clear();
}
void jit_emitter::emit_table() {
h->align(64);
h->L(l_table);
// Assumption: entries can be inserted with dd, so they should be 4 bytes.
assert(sizeof(table_entry_val_t) == 4);
// Run through the map and insert values stored there
for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) {
const auto &te = (*it).second; // get map entry for a given key
const auto len = te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
for (size_t d = 0; d < len; d += sizeof(table_entry_val_t))
h->dd(te.val);
}
}
void jit_emitter::prepare_table() {
register_table_entries();
// Now that we registered the entries, we set the offsets. No
// entries should be registered after this point. This allows us to
// expect the same order when the table entries are emitted in
// emit_table().
size_t off = 0;
for (auto it = entry_map_.begin(); it != entry_map_.end(); it++) {
auto &te = (*it).second;
te.off = off;
off += te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
}
}
void jit_emitter::emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {
emitter_preamble(in_vec_idxs, pool_vec_idxs, pool_gpr_idxs);
emit_impl(in_vec_idxs, out_vec_idxs, pool_vec_idxs, pool_gpr_idxs);
emitter_postamble();
}
} // namespace MKLDNNPlugin
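For orientation, a host kernel is expected to call emit() once per vectorized loop iteration (which wraps emitter_preamble, emit_impl and emitter_postamble as shown above) and emit_table() once after the generated code, so the constants stay out of the execution path. The sketch below is illustrative only and is not part of this commit: the kernel type, the hardcoded avx2 ISA and the header name are assumptions, while the real host is the generic eltwise kernel in the suppressed mkldnn_eltwise_node.cpp diff.
#include <memory>
#include "jit_eltwise_emitters.hpp"   // assumed header name for the emitters added later in this commit
// Hypothetical host kernel driving a jit_emitter (illustration only).
struct jit_uni_example_kernel : public mkldnn::impl::cpu::jit_generator {
    explicit jit_uni_example_kernel(const MKLDNNNode& node) {
        // the emitter is bound to this generator, so it writes into the same code buffer
        auto add = std::make_shared<jit_add_emitter>(this, mkldnn::impl::cpu::avx2, node);
        this->preamble();
        // ... load the two source values into vector registers 0 and 1 ...
        add->emit({0, 1}, {2});   // emitter_preamble -> emit_impl -> emitter_postamble
        // ... store vector register 2 to the destination ...
        this->postamble();
        add->emit_table();        // the constant table is placed after the executable code
    }
};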


@ -0,0 +1,128 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include "jit_generator.hpp"
#include "mkldnn_node.h"
#include <set>
namespace MKLDNNPlugin {
class jit_emitter {
public:
jit_emitter(mkldnn::impl::cpu::jit_generator* host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32)
: h(host), host_isa_(host_isa), n(node), exec_prc_(exec_prc) {
k_mask = Xbyak::Opmask(1); // FIXME: in general case we need preserve k_mask state as well
}
virtual void emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs = {}, const std::vector<size_t> &pool_gpr_idxs = {});
virtual void emit_table();
virtual size_t get_inputs_num() = 0;
virtual size_t aux_vecs_count() const;
static std::set<InferenceEngine::Precision> get_supported_precisions();
protected:
virtual size_t aux_gprs_count() const;
size_t get_max_vecs_count() const;
size_t get_vec_length() const;
const MKLDNNNode& n;
mkldnn::impl::cpu::jit_generator* h;
mkldnn::impl::cpu::cpu_isa_t host_isa_;
InferenceEngine::Precision exec_prc_;
Xbyak::Opmask k_mask;
virtual void prepare_table();
virtual void register_table_entries() {}
void load_table_addr() { h->mov(p_table, l_table); }
// we accept only 32bit hexadecimal table values to avoid any rounding
using table_entry_val_t = uint32_t;
using table_entry_offset_t = size_t; // offsets are in bytes wrt p_table
using table_entry_bcast_t = bool; // true => bcast value
struct table_entry_t {
table_entry_val_t val;
table_entry_bcast_t bcast;
};
struct mapped_table_entry_t {
table_entry_offset_t off;
table_entry_val_t val;
table_entry_bcast_t bcast;
};
Xbyak::Reg64 p_table;
Xbyak::Label l_table;
enum {
_cmp_eq_oq = mkldnn::impl::cpu::jit_generator::_cmp_eq_oq,
_cmp_neq_uq = mkldnn::impl::cpu::jit_generator::_cmp_neq_uq,
_cmp_lt_os = mkldnn::impl::cpu::jit_generator::_cmp_lt_os,
_cmp_le_os = mkldnn::impl::cpu::jit_generator::_cmp_le_os,
_cmp_ge_os = mkldnn::impl::cpu::jit_generator::_cmp_nlt_us,
_cmp_gt_os = mkldnn::impl::cpu::jit_generator::_cmp_nle_us,
};
virtual void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {}
virtual void emitter_preamble(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &pool_vec_idxs,
const std::vector<size_t> &pool_gpr_idxs);
virtual void emitter_postamble();
std::vector<size_t> aux_vec_idxs;
std::vector<size_t> aux_gpr_idxs;
static constexpr int k_mask_size = 8;
Xbyak::Address table_val(std::string key, size_t key_off_val_shift = 0) const {
auto off = table_off(key, key_off_val_shift);
return h->ptr[p_table + off];
}
using table_t = std::multimap<std::string, table_entry_t>;
using mapped_table_t = std::multimap<std::string, mapped_table_entry_t>;
mapped_table_t entry_map_;
void push_arg_entry_of(const std::string key, const table_entry_val_t val, const bool broadcast) {
mapped_table_entry_t te {0, val, broadcast};
entry_map_.insert(std::make_pair(key, te));
}
void push_entries_of(const table_t &t) {
for (auto it = t.begin(); it != t.end(); it++) {
auto key = (*it).first;
auto te = (*it).second; // copy values from table
push_arg_entry_of(key, te.val, te.bcast);
}
}
private:
std::vector<size_t> preserved_vec_idxs;
std::vector<size_t> preserved_gpr_idxs;
void push_vec(const Xbyak::Address &addr, size_t vec_idx) const;
void pop_vec(size_t vec_idx, const Xbyak::Address &addr) const;
size_t table_off(std::string& key, size_t key_off_val_shift = 0) const {
// assumption: all table entries sharing the same key also
// share their broadcast property
// TODO: enforce through data structure
const auto it = entry_map_.find(key); // search an entry for a key
assert(it != entry_map_.end());
const auto &te = (*it).second;
const auto scale = te.bcast ? get_vec_length() : sizeof(table_entry_val_t);
return te.off + key_off_val_shift * scale;
}
};
} // namespace MKLDNNPlugin
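To make the table mechanism concrete, below is a minimal hypothetical derived emitter (not part of this commit) that multiplies its single input by a constant kept in the shared value table. It follows the same pattern as the real emitters in this commit: register_table_entries() plus prepare_table() in the constructor, and table_val() inside emit_impl(); the class name, the "two" key and the 0x40000000 bit pattern (2.0f) are assumptions made for the example.
#include <vector>
#include "common/emitter.h"
// Hypothetical example only: scales the input by 2.0f taken from the constant table.
class jit_scale_by_two_emitter : public jit_emitter {
public:
    jit_scale_by_two_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
                             InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32)
        : jit_emitter(host, host_isa, node, exec_prc) {
        prepare_table();   // registers the entries below and assigns their table offsets
    }
    size_t get_inputs_num() override { return 1; }
private:
    void register_table_entries() override {
        // 2.0f as a raw 32-bit pattern, broadcast across the whole vector register
        push_arg_entry_of("two", 0x40000000, true);
    }
    void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
                   const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override {
        // emitter_preamble() has already loaded p_table, so table_val() is valid here
        if (host_isa_ == mkldnn::impl::cpu::sse42) {
            if (out_vec_idxs[0] != in_vec_idxs[0])
                h->uni_vmovups(Xbyak::Xmm(out_vec_idxs[0]), Xbyak::Xmm(in_vec_idxs[0]));
            h->uni_vmulps(Xbyak::Xmm(out_vec_idxs[0]), Xbyak::Xmm(out_vec_idxs[0]), table_val("two"));
        } else if (host_isa_ == mkldnn::impl::cpu::avx2) {
            if (out_vec_idxs[0] != in_vec_idxs[0])
                h->uni_vmovups(Xbyak::Ymm(out_vec_idxs[0]), Xbyak::Ymm(in_vec_idxs[0]));
            h->uni_vmulps(Xbyak::Ymm(out_vec_idxs[0]), Xbyak::Ymm(out_vec_idxs[0]), table_val("two"));
        } else {
            if (out_vec_idxs[0] != in_vec_idxs[0])
                h->uni_vmovups(Xbyak::Zmm(out_vec_idxs[0]), Xbyak::Zmm(in_vec_idxs[0]));
            h->uni_vmulps(Xbyak::Zmm(out_vec_idxs[0]), Xbyak::Zmm(out_vec_idxs[0]), table_val("two"));
        }
    }
};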

File diff suppressed because it is too large.


@ -0,0 +1,417 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "common/emitter.h"
#include "jit_generator.hpp"
#include "mkldnn_node.h"
namespace MKLDNNPlugin {
class jit_add_emitter : public jit_emitter {
public:
jit_add_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_mul_add_emitter : public jit_emitter {
public:
jit_mul_add_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
size_t aux_vecs_count() const override;
};
class jit_subtract_emitter : public jit_emitter {
public:
jit_subtract_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_multiply_emitter : public jit_emitter {
public:
jit_multiply_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_divide_emitter : public jit_emitter {
public:
jit_divide_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_floor_mod_emitter : public jit_emitter {
public:
jit_floor_mod_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
size_t aux_vecs_count() const override;
};
class jit_mod_emitter : public jit_emitter {
public:
jit_mod_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
size_t aux_vecs_count() const override;
};
class jit_maximum_emitter : public jit_emitter {
public:
jit_maximum_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_minimum_emitter : public jit_emitter {
public:
jit_minimum_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
static std::set<InferenceEngine::Precision> get_supported_precisions();
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_squared_difference_emitter : public jit_emitter {
public:
jit_squared_difference_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_power_dynamic_emitter : public jit_emitter {
public:
jit_power_dynamic_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
};
class jit_equal_emitter : public jit_emitter {
public:
jit_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_not_equal_emitter : public jit_emitter {
public:
jit_not_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_greater_emitter : public jit_emitter {
public:
jit_greater_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_greater_equal_emitter : public jit_emitter {
public:
jit_greater_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_less_emitter : public jit_emitter {
public:
jit_less_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_less_equal_emitter : public jit_emitter {
public:
jit_less_equal_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_logical_and_emitter : public jit_emitter {
public:
jit_logical_and_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_logical_or_emitter : public jit_emitter {
public:
jit_logical_or_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_logical_xor_emitter : public jit_emitter {
public:
jit_logical_xor_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_logical_not_emitter : public jit_emitter {
public:
jit_logical_not_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_power_static_emitter : public jit_emitter {
public:
jit_power_static_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
void register_table_entries() override;
size_t aux_vecs_count() const override;
};
class jit_prelu_emitter : public jit_emitter {
public:
jit_prelu_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
private:
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
template <mkldnn::impl::cpu::cpu_isa_t isa>
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
size_t aux_vecs_count() const override;
};
} // namespace MKLDNNPlugin
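Among the emitters declared above, jit_mod_emitter and jit_floor_mod_emitter cover two different remainder definitions. As a scalar reference (an assumption based on the usual Mod/FloorMod semantics, not taken from this diff), the distinction is:
#include <cmath>
// Assumed scalar reference: Mod truncates the quotient toward zero, FloorMod
// rounds it toward negative infinity; the results differ when a and b have opposite signs.
float mod_ref(float a, float b)       { return a - std::trunc(a / b) * b; }
float floor_mod_ref(float a, float b) { return a - std::floor(a / b) * b; }
// Example: mod_ref(-7.f, 3.f) == -1.f, while floor_mod_ref(-7.f, 3.f) == 2.f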


@ -0,0 +1,70 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "common/emitter.h"
#include "jit_mkldnn_emitters.hpp"
#include "mkldnn_eltwise_node.h"
#include "legacy/ie_layers.h"
using namespace mkldnn::impl::utils;
using namespace mkldnn::impl::cpu;
using namespace Xbyak;
namespace MKLDNNPlugin {
jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode& node, InferenceEngine::Precision exec_prc)
: jit_emitter(host, host_isa, node, exec_prc) {
auto& eltwiseNode = dynamic_cast<const MKLDNNEltwiseNode&>(n);
auto alg = static_cast<mkldnn_alg_kind_t>(eltwiseNode.getAlgorithm());
if (host_isa_ == cpu::sse42) {
eltwise_injector_sse42 = std::make_shared<jit_uni_eltwise_injector_f32<cpu::sse42>>(
host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
} else if (host_isa_ == cpu::avx2) {
eltwise_injector_avx2 = std::make_shared<jit_uni_eltwise_injector_f32<cpu::avx2>>(
host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
} else if (host_isa_ == cpu::avx512_common) {
eltwise_injector_avx512_common = std::make_shared<jit_uni_eltwise_injector_f32<cpu::avx512_common>>(
host, alg, eltwiseNode.getAlpha(), eltwiseNode.getBeta());
} else {
assert(!"unsupported isa");
}
}
size_t jit_mkldnn_emitter::get_inputs_num() { return 1; }
void jit_mkldnn_emitter::emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) {
if (host_isa_ == cpu::sse42) {
if (out_vec_idxs[0] != in_vec_idxs[0])
h->uni_vmovups(Xmm(out_vec_idxs[0]), Xmm(in_vec_idxs[0]));
eltwise_injector_sse42->compute_vector(out_vec_idxs[0]);
} else if (host_isa_ == cpu::avx2) {
if (out_vec_idxs[0] != in_vec_idxs[0])
h->uni_vmovups(Ymm(out_vec_idxs[0]), Ymm(in_vec_idxs[0]));
eltwise_injector_avx2->compute_vector(out_vec_idxs[0]);
} else if (host_isa_ == cpu::avx512_common) {
if (out_vec_idxs[0] != in_vec_idxs[0])
h->uni_vmovups(Zmm(out_vec_idxs[0]), Zmm(in_vec_idxs[0]));
eltwise_injector_avx512_common->compute_vector(out_vec_idxs[0]);
} else {
assert(!"unsupported isa");
}
}
void jit_mkldnn_emitter::emit_table() {
if (host_isa_ == cpu::sse42) {
eltwise_injector_sse42->prepare_table();
} else if (host_isa_ == cpu::avx2) {
eltwise_injector_avx2->prepare_table();
} else if (host_isa_ == cpu::avx512_common) {
eltwise_injector_avx512_common->prepare_table();
} else {
assert(!"unsupported isa");
}
}
} // namespace MKLDNNPlugin


@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "common/emitter.h"
#include "jit_generator.hpp"
#include "mkldnn_node.h"
#include "jit_uni_eltwise.hpp"
namespace MKLDNNPlugin {
class jit_mkldnn_emitter : public jit_emitter {
public:
jit_mkldnn_emitter(mkldnn::impl::cpu::jit_generator *host, mkldnn::impl::cpu::cpu_isa_t host_isa, const MKLDNNNode& node,
InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32);
size_t get_inputs_num() override;
void emit(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs,
const std::vector<size_t> &pool_vec_idxs, const std::vector<size_t> &pool_gpr_idxs) override;
void emit_table() override;
private:
std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::sse42>> eltwise_injector_sse42;
std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::avx2>> eltwise_injector_avx2;
std::shared_ptr<mkldnn::impl::cpu::jit_uni_eltwise_injector_f32<mkldnn::impl::cpu::avx512_common>> eltwise_injector_avx512_common;
};
} // namespace MKLDNNPlugin


@ -1,252 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include <legacy/ie_layers.h>
#include <algorithm>
#include <string>
#include <mkldnn_extension_utils.h>
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
// TODO: (ichuraev) I'm not fully sure that the names of types and parameters are correct for square, abs, sqrt, linear, bounded_relu and soft_relu
caseless_map<std::string, std::function<void(GenericLayer*, mkldnn::algorithm&, float&, float&)>> MKLDNNActivationNode::initializers = {
{"relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("negative_slope", 0.0f);
beta = 0.0f;
algorithm = eltwise_relu;
}},
{"gelu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_gelu;
}},
{"elu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
beta = 0.0f;
algorithm = eltwise_elu;
}},
{"tanh", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_tanh;
}},
{"logistic", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_logistic;
}},
{"square", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_square;
}},
{"abs", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_abs;
}},
{"sqrt", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_sqrt;
}},
{"linear", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
beta = activationLayer->GetParamAsFloat("beta", 0.0f);
algorithm = eltwise_linear;
}},
{"bounded_relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("alpha", 0.0f);
beta = 0.0f;
algorithm = eltwise_bounded_relu;
}},
{"soft_relu", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_soft_relu;
}},
{"relu6", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("n", 6.0f);
beta = 0.0f;
algorithm = eltwise_bounded_relu;
}},
{"clamp", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("max", 1.0f);
beta = activationLayer->GetParamAsFloat("min", 0.0f);
algorithm = eltwise_clamp;
}},
{"exp", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_exp;
}},
{"not", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_not;
}},
{"swish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = activationLayer->GetParamAsFloat("alpha", 1.0f);
beta = 0.0f;
algorithm = eltwise_swish;
}},
{"hswish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_hswish;
}},
{"mish", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_mish;
}},
{"hsigmoid", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
alpha = 0.0f;
beta = 0.0f;
algorithm = eltwise_hsigmoid;
}},
};
MKLDNNActivationNode::MKLDNNActivationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng,
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(layer, eng, cache) {
GenericLayer* activationLayer = getCnnLayer().get();
if (activationLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot get CNNLayer.";
std::string type = activationLayer->type;
CaselessEq<std::string> comparator;
if (comparator(type, "activation"))
type = activationLayer->GetParamAsString("type");
if (comparator(type, "sigmoid"))
type = "logistic";
if (initializers.find(type) != initializers.end())
initializers[type](activationLayer, algorithm, alpha, beta);
}
void MKLDNNActivationNode::getSupportedDescriptors() {
if (!descs.empty())
return;
if (getParentEdges().size() != 1)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (!getChildEdges().size())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
auto parentOutDims = getParentEdgeAt(0)->getDims();
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
// FIXME: MKLDNN doesn't support activation inputs with fewer than 4 dimensions
while (parentOutDims.ndims() < 4)
parentOutDims.push_back(1);
for (auto format : getAvailableFormatsForDims(parentOutDims)) {
MKLDNNMemoryDesc in_candidate(parentOutDims, MKLDNNExtensionUtils::IEPrecisionToDataType(precision), format);
createDescriptor({in_candidate}, {});
}
}
void MKLDNNActivationNode::createPrimitive() {
if (prim)
return;
auto prim_desc = createPrimitiveDescriptor<eltwise_forward::primitive_desc, eltwise_forward::desc>();
prim.reset(new eltwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
}
bool MKLDNNActivationNode::created() const {
return getType() == Activation;
}
void MKLDNNActivationNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
MKLDNNMemoryDesc inDesc(inputDesc[0]);
MKLDNNDescriptor desc(std::shared_ptr<eltwise_forward::desc>(
new eltwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), inDesc, getAlpha(), getBeta())));
descs.push_back(desc);
}
void MKLDNNActivationNode::initOptimalPrimitiveDescriptor() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
if (isInitConfig(config))
return;
if (config.inConfs.size() != 1 || config.outConfs.size() != 1 ||
(!isUninitTensorDesc(config.inConfs[0].desc) &&
!isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
THROW_IE_EXCEPTION << "Layer " << getName() << " has incorrect selected config!";
if (!isUninitTensorDesc(config.inConfs[0].desc)) {
config.outConfs[0].desc = config.inConfs[0].desc;
} else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
config.inConfs[0].desc = config.outConfs[0].desc;
} else {
config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
}
initDescriptor(config);
}
MKLDNNMemoryDesc MKLDNNActivationNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.src_primitive_desc(idx).desc());
auto parentOutDims = getParentEdgeAt(idx)->getDims().ToSizeVector();
SizeVector blocked_dims, order, dimOffsets, strides;
size_t offset = desc.getBlockingDesc().getOffsetPadding();
for (size_t i = 0; i < desc.getBlockingDesc().getStrides().size(); i++) {
if (desc.getBlockingDesc().getOrder()[i] >= parentOutDims.size())
continue;
blocked_dims.push_back(desc.getBlockingDesc().getBlockDims()[i]);
order.push_back(desc.getBlockingDesc().getOrder()[i]);
dimOffsets.push_back(desc.getBlockingDesc().getOffsetPaddingToData()[i]);
strides.push_back(desc.getBlockingDesc().getStrides()[i]);
}
if (desc.getLayout() == InferenceEngine::Layout::ANY)
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
parentOutDims,
desc.getLayout()));
else
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
parentOutDims,
{blocked_dims, order, offset, dimOffsets, strides}));
}
MKLDNNMemoryDesc MKLDNNActivationNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.dst_primitive_desc(idx).desc());
auto childInDims = getChildEdgeAt(idx)->getDims().ToSizeVector();
SizeVector blocked_dims, order, dimOffsets, strides;
size_t offset = desc.getBlockingDesc().getOffsetPadding();
for (size_t i = 0; i < desc.getBlockingDesc().getStrides().size(); i++) {
if (desc.getBlockingDesc().getOrder()[i] >= childInDims.size())
continue;
blocked_dims.push_back(desc.getBlockingDesc().getBlockDims()[i]);
order.push_back(desc.getBlockingDesc().getOrder()[i]);
dimOffsets.push_back(desc.getBlockingDesc().getOffsetPaddingToData()[i]);
strides.push_back(desc.getBlockingDesc().getStrides()[i]);
}
if (desc.getLayout() == InferenceEngine::Layout::ANY)
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
childInDims,
desc.getLayout()));
else
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
childInDims,
{blocked_dims, order, offset, dimOffsets, strides}));
}
REG_MKLDNN_PRIM_FOR(MKLDNNActivationNode, Activation);


@ -1,44 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include "caseless.hpp"
#include <string>
#include <memory>
#include <vector>
namespace MKLDNNPlugin {
class MKLDNNActivationNode : public MKLDNNNode {
public:
MKLDNNActivationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNActivationNode() override = default;
void getSupportedDescriptors() override;
void initOptimalPrimitiveDescriptor() override;
void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
void createPrimitive() override;
bool created() const override;
mkldnn::algorithm getAlgorithm() const { return algorithm; }
float getAlpha() const { return alpha; }
float getBeta() const { return beta; }
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
private:
float alpha = 0.0f;
float beta = 0.0f;
static InferenceEngine::details::caseless_map<std::string,
std::function<void(InferenceEngine::GenericLayer*, mkldnn::algorithm&, float&, float&)>> initializers;
mkldnn::algorithm algorithm = mkldnn::algorithm::eltwise_relu;
};
} // namespace MKLDNNPlugin


@ -3,7 +3,6 @@
//
#include "mkldnn_batchnorm_node.h"
#include "mkldnn_depthwise_node.h"
#include <mkldnn_extension_utils.h>
#include "common/cpu_memcpy.h"


@ -25,7 +25,7 @@ public:
const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
void createPrimitive() override;
bool created() const override;
bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Depthwise
bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise
&& fusedWith[0]->getCnnLayer()->type == "ScaleShift";}
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;


@ -5,10 +5,8 @@
#include "mkldnn_bin_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_conv_node.h"
#include <legacy/ie_layers.h>
@ -116,7 +114,6 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
paddingR[i] = (dst - calc_dst) * stride[i];
}
withSum = isFusedWith(Eltwise);
withDWConv = isFusedWith(Convolution);
withBinarization = isFusedWith(Quantize);
for (auto &node : fusedWith) {
@ -138,12 +135,19 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
#endif
}
int expectedInputEdgesNum = baseInputsNumber + isFusedWith(Eltwise);
withSum = false;
int expectedInputEdgesNum = baseInputsNumber;
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
if (convolutionNode) {
expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
}
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
withSum = true;
expectedInputEdgesNum++;
}
}
if (getParentEdges().size() != expectedInputEdgesNum)
@ -164,88 +168,13 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool
for (auto &node : fusedWith) {
#if defined (COMPILED_CPU_MKLDNN_ELTWISE_NODE)
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode && eltwiseNode->isSum()) {
ops.append_sum(1.0);
continue;
}
if (eltwiseNode) {
if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
// currently there is the only one scale while we need scale by channel :(
ops.append_sum(it->second->buffer().as<float*>()[0]);
}
} else {
ops.append_sum(1.0);
}
continue;
}
#endif
#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
activationNode->getBeta());
continue;
}
#endif
#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
if (initWeights) {
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
nullptr);
blob_idx += 1;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
eltwiseNode->appendPostOps(ops);
continue;
}
#endif
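The per-node handling above is replaced by a single MKLDNNEltwiseNode::appendPostOps call. Its body is not shown in this hunk; a minimal sketch of such a dispatcher, assuming only the accessors declared in the new eltwise header, could look like this:

void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
    // Hypothetical sketch, not the committed implementation.
    switch (getOpType()) {
        case Relu: case Gelu: case Elu: case Tanh: case Logistic: case BoundedRelu:
        case Clamp: case Swish: case Hswish: case Mish: case Hsigmoid:
            // activation-like ops map onto an mkldnn eltwise post-op
            ops.append_eltwise(1.0f, getAlgorithm(), getAlpha(), getBeta());
            break;
        case MulAdd: case Prelu:
            // per-channel scale/shift ops map onto a depthwise post-op
            ops.append_depthwise(getAlgorithm(), scales.data(), shifts.data());
            break;
        default:
            THROW_IE_EXCEPTION << "Fusing of this operation is not supported";
    }
}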

View File

@ -20,6 +20,7 @@
#include "mkldnn_conv_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_eltwise_node.h"
#include <limits>
#include "common/cpu_memcpy.h"
@ -93,12 +94,9 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
MKLDNNDims dstDims = getChildEdgeAt(0)->getDims();
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
bool hasEltwise = false;
for (size_t i = 0; i < getParentEdges().size(); i++) {
auto parentEdge = getParentEdgeAt(i);
if (parentEdge->getParent()->getType() == Eltwise)
hasEltwise = true;
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = -1;
@ -117,7 +115,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
config.outConfs.resize(1);
config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false;
if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1 || hasEltwise) {
if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) {
auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format::nc :
dims.ndims() == 4 ? memory::format::nhwc :
memory::format::ndhwc
@ -155,7 +153,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() {
}
}
if (axis != 1 || hasEltwise)
if (axis != 1)
return;
auto numOfDim = static_cast<size_t>(dstDims.ndims());

View File

@ -5,10 +5,8 @@
#include "mkldnn_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_concat_node.h"
@ -110,6 +108,21 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
if (convLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert convolution layer.";
withSum = false;
int expectedInputEdgesNum = baseInputsNumber;
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
if (convolutionNode) {
expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
}
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode && eltwiseNode->isSum()) {
withSum = true;
expectedInputEdgesNum++;
}
}
auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision());
if (!inputZeroPoints.empty())
inputDataType = memory::u8;
@ -127,10 +140,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
// We need to make sure that the convolution output and the second input of the fused Eltwise operation
// have equal precision sizes, since they use the same physical memory. If the precisions differ, we upscale to FP32.
if (outputDataType != memory::f32 && outputDataType != memory::bf16 && isFusedWith(Eltwise)) {
if (outputDataType != memory::f32 && outputDataType != memory::bf16 && withSum) {
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode) {
if (eltwiseNode && eltwiseNode->isSum()) {
eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) {
eltwisePrecision = Precision::FP32;
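A small illustration of the size check above (example values only): the fused sum writes into the convolution output buffer in place, so both tensors must have the same element size.

// BF16 output (2 bytes per element) cannot alias an FP32 second sum input (4 bytes),
// which is why both are promoted to FP32 in that case.
size_t convOutByteSize = Precision(Precision::BF16).size();  // 2
size_t sumInByteSize   = Precision(Precision::FP32).size();  // 4
bool needFp32Fallback  = convOutByteSize != sumInByteSize;   // true for this pair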
@ -142,14 +155,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}
int expectedInputEdgesNum = baseInputsNumber + isFusedWith(Eltwise);
for (int i = 0; i < fusedWith.size(); i++) {
auto *convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(fusedWith[i].get());
if (convolutionNode) {
expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1;
}
}
if (getParentEdges().size() != expectedInputEdgesNum)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
@ -232,7 +237,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
MKLDNNDims weightsDims = MKLDNNDims(weightDims);
withSum = isFusedWith(Eltwise);
withDWConv = isFusedWith(Convolution);
for (int i = 0; i < fusedWith.size(); i++) {
@ -287,7 +291,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
if (eltwiseNode) {
if (eltwiseNode && eltwiseNode->isSum()) {
eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i);
// TODO(amalyshe): there might be a situation when the convolution can be executed in BF16,
// the output is required in FP32, but the eltwise in-place tensor would be in BF16
@ -364,93 +368,16 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
if (node->getType() == Split || node->getType() == Concatenation)
continue;
#if defined (COMPILED_CPU_MKLDNN_ELTWISE_NODE)
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
// currently there is only one scale, while we need a per-channel scale
ops.append_sum(it->second->buffer().as<float*>()[0], mkldnn::memory::convert_to_c(precisionToDataType(eltwisePrecision)));
}
} else {
if (eltwiseNode && eltwiseNode->isSum()) {
ops.append_sum(1.0, mkldnn::memory::convert_to_c(precisionToDataType(eltwisePrecision)));
}
continue;
}
#endif
#if defined(COMPILED_CPU_MKLDNN_ACTIVATION_NODE)
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
activationNode->getBeta());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}
#endif
#if defined (COMPILED_CPU_MKLDNN_DEPTHWISE_NODE)
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
if (initWeights) {
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
nullptr);
blob_idx += 1;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
continue;
}
#endif
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
if (quantizeNode) {

View File

@ -5,10 +5,8 @@
#include "mkldnn_def_conv_node.h"
#include "mkldnn_reorder_node.h"
#include "mkldnn_input_node.h"
#include "mkldnn_activation_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_depthwise_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>

View File

@ -1,353 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mkldnn_depthwise_node.h"
#include "desc_iterator.hpp"
#include <legacy/ie_layers.h>
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "caseless.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
MKLDNNDepthwiseNode::MKLDNNDepthwiseNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache) {
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
});
internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
if (!isWithBiases())
return MKLDNNMemoryDesc();
return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
});
}
void MKLDNNDepthwiseNode::getSupportedDescriptors() {
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
auto parentOutDims = getParentEdgeAt(0)->getDims();
if (getParentEdges().size() != 1)
THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect number of inputs!";
if (parentOutDims != getChildEdgeAt(0)->getDims())
THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect dimensions!";
auto size = static_cast<size_t>(parentOutDims.ndims() == 1 ? parentOutDims[0] : parentOutDims[1]);
SizeVector weightDims = { size };
MKLDNNDims blocked_weightDims(weightDims);
auto * wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(getCnnLayer().get());
if (wLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot get weightable layer for node " << getName() << ".";
InferenceEngine::Blob::Ptr blb = wLayer->_weights;
if (blb)
realWeightSize = blb->size();
internalBlobs.push_back(createInternalBlob(weightDims, true));
if (isWithBiases()) {
InferenceEngine::Blob::Ptr blb = wLayer->_biases;
if (blb)
realBiasSize = blb->size();
internalBlobs.push_back(createInternalBlob(weightDims, false));
}
for (auto format : getAvailableFormatsForDims(parentOutDims)) {
MKLDNNMemoryDesc in_candidate{parentOutDims, inputDataType, format};
createDescriptor({in_candidate}, {});
}
}
void MKLDNNDepthwiseNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
auto parentOutDims = getParentEdgeAt(0)->getDims();
if (parentOutDims.ndims() <= 5) {
MKLDNNNode::initSupportedPrimitiveDescriptors();
} else {
createSpecificDescriptor5D();
if (specificDesc5DPtr == nullptr)
THROW_IE_EXCEPTION << "Cannot create specific MKLDNNDescriptor for depthwise node " << getName();
const auto& desc = *specificDesc5DPtr;
auto itpd = desc.createPrimitiveDescriptorIterator(getEngine());
while (itpd.is_not_end()) {
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
for (size_t i = 0; i < descInputNumbers(desc); i++) {
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = -1;
dataConfig.constant = false;
dataConfig.desc = MKLDNNMemoryDesc(InferenceEngine::TensorDesc(Precision::FP32, parentOutDims.ToSizeVector(), Layout::ANY));
config.inConfs.push_back(dataConfig);
}
std::vector<mkldnn::memory::format> outFormats;
for (size_t i = 0; i < descOutputNumbers(desc); i++) {
InferenceEngine::DataConfig dataConfig;
dataConfig.inPlace = canBeInPlace() ? 0 : -1;
dataConfig.constant = false;
dataConfig.desc = MKLDNNMemoryDesc(InferenceEngine::TensorDesc(Precision::FP32, parentOutDims.ToSizeVector(), Layout::ANY));
config.outConfs.push_back(dataConfig);
auto primDesc = itpd.fetch();
auto dstPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(dst_pd), 0);
if (dstPrimDesc) {
outFormats.emplace_back(static_cast<memory::format>(itpd.dst_primitive_desc().desc().data.format));
} else {
// This path is needed to correctly handle Deconvolution node
auto diffSrcPrimDesc = mkldnn_primitive_desc_query_pd(primDesc.get(), mkldnn::convert_to_c(diff_src_pd), 0);
if (diffSrcPrimDesc) {
outFormats.emplace_back(static_cast<memory::format>(itpd.diff_src_primitive_desc().desc().data.format));
}
}
}
impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
supportedPrimitiveDescriptors.emplace_back(config, impl_type, outFormats);
itpd++;
}
}
}
void MKLDNNDepthwiseNode::createPrimitive() {
if (prim)
return;
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Input memory didn't allocate.";
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
auto createRightPrimitiveDescriptor = [&]() -> depthwise_forward::primitive_desc {
auto parentOutDims = getParentEdgeAt(0)->getDims();
if (parentOutDims.ndims() <= 5) {
return createPrimitiveDescriptor<depthwise_forward::primitive_desc, depthwise_forward::desc>();
} else {
const PrimitiveDescInfo *selected_pd = getSelectedPrimitiveDescriptor();
auto& desc = *specificDesc5DPtr;
auto itpd = desc.createPrimitiveDescriptorIterator(getEngine(), mkldnn::primitive_attr());
while (itpd.is_not_end()) {
impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
if (impl_type == getSelectedPrimitiveDescriptor()->getImplementationType()) {
specificPrepareMemory5D(itpd);
std::shared_ptr<depthwise_forward::desc> selected_desc_ptr = desc;
depthwise_forward::primitive_desc prim_desc = depthwise_forward::primitive_desc(*selected_desc_ptr, getEngine());
return prim_desc;
}
itpd++;
}
THROW_IE_EXCEPTION << "Cannot create specific primitive descriptor for depthwise node " << getName() << ".";
}
};
auto prim_desc = createRightPrimitiveDescriptor();
if (isBroadcast()) {
float broadcastValue = static_cast<float*>(internalBlobMemory[0]->GetData())[0];
size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
for (int i = 1; i < blbSize && realWeightSize != blbSize; i++) {
static_cast<float*>(internalBlobMemory[0]->GetData())[i] = broadcastValue;
}
if (isWithBiases()) {
blbSize = internalBlobMemory[1]->GetPrimitiveDescriptor().desc().data.dims[0];
broadcastValue = static_cast<float*>(internalBlobMemory[1]->GetData())[0];
for (int i = 1; i < blbSize && realBiasSize != blbSize; i++) {
static_cast<float*>(internalBlobMemory[1]->GetData())[i] = broadcastValue;
}
}
} else {
size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
if (realWeightSize != blbSize)
THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect weights!";
if (isWithBiases()) {
blbSize = internalBlobMemory[1]->GetPrimitiveDescriptor().desc().data.dims[0];
if (realBiasSize != blbSize)
THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect biases!";
}
}
if (isWithBiases()) {
prim.reset(new depthwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
internalBlobMemory[0]->GetPrimitive(),
internalBlobMemory[1]->GetPrimitive(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
} else {
prim.reset(new depthwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
internalBlobMemory[0]->GetPrimitive(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
}
}
bool MKLDNNDepthwiseNode::created() const {
return getType() == Depthwise;
}
void MKLDNNDepthwiseNode::init() {
GenericLayer* depthwiseLayer = getCnnLayer().get();
if (depthwiseLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot get CNNLayer.";
CaselessEq<std::string> comparator;
if (comparator(depthwiseLayer->type, "ScaleShift")) {
auto *scshLayer = dynamic_cast<ScaleShiftLayer*>(getCnnLayer().get());
if (scshLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot get scale shift layer " << getName();
if (scshLayer->_weights == nullptr)
THROW_IE_EXCEPTION << "ScaleShift without weights is not supported";
algorithm = depthwise_scale_shift;
withBiases = scshLayer->_biases != nullptr;
broadcast = static_cast<bool>(scshLayer->_broadcast);
} else if (comparator(depthwiseLayer->type, "PReLU")) {
auto *preluLayer = dynamic_cast<PReLULayer*>(getCnnLayer().get());
if (preluLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot get PReLU layer " << getName();
if (preluLayer->_weights == nullptr)
THROW_IE_EXCEPTION << "PReLU without weights is not supported";
algorithm = depthwise_prelu;
withBiases = false;
broadcast = preluLayer->_channel_shared;
} else {
THROW_IE_EXCEPTION << "Unsupported depthwise operation";
}
}
void MKLDNNDepthwiseNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
MKLDNNMemoryDesc in_candidate(inputDesc[0]);
MKLDNNMemoryDesc out_candidate(inputDesc[0]);
MKLDNNDims weightDims({in_candidate.getDims().ndims() == 1 ? in_candidate.getDims()[0] : in_candidate.getDims()[1]});
MKLDNNMemoryDesc wgh_candidate{weightDims, in_candidate.getDataType(), memory::x};
if (isWithBiases()) {
MKLDNNMemoryDesc bias_candidate{weightDims, in_candidate.getDataType(), memory::x};
MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate, bias_candidate)));
descs.push_back(desc);
} else {
MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate)));
descs.push_back(desc);
}
}
void MKLDNNDepthwiseNode::initOptimalPrimitiveDescriptor() {
auto selected_pd = getSelectedPrimitiveDescriptor();
if (selected_pd == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
auto config = selected_pd->getConfig();
if (isInitConfig(config))
return;
if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || (!isUninitTensorDesc(config.inConfs[0].desc) &&
!isUninitTensorDesc(config.outConfs[0].desc) && config.inConfs[0].desc != config.outConfs[0].desc))
THROW_IE_EXCEPTION << "Layer " << getName() << " has incorrect selected config!";
if (getParentEdgeAt(0)->getDims().ndims() > 5)
return;
if (!isUninitTensorDesc(config.inConfs[0].desc)) {
config.outConfs[0].desc = config.inConfs[0].desc;
} else if (!isUninitTensorDesc(config.outConfs[0].desc)) {
config.inConfs[0].desc = config.outConfs[0].desc;
} else {
config.outConfs[0].desc = config.inConfs[0].desc = getConfiguredInputDesc(config, 0);
}
initDescriptor(config);
}
void MKLDNNDepthwiseNode::createSpecificDescriptor5D() {
auto parentOutDims = getParentEdgeAt(0)->getDims();
MKLDNNDims newDims;
for (int i = 0; i < 4; i++)
newDims.push_back(parentOutDims[i]);
int lastDim = 1;
for (int i = 4; i < parentOutDims.ndims(); i++) {
lastDim *= parentOutDims[i];
}
newDims.push_back(lastDim);
MKLDNNMemoryDesc in_candidate{newDims, MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32), mkldnn::memory::ncdhw};
MKLDNNMemoryDesc out_candidate(in_candidate);
MKLDNNDims weightDims({in_candidate.getDims()[1]});
MKLDNNMemoryDesc wgh_candidate{weightDims, in_candidate.getDataType(), memory::x};
if (isWithBiases()) {
MKLDNNMemoryDesc bias_candidate{weightDims, in_candidate.getDataType(), memory::x};
MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate, bias_candidate)));
specificDesc5DPtr = std::make_shared<MKLDNNDescriptor>(desc);
} else {
MKLDNNDescriptor desc(std::shared_ptr<depthwise_forward::desc>(
new depthwise_forward::desc(prop_kind::forward_scoring, getAlgorithm(), in_candidate, out_candidate, wgh_candidate)));
specificDesc5DPtr = std::make_shared<MKLDNNDescriptor>(desc);
}
}
void MKLDNNDepthwiseNode::specificPrepareMemory5D(mkldnn::primitive_desc_iterator& itpd) {
std::vector<MKLDNNMemoryDesc> intDescs;
for (auto &it : internalBlobDesc)
intDescs.push_back(it(itpd, 0));
internalBlobMemory.clear();
for (size_t i = 0; i < internalBlobs.size(); i++) {
const auto &internalBlob = internalBlobs[i];
auto create = [&] () {
auto newDesc = MKLDNNMemoryDesc(internalBlob->getTensorDesc());
auto newFormat = newDesc.getFormat();
if (newFormat == mkldnn::memory::ncdhw) {
newFormat = mkldnn::memory::goihw;
}
if (newFormat == mkldnn::memory::nchw) {
newFormat = mkldnn::memory::oihw;
}
MKLDNNMemory memory{ getEngine() };
memory.Create(MKLDNNMemoryDesc(newDesc.getDims(), newDesc.getDataType(), newFormat), internalBlob->buffer());
MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()));
_ptr->Create(intDescs[i]);
_ptr->SetData(memory);
return _ptr;
};
MKLDNNMemoryPtr ptr;
if (weightCache != nullptr) {
const uint64_t data_hash = weightCache->GetHashFunc().hash(
internalBlob->buffer(), internalBlob->byteSize());
const std::string string_hash = getName() + "_" + std::to_string(i)
+ "_" + std::to_string(internalBlob->byteSize())
+ "_" + std::to_string(data_hash);
ptr = weightCache->findOrCreate(string_hash, create);
} else {
ptr = create();
}
internalBlobMemory.push_back(ptr);
}
}
REG_MKLDNN_PRIM_FOR(MKLDNNDepthwiseNode, Depthwise);
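The two modes of the removed node (ScaleShift and PReLU) are expected to be handled by the generic eltwise node from now on. A hypothetical sketch of how its initializers table (declared in the new eltwise header) could register them — the real entries are not part of this hunk:

// Hypothetical entries, matching the declared initializer signature.
{"ScaleShift", [](GenericLayer* layer, EltwiseOpType& op, mkldnn::algorithm& alg, float& alpha, float& beta) {
    op  = MulAdd;                                   // x * scale + shift
    alg = mkldnn::algorithm::depthwise_scale_shift;
}},
{"PReLU", [](GenericLayer* layer, EltwiseOpType& op, mkldnn::algorithm& alg, float& alpha, float& beta) {
    op  = Prelu;                                    // per-channel slope
    alg = mkldnn::algorithm::depthwise_prelu;
}},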

View File

@ -1,46 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
#include <memory>
#include <vector>
namespace MKLDNNPlugin {
class MKLDNNDepthwiseNode : public MKLDNNNode {
public:
MKLDNNDepthwiseNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNDepthwiseNode() override = default;
void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
void initOptimalPrimitiveDescriptor() override;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
bool created() const override;
mkldnn::algorithm getAlgorithm() const { return algorithm; }
bool isWithBiases() const { return withBiases; }
bool isBroadcast() const { return broadcast; }
private:
void init() override;
mkldnn::algorithm algorithm = mkldnn::algorithm::depthwise_scale_shift;
size_t realWeightSize = 0;
size_t realBiasSize = 0;
bool withBiases = false;
bool broadcast = false;
std::shared_ptr<MKLDNNDescriptor> specificDesc5DPtr;
void createSpecificDescriptor5D();
void specificPrepareMemory5D(mkldnn::primitive_desc_iterator& itpd);
};
} // namespace MKLDNNPlugin

View File

@ -8,45 +8,98 @@
#include <mkldnn_node.h>
#include <string>
#include <vector>
#include <c_types_map.hpp>
#include <memory>
#include <caseless.hpp>
namespace MKLDNNPlugin {
struct jit_eltwise_fq_params {
int src0_step;
int src1_step;
int dst_step;
mkldnn::memory::data_type src0_dt;
mkldnn::memory::data_type src1_dt;
mkldnn::memory::data_type dst_dt;
int src0_data_size;
int src1_data_size;
int dst_data_size;
#define MAX_ELTWISE_INPUTS 7
InferenceEngine::EltwiseLayer::eOperation eltwise_op;
enum EltwiseOpType {
Add = 0,
Multiply,
Subtract,
Divide,
FloorMod,
Mod,
Maximum,
Minimum,
SquaredDifference,
PowerDynamic,
PowerStatic,
MulAdd,
Equal,
NotEqual,
Greater,
GreaterEqual,
Less,
LessEqual,
LogicalAnd,
LogicalOr,
LogicalXor,
LogicalNot,
Relu,
Gelu,
Elu,
Tanh,
Logistic,
Square,
Abs,
Sqrt,
Linear,
BoundedRelu,
SoftRelu,
Relu6,
Exp,
Clamp,
Swish,
Prelu,
Mish,
Hswish,
Hsigmoid
};
struct jit_eltwise_fq_call_args {
const void *src0;
const void *src1;
struct jit_eltwise_params {
size_t inputs_number;
size_t input_size;
InferenceEngine::Precision src_prc[MAX_ELTWISE_INPUTS];
InferenceEngine::Precision dst_prc;
std::vector<size_t> src_offsets[MAX_ELTWISE_INPUTS];
std::vector<size_t> dst_offsets;
size_t src_size[MAX_ELTWISE_INPUTS];
size_t dst_size;
size_t oc_size;
};
struct jit_eltwise_call_args {
const void *src_ptr[MAX_ELTWISE_INPUTS];
void *dst;
size_t work_amount;
size_t oc_off;
};
struct jit_uni_eltwise_fq_kernel {
void (*ker_)(const jit_eltwise_fq_call_args *);
class MKLDNNEltwiseNode;
void operator()(const jit_eltwise_fq_call_args *args) {
struct jit_uni_eltwise_kernel {
void (*ker_)(const jit_eltwise_call_args *);
void operator()(const jit_eltwise_call_args *args) {
assert(ker_);
ker_(args);
}
explicit jit_uni_eltwise_fq_kernel(jit_eltwise_fq_params jep, const mkldnn_primitive_attr &attr) : ker_(nullptr), jep_(jep), attr_(attr) {}
virtual ~jit_uni_eltwise_fq_kernel() {}
explicit jit_uni_eltwise_kernel(jit_eltwise_params jep, MKLDNNEltwiseNode& node) : ker_(nullptr), jep_(jep), eltwiseNode(node) {}
virtual ~jit_uni_eltwise_kernel() {}
jit_eltwise_fq_params jep_;
const mkldnn_primitive_attr &attr_;
jit_eltwise_params jep_;
MKLDNNEltwiseNode& eltwiseNode;
};
class MKLDNNEltwiseNode : public MKLDNNNode {
@ -56,54 +109,66 @@ public:
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void selectOptimalPrimitiveDescriptor() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
bool canBeInPlace() const override;
bool isSum();
bool isUnitScales();
bool isWithBroadcast();
void initOptimalPrimitiveDescriptor() override;
bool canFuse(const MKLDNNNodePtr& node) const;
size_t getOpInputsNum() const;
EltwiseOpType getOpType() const { return eltwiseOp; }
mkldnn::algorithm getAlgorithm() const { return eltwiseAlgorithm; }
float getAlpha() const { return alpha; }
float getBeta() const { return beta; }
void appendPostOps(mkldnn::post_ops& ops) override;
private:
InferenceEngine::EltwiseLayer::eOperation op;
std::vector<float> sum_scales;
bool broadcast = false;
int batch_dim = 5;
mkldnn::primitive_attr attr;
void init() override;
std::shared_ptr<jit_uni_eltwise_fq_kernel> eltiwse_fq_kernel;
jit_eltwise_fq_params jep;
EltwiseOpType eltwiseOp = Add;
mkldnn::algorithm eltwiseAlgorithm = mkldnn::algorithm_undef;
void jit_eltwise_fq();
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights);
std::shared_ptr<jit_uni_eltwise_kernel> eltwise_kernel = nullptr;
jit_eltwise_params jep = {};
template <typename T0, typename T1> void ref_eltwise(int in0, int in1);
template <typename T0, typename T1, typename T2> void ref_eltwise2(int in0, int in1);
void dims_calc(int *dims, const MKLDNNDims &edge_dims, bool channels_first);
void offset_out_calc(int *offset, int *dims);
void offset_in_calc(int *offset, int *dims_in, int *dims_out);
int optimalTensorRank = 6;
bool canUseOptimizedImpl = false;
bool isDynBatchEnabled = false;
size_t batchDimIdx = 0;
size_t tensorRank = 0;
size_t fullWorkAmount = 0;
size_t schedulerWorkAmount = 0;
std::vector<std::vector<size_t>> dims_in = {};
std::vector<std::vector<size_t>> offsets_in = {};
std::vector<size_t> dims_out = {};
std::vector<size_t> offsets_out = {};
std::vector<ptrdiff_t> start_offset_in = {};
ptrdiff_t start_offset_out = 0;
std::vector<size_t> offsets_oc = {};
template <typename T0, typename T1> void eltwise_add(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_prod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_max(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_sub(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_min(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_div(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_squared_diff(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_floor_mod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_pow(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_logical_and(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_logical_or(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1> void eltwise_logical_xor(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
float alpha = 0;
float beta = 0;
float gamma = 0;
template <typename T0, typename T1, typename T2> void eltwise_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1, typename T2> void eltwise_not_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1, typename T2> void eltwise_less(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1, typename T2> void eltwise_less_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1, typename T2> void eltwise_greater(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
template <typename T0, typename T1, typename T2> void eltwise_greater_equal(const T0 *src0_ptr, const T1 *src1_ptr, T2 *dst_ptr, size_t dst_data_size);
std::vector<float> scales = {};
std::vector<float> shifts = {};
inline void executeOptimized6D(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);
inline void executeOptimizedGeneric(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);
inline void executeReference(const std::vector<const uint8_t *>& src_ptrs, uint8_t *dst_ptr);
void offset_out_calc(std::vector<size_t>& offset, std::vector<size_t>& dims);
void offset_in_calc(std::vector<size_t>& offset, std::vector<size_t>& dims_in, std::vector<size_t>& dims_out);
static InferenceEngine::details::caseless_map<std::string,
std::function<void(InferenceEngine::GenericLayer*, EltwiseOpType&, mkldnn::algorithm&, float&, float&)>> initializers;
};
} // namespace MKLDNNPlugin
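A short usage sketch of the structures above, assuming a kernel has already been compiled for the selected ISA and that src_ptrs/dst_ptr are the raw buffers for the current parallel block (both names are placeholders):

// Illustrative only; the real node derives per-thread pointers from
// start_offset_in/out and the precomputed offset tables.
jit_eltwise_call_args args = {};
for (size_t i = 0; i < jep.inputs_number; i++)
    args.src_ptr[i] = src_ptrs[i];  // inputs for this block
args.dst = dst_ptr;
args.work_amount = jep.input_size;  // elements to process in this call
args.oc_off = 0;                    // channel offset used by fused per-channel ops
(*eltwise_kernel)(&args);           // run the generated code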

View File

@ -3,8 +3,7 @@
//
#include "mkldnn_fullyconnected_node.h"
#include "mkldnn_activation_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_eltwise_node.h"
#include "mkldnn_quantize_node.h"
#include "desc_iterator.hpp"
#include <legacy/ie_layers.h>
@ -199,10 +198,10 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
continue;
}
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode && (eltwiseNode->getOpType() == MulAdd || eltwiseNode->getOpType() == Prelu)) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(eltwiseNode->getCnnLayer().get());
int ndims = getParentEdgeAt(0)->getDims().ndims();
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(ndims == 3 ? getChildEdgeAt(0)->getDims()[2] : getChildEdgeAt(0)->getDims()[1], 16))});
@ -211,7 +210,7 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
PostOpsIntBlobMemory[blob_idx]->FillZero();
// In case ndims == 3 the graph optimizer allows fusing only if all weight values are the same
if (depthwiseNode->isBroadcast() || ndims == 3) {
if (depthwiseLayer->blobs["weights"]->size() == 1 || ndims == 3) {
float broadcastValue = static_cast<float *>(depthwiseLayer->_weights->buffer())[0];
for (int i = 0; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
@ -223,13 +222,13 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
if (eltwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
// In case ndims == 3 the graph optimizer allows fusing only if all bias values are the same
if (depthwiseNode->isBroadcast() || ndims == 3) {
if (depthwiseLayer->blobs["biases"]->size() == 1 || ndims == 3) {
float broadcastValue = static_cast<float *>(depthwiseLayer->_biases->buffer())[0];
for (int i = 0; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
@ -241,20 +240,20 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
ops.append_depthwise(eltwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
ops.append_depthwise(eltwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
nullptr);
blob_idx += 1;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
ops.append_depthwise(eltwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
@ -262,11 +261,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
continue;
}
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());
continue;
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
}
}
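The weight and bias blobs prepared above are padded and zero-filled before being passed to append_depthwise, because the vectorized kernels read whole registers past the real channel count. A small illustration with an assumed channel count:

// e.g. 70 channels are stored in rnd_up(70, 16) == 80 floats; the tail stays zero.
size_t channels = 70;                    // assumed value
size_t padded   = rnd_up(channels, 16);  // 80
std::vector<float> scaleData(padded, 0.0f);
std::copy(weightsPtr, weightsPtr + channels, scaleData.begin());  // weightsPtr is a placeholder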

View File

@ -5,9 +5,8 @@
#include "mkldnn_interpolate_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -1480,62 +1479,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
continue;
}
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
continue;
}
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}
@ -2153,7 +2099,7 @@ inline int MKLDNNInterpolateNode::nearestRound(float originCoord, bool isDownsam
}
bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
auto isOneOf = [](mkldnn::algorithm alg, std::vector<mkldnn::algorithm> algs) {
auto isOneOf = [&](EltwiseOpType alg, std::vector<EltwiseOpType> algs) {
for (auto a : algs) {
if (alg == a) {
return true;
@ -2170,22 +2116,16 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const {
if (node->getType() == Quantize) {
auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
if (quantizeNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get quantize layer " << node->getName();
THROW_IE_EXCEPTION << "Cannot get quantize node " << node->getName();
return !quantizeNode->isBinarization();
} else if (node->getType() == Depthwise) {
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode*>(node.get());
if (depthwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get depthwise layer " << node->getName();
return ((depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_scale_shift && depthwiseNode->isWithBiases()) ||
(depthwiseNode->getAlgorithm() == mkldnn::algorithm::depthwise_prelu));
} else if (node->getType() == Activation) {
auto* activationNode = dynamic_cast<MKLDNNActivationNode*>(node.get());
if (activationNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get activation layer " << node->getName();
return isOneOf(activationNode->getAlgorithm(), {eltwise_relu, eltwise_gelu, eltwise_elu, eltwise_logistic,
eltwise_bounded_relu, eltwise_clamp, eltwise_tanh, eltwise_swish, eltwise_hswish, eltwise_mish, eltwise_hsigmoid,
eltwise_linear, eltwise_abs, eltwise_square, eltwise_sqrt});
} else if (node->getType() == Eltwise) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
if (eltwiseNode == nullptr)
THROW_IE_EXCEPTION << "Cannot get eltwise node " << node->getName();
return isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp,
Tanh, Swish, Hswish, Mish, Hsigmoid, Linear, Abs, Square, Sqrt});
}
return false;
}
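The isOneOf helper above is a plain linear scan; an equivalent, slightly more idiomatic sketch using the standard library (requires <algorithm>):

auto isOneOf = [](EltwiseOpType alg, const std::vector<EltwiseOpType>& algs) {
    return std::any_of(algs.begin(), algs.end(), [&](EltwiseOpType a) { return a == alg; });
};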

View File

@ -5,9 +5,8 @@
#include "mkldnn_mvn_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -597,64 +596,9 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
continue;
}
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
continue;
}
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}
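For reference, a minimal usage sketch (not the plugin's exact code) of how the post_ops assembled by setPostOps end up on the primitive: they are attached to a primitive_attr that is later passed when the MVN primitive descriptor is created.

mkldnn::post_ops ops;
ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f);  // a fused activation-like eltwise
mkldnn::primitive_attr attr;
attr.set_post_ops(ops);  // attr is then used to create the primitive descriptor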

View File

@ -3,8 +3,7 @@
//
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include "mkldnn_eltwise_node.h"
#include <mkldnn_extension_utils.h>
#include <legacy/ie_layers_internal.hpp>
#include "ie_parallel.hpp"
@ -808,70 +807,9 @@ void MKLDNNNormalizeNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeig
continue;
}
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getParentEdgeAt(0)->getDims()[1], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
nullptr);
blob_idx += 1;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
continue;
}
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}

View File

@ -1,133 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "mkldnn_power_node.h"
#include <legacy/ie_layers.h>
#include <string>
#include <cmath>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <limits>
#include "ie_parallel.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
MKLDNNPowerNode::MKLDNNPowerNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode(layer, eng, cache), scale(1.0f), shift(1.0f), power(1.0f) {}
void MKLDNNPowerNode::getSupportedDescriptors() {
auto * powerLayer = dynamic_cast<PowerLayer*>(getCnnLayer().get());
if (powerLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert power layer.";
scale = powerLayer->scale;
power = powerLayer->power;
shift = powerLayer->offset;
if (getParentEdges().size() != 1)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
}
void MKLDNNPowerNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
precision = getCnnLayer()->outData[0]->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
InferenceEngine::LayerConfig config;
config.dynBatchSupport = true;
config.inConfs.resize(1);
config.outConfs.resize(1);
config.inConfs[0].inPlace = -1;
config.inConfs[0].constant = false;
config.outConfs[0].inPlace = -1;
config.outConfs[0].constant = false;
for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) {
config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, format);
config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, format);
if (format != memory::any) {
config.inConfs[0].desc = InferenceEngine::TensorDesc(config.inConfs[0].desc.getPrecision(),
config.inConfs[0].desc.getDims(), {
config.inConfs[0].desc.getBlockingDesc().getBlockDims(),
config.inConfs[0].desc.getBlockingDesc().getOrder(),
(std::numeric_limits<size_t>::max)()
});
config.outConfs[0].desc = InferenceEngine::TensorDesc(config.outConfs[0].desc.getPrecision(),
config.outConfs[0].desc.getDims(), {
config.outConfs[0].desc.getBlockingDesc().getBlockDims(),
config.outConfs[0].desc.getBlockingDesc().getOrder(),
(std::numeric_limits<size_t>::max)()
});
}
supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, format);
}
}
void MKLDNNPowerNode::createPrimitive() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Destination memory didn't allocate.";
if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
THROW_IE_EXCEPTION << "Input memory didn't allocate.";
if (getSelectedPrimitiveDescriptor() == nullptr)
THROW_IE_EXCEPTION << "Preferable primitive descriptor is not set.";
}
void MKLDNNPowerNode::execute(mkldnn::stream strm) {
auto& srcMemory = getParentEdgeAt(0)->getMemory();
auto& dstMemory = getChildEdgeAt(0)->getMemory();
const size_t data_size = srcMemory.GetSize() / sizeof(float) / srcMemory.GetDims()[0] * batchToProcess();
const auto *src_ptr = reinterpret_cast<const float*>(srcMemory.GetData()) +
srcMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
float *dst_ptr = reinterpret_cast<float*>(dstMemory.GetData()) +
dstMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
if (power == -1.f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = 1 / val;
});
} else if (power == 0.5f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = sqrtf(val);
});
} else if (power == 1.0f) {
parallel_for(data_size, [&](size_t i) {
dst_ptr[i] = src_ptr[i] * scale + shift;
});
} else if (power == 2.0f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = val * val;
});
} else if (power == 3.0f) {
parallel_for(data_size, [&](size_t i) {
float val = src_ptr[i] * scale + shift;
dst_ptr[i] = val * val * val;
});
} else {
parallel_for(data_size, [&](size_t i) {
dst_ptr[i] = pow(src_ptr[i] * scale + shift, power);
});
}
}
bool MKLDNNPowerNode::created() const {
return getType() == Power;
}
REG_MKLDNN_PRIM_FOR(MKLDNNPowerNode, Power);
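For comparison with the generic eltwise implementation that replaces this node (PowerStatic in the new op set), the removed code computes dst[i] = (src[i] * scale + shift) ^ power with fast paths for a few common exponents; a condensed scalar reference form:

// Reference semantics of the removed node (illustrative only).
float power_ref(float x, float scale, float shift, float power) {
    float v = x * scale + shift;
    if (power == -1.0f) return 1.0f / v;
    if (power == 0.5f)  return std::sqrt(v);
    if (power == 1.0f)  return v;
    if (power == 2.0f)  return v * v;
    if (power == 3.0f)  return v * v * v;
    return std::pow(v, power);
}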

View File

@ -1,31 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ie_common.h>
#include <mkldnn_node.h>
#include <string>
namespace MKLDNNPlugin {
class MKLDNNPowerNode : public MKLDNNNode {
public:
MKLDNNPowerNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
~MKLDNNPowerNode() override = default;
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
bool created() const override;
private:
float scale;
float shift;
float power;
};
} // namespace MKLDNNPlugin

View File

@ -43,10 +43,6 @@ void MKLDNNQuantizeNode::init() {
THROW_IE_EXCEPTION << "Quantize layer " << getName() << " has unsupported number of parent edges at port " << i;
}
if (getParentEdgesAtPort(0)[0]->getDims().ndims() < 1ul || getParentEdgesAtPort(0)[0]->getDims().ndims() > 5ul) {
THROW_IE_EXCEPTION << "Unsupported number of dimensions for input at edge 0 in Quantize layer " << getName();
}
auto initAxisIdx = [&](size_t edgeIdx) {
auto edge = getParentEdgesAtPort(edgeIdx)[0];
@ -319,6 +315,10 @@ std::vector<mkldnn::memory::format> MKLDNNQuantizeNode::getDataFormats() const {
}
void MKLDNNQuantizeNode::getSupportedDescriptors() {
if (getParentEdgesAtPort(0)[0]->getDims().ndims() < 1ul || getParentEdgesAtPort(0)[0]->getDims().ndims() > 5ul) {
THROW_IE_EXCEPTION << "Unsupported number of dimensions for input at edge 0 in Quantize layer " << getName();
}
mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(getInputPrecision());
mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOutputPrecision());

View File

@ -5,12 +5,11 @@
#include "mkldnn_reduce_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include <mkldnn.hpp>
#include <string>
#include <vector>
#include <set>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include "ie_parallel.hpp"

View File

@ -5,9 +5,8 @@
#include "mkldnn_resample_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include "mkldnn_eltwise_node.h"
#include <mkldnn.hpp>
#include <string>
#include <vector>
@ -438,64 +437,9 @@ void MKLDNNResampleNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeigh
continue;
}
auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
if (depthwiseNode) {
if (initWeights) {
auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(getChildEdgeAt(0)->getDims()[1], 16))});
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
PostOpsIntBlobMemory[blob_idx]->FillZero();
PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_weights->buffer(),
depthwiseLayer->_weights->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
}
}
if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
memory::format::x);
PostOpsIntBlobMemory[blob_idx + 1]->FillZero();
PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
depthwiseLayer->_biases->buffer(),
depthwiseLayer->_biases->size() *
MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
if (depthwiseNode->isBroadcast()) {
float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
}
}
ops.append_depthwise(depthwiseNode->getAlgorithm(),
(const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
(const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
blob_idx += 2;
}
} else {
ops.append_depthwise(depthwiseNode->getAlgorithm(),
nullptr,
nullptr);
}
continue;
}
auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
if (activationNode) {
ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(), activationNode->getBeta());
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
continue;
}
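// For reference, a minimal sketch of what the node-owned hook could look like, assuming the
// legacy post_ops API used above (append_eltwise / append_depthwise). The real
// MKLDNNEltwiseNode::appendPostOps lives in mkldnn_eltwise_node.cpp and is not shown in this
// hunk; the member names below (isActivationLike, getMKLDNNAlgorithm, scales, shifts) are
// illustrative only:
//
//     void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
//         if (isActivationLike()) {                     // Relu, Elu, Tanh, ...
//             ops.append_eltwise(1.0f, getMKLDNNAlgorithm(), getAlpha(), getBeta());
//         } else if (getOpType() == MulAdd) {           // per-channel scale/shift
//             ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift,
//                                  scales.data(), shifts.data());
//         } else {
//             THROW_IE_EXCEPTION << "Fusing of this eltwise operation is not supported";
//         }
//     }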

View File

@ -5,8 +5,6 @@
#include "mkldnn_scatter_update_node.h"
#include "desc_iterator.hpp"
#include "mkldnn_quantize_node.h"
#include "mkldnn_depthwise_node.h"
#include "mkldnn_activation_node.h"
#include <legacy/ie_layers.h>
#include <mkldnn.hpp>
#include <string>

View File

@ -39,9 +39,14 @@ std::vector<CommonTestUtils::OpType> opTypes = {
};
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
ngraph::helpers::EltwiseTypes::SUBTRACT,
ngraph::helpers::EltwiseTypes::ADD
ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::FLOOR_MOD,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
ngraph::helpers::EltwiseTypes::POWER,
ngraph::helpers::EltwiseTypes::MOD
};
std::map<std::string, std::string> additional_config = {};

View File

@ -22,7 +22,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*(QuantGroupConv3D).*)",
// TODO: Issue 31845
R"(.*(FakeQuantizeLayerTest).*)",
R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
// TODO: failed to downgrade to opset v0 in interpreter backend
R"(.*Gather.*axis=-1.*)",
// TODO: Issue 33151

View File

@ -0,0 +1,327 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <single_layer_tests/eltwise.hpp>
#include <ngraph_functions/builders.hpp>
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
LayerTestsDefinitions::EltwiseTestParams,
CPUSpecificParams> EltwiseLayerCPUTestParamsSet;
class EltwiseLayerCPUTest : public testing::WithParamInterface<EltwiseLayerCPUTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<EltwiseLayerCPUTestParamsSet> obj) {
LayerTestsDefinitions::EltwiseTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::EltwiseLayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::EltwiseTestParams>(
basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() {
LayerTestsDefinitions::EltwiseTestParams basicParamsSet;
CPUSpecificParams cpuParams;
std::tie(basicParamsSet, cpuParams) = this->GetParam();
std::vector<std::vector<size_t>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::helpers::InputLayerType secondaryInputType;
CommonTestUtils::OpType opType;
ngraph::helpers::EltwiseTypes eltwiseType;
std::map<std::string, std::string> additional_config;
std::tie(inputShapes, eltwiseType, secondaryInputType, opType, netPrecision, inPrc, outPrc, inLayout, targetDevice, additional_config) = basicParamsSet;
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::string isaType;
if (with_cpu_x86_avx512f()) {
isaType = "jit_avx512";
} else if (with_cpu_x86_avx2()) {
isaType = "jit_avx2";
} else if (with_cpu_x86_sse42()) {
isaType = "jit_sse42";
} else {
isaType = "ref";
}
selectedType = isaType + "_" + "FP32";
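// The expected implementation name mirrors how the plugin labels its jit eltwise kernels:
// the widest ISA detected at runtime ("jit_avx512" / "jit_avx2" / "jit_sse42", falling back
// to "ref") plus the precision suffix. CheckCPUImpl later compares this string against the
// implementation the graph actually selected.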
std::vector<size_t> inputShape1, inputShape2;
if (inputShapes.size() == 1) {
inputShape1 = inputShape2 = inputShapes.front();
} else if (inputShapes.size() == 2) {
inputShape1 = inputShapes.front();
inputShape2 = inputShapes.back();
} else {
THROW_IE_EXCEPTION << "Incorrect number of input shapes";
}
configuration.insert(additional_config.begin(), additional_config.end());
auto input = ngraph::builder::makeParams(ngPrc, {inputShape1});
std::vector<size_t> shape_input_secondary;
switch (opType) {
case CommonTestUtils::OpType::SCALAR: {
shape_input_secondary = std::vector<size_t>({1});
break;
}
case CommonTestUtils::OpType::VECTOR:
shape_input_secondary = inputShape2;
break;
default:
FAIL() << "Unsupported Secondary operation type";
}
std::shared_ptr<ngraph::Node> secondaryInput;
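// For DIVIDE, FLOOR_MOD and MOD the second input is baked in as a constant whose zeros are
// replaced with 1, so neither the plugin nor the ngraph reference ever divides (or takes a
// modulo) by zero.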
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD ||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
std::vector<float> data(ngraph::shape_size(shape_input_secondary));
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape_input_secondary));
for (float &i : data) {
if (i == 0) {
i = 1;
}
}
secondaryInput = ngraph::builder::makeConstant(ngPrc, shape_input_secondary, data);
} else {
secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
}
}
auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
eltwise->get_rt_info() = CPUTestsBase::setCPUInfo(inFmts, outFmts, priority);
function = std::make_shared<ngraph::Function>(eltwise, input, "Eltwise");
}
};
TEST_P(EltwiseLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckCPUImpl(executableNetwork, "Eltwise", inFmts, outFmts, selectedType);
}
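// Besides comparing outputs against the ngraph reference, CheckCPUImpl verifies that the
// Eltwise node picked the expected input/output memory formats and the implementation type
// (selectedType) composed in SetUp above.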
namespace {
std::vector<ngraph::helpers::InputLayerType> secondaryInputTypes = {
ngraph::helpers::InputLayerType::CONSTANT,
ngraph::helpers::InputLayerType::PARAMETER,
};
std::vector<CommonTestUtils::OpType> opTypes = {
CommonTestUtils::OpType::VECTOR,
};
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
// TODO: Disabled because memory formats filter is not propagated through ngraph transformations
// ngraph::helpers::EltwiseTypes::SUBTRACT,
// ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::FLOOR_MOD,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
};
std::map<std::string, std::string> additional_config = {};
std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector) {
auto adjustBlockedFormatByIsa = [](std::vector<cpu_memory_format_t>& formats) {
for (int i = 0; i < formats.size(); i++) {
if (formats[i] == nChw16c)
formats[i] = nChw8c;
if (formats[i] == nCdhw16c)
formats[i] = nCdhw8c;
}
};
if (!with_cpu_x86_avx512f()) {
for (auto& param : paramsVector) {
adjustBlockedFormatByIsa(std::get<0>(param));
adjustBlockedFormatByIsa(std::get<1>(param));
}
}
return paramsVector;
}
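// Without AVX-512 the plugin uses 8-channel blocking (256-bit registers at most), so the
// 16-channel blocked layouts listed in the test parameters are downgraded here to keep the
// expected formats in sync with what the plugin can actually select.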
std::vector<std::vector<std::vector<size_t>>> inShapes_4D = {
{{2, 4, 4, 1}},
{{2, 17, 5, 4}},
{{2, 17, 5, 4}, {1, 17, 1, 1}},
{{2, 17, 5, 1}, {1, 17, 1, 4}},
};
std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nChw16c, nChw16c}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, nchw}, {nchw}, {}, {})
};
const auto params_4D_FP32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(eltwiseOpTypes),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32, EltwiseLayerCPUTest, params_4D_FP32, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_5D = {
{{2, 4, 3, 4, 1}},
{{2, 17, 7, 5, 4}},
{{2, 17, 6, 5, 4}, {1, 17, 6, 1, 1}},
{{2, 17, 6, 5, 1}, {1, 17, 1, 1, 4}},
};
std::vector<CPUSpecificParams> cpuParams_5D = {
CPUSpecificParams({nCdhw16c, nCdhw16c}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, ndhwc}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, ncdhw}, {ncdhw}, {}, {})
};
const auto params_5D_FP32 = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D),
::testing::ValuesIn(eltwiseOpTypes),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32, EltwiseLayerCPUTest, params_5D_FP32, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_4D_Blocked_Planar = {
{{2, 17, 31, 3}, {2, 1, 31, 3}},
{{2, 17, 5, 1}, {2, 1, 1, 4}},
};
std::vector<CPUSpecificParams> cpuParams_4D_Blocked_Planar = {
CPUSpecificParams({nChw16c, nchw}, {nChw16c}, {}, {}),
};
const auto params_4D_FP32_Blocked_Planar = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D_Blocked_Planar),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Blocked_Planar)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_4D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_4D_Planar_Blocked = {
{{2, 1, 31, 3}, {2, 17, 31, 3}},
{{2, 1, 1, 4}, {2, 17, 5, 1}},
};
std::vector<CPUSpecificParams> cpuParams_4D_Planar_Blocked = {
CPUSpecificParams({nchw, nChw16c}, {nChw16c}, {}, {}),
};
const auto params_4D_FP32_Planar_Blocked = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D_Planar_Blocked),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_Planar_Blocked)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_4D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_5D_Blocked_Planar = {
{{2, 17, 31, 4, 3}, {2, 1, 31, 1, 3}},
{{2, 17, 5, 3, 1}, {2, 1, 1, 3, 4}},
};
std::vector<CPUSpecificParams> cpuParams_5D_Blocked_Planar = {
CPUSpecificParams({nCdhw16c, ncdhw}, {nCdhw16c}, {}, {}),
};
const auto params_5D_FP32_Blocked_Planar = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D_Blocked_Planar),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Blocked_Planar)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest, params_5D_FP32_Blocked_Planar, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_5D_Planar_Blocked = {
{{2, 1, 31, 1, 3}, {2, 17, 31, 4, 3}},
{{2, 1, 1, 3, 4}, {2, 17, 5, 3, 1}},
};
std::vector<CPUSpecificParams> cpuParams_5D_Planar_Blocked = {
CPUSpecificParams({ncdhw, nCdhw16c}, {nCdhw16c}, {}, {}),
};
const auto params_5D_FP32_Planar_Blocked = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D_Planar_Blocked),
::testing::ValuesIn(eltwiseOpTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_Planar_Blocked)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest, params_5D_FP32_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -0,0 +1,184 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include <debug.h>
#include <functional_test_utils/layer_test_utils.hpp>
#include <ngraph_functions/builders.hpp>
#include <ie_precision.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/precision_utils.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ie_system_conf.h"
using namespace CPUTestUtils;
using InferenceEngine::Precision;
using ngraph::helpers::EltwiseTypes;
using FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc;
namespace CPULayerTestsDefinitions {
typedef std::tuple<
std::vector<std::vector<size_t>>, // Input shapes
std::vector<InferenceEngine::Precision>, // Input precisions
std::vector<EltwiseTypes>, // Eltwise operations
bool, // With quantization
std::string // Device name
> EltwiseChainTuple;
class EltwiseChainTest : public testing::WithParamInterface<EltwiseChainTuple>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<EltwiseChainTuple> &obj) {
std::vector<std::vector<size_t>> inputShapes;
std::vector<InferenceEngine::Precision> inputPrecisions;
std::vector<EltwiseTypes> eltwiseOpTypes;
bool withQuantization;
std::string targetName;
std::tie(inputShapes, inputPrecisions, eltwiseOpTypes, withQuantization, targetName) = obj.param;
std::ostringstream results;
for (int i = 0; i < inputShapes.size(); i++) {
results << "IS" << std::to_string(i) << "=" << CommonTestUtils::vec2str(inputShapes[i]) << "_";
}
for (int i = 0; i < inputPrecisions.size(); i++) {
results << "InPRC" << std::to_string(i) << "=" << inputPrecisions[i].name() << "_";
}
for (int i = 0; i < eltwiseOpTypes.size(); i++) {
results << "Op" << std::to_string(i) << "=" << eltwiseOpTypes[i] << "_";
}
results << "WithQuant=" << withQuantization << "_";
results << "targetDevice=" << targetName;
return results.str();
}
protected:
void SetUp() {
threshold = 0.1f;
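// Loose tolerance: a chain of fused eltwise operations (optionally with FakeQuantize) can
// accumulate noticeable rounding differences against the ngraph reference.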
std::vector<std::vector<size_t>> inputShapes;
std::vector<InferenceEngine::Precision> inputPrecisions;
std::vector<EltwiseTypes> eltwiseOpTypes;
bool withQuantization;
std::tie(inputShapes, inputPrecisions, eltwiseOpTypes, withQuantization, targetDevice) = this->GetParam();
auto ngraphParam = ngraph::builder::makeParams(convertIE2nGraphPrc(inputPrecisions[0]), {inputShapes[0]});
std::vector<std::shared_ptr<ngraph::Node>> ngraphInputs;
for (int i = 1; i < inputPrecisions.size(); i++) {
std::vector<float> ngraphInput1Data(ngraph::shape_size(ngraph::Shape{inputShapes[i]}));
ngraphInputs.push_back(ngraph::builder::makeConstant(convertIE2nGraphPrc(inputPrecisions[i]), ngraph::Shape{inputShapes[i]},
ngraphInput1Data, true));
}
if (withQuantization) {
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
for (int i = 1; i < eltwiseOpTypes.size() - 1; i++) {
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
}
std::vector<size_t> constShape(inputShapes[0].size(), 1);
constShape[1] = inputShapes[0][1];
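// Per-channel FakeQuantize ranges: constShape is {1, C, 1, ...} with C taken from the first
// input's channel dimension.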
auto fq = ngraph::builder::makeFakeQuantize(eltwiseOps[eltwiseOps.size() - 1],
::ngraph::element::Type(::ngraph::element::Type_t::f32),
256, constShape);
eltwiseOps.push_back(ngraph::builder::makeEltwise(fq, ngraphInputs[eltwiseOpTypes.size() - 1], eltwiseOpTypes[eltwiseOpTypes.size() - 1]));
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain_fq");
} else {
std::vector<std::shared_ptr<ngraph::Node>> eltwiseOps;
eltwiseOps.push_back(ngraph::builder::makeEltwise(ngraphParam[0], ngraphInputs[0], eltwiseOpTypes[0]));
for (int i = 1; i < eltwiseOpTypes.size(); i++) {
eltwiseOps.push_back(ngraph::builder::makeEltwise(eltwiseOps[eltwiseOps.size() - 1], ngraphInputs[i], eltwiseOpTypes[i]));
}
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(eltwiseOps[eltwiseOps.size() - 1])};
function = std::make_shared<ngraph::Function>(results, ngraphParam, "eltwise_chain");
}
}
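// The resulting function is a chain: the parameter combined with const[0] via op[0], then each
// following op[i] takes the previous result and a freshly generated constant; with quantization
// enabled a FakeQuantize layer is inserted right before the last eltwise operation.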
};
TEST_P(EltwiseChainTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
}
namespace {
std::vector<std::vector<std::vector<size_t>>> inputShapes {
{
{{1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}, {1, 1, 2, 3}},
{{1, 48, 5, 6}, {1, 48, 1, 1}, {1, 48, 5, 6}, {1, 1, 5, 6}},
{{1, 72, 28, 28}, {1, 72, 1, 1}, {1, 72, 1, 1}, {1, 72, 1, 1}},
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
{{1, 2, 3}, {3}, {3}, {3}},
{{1, 12, 5, 5}, {5, 5}, {12, 5, 5}, {1}},
{{3, 12, 5, 5}, {1, 12, 5, 1}, {3, 1, 1, 1}, {3, 12, 5, 5}},
{{1, 1, 1, 1}, {1, 12, 5, 1}, {3, 12, 1, 5}, {3, 12, 5, 1}},
{{1, 1, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}}
}
};
std::vector<std::vector<InferenceEngine::Precision>> inputPrecisions = {
{ Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 },
{ Precision::I32, Precision::I32, Precision::I32, Precision::I32 }
};
std::vector<std::vector<EltwiseTypes>> eltwiseOps = {
{ EltwiseTypes::ADD, EltwiseTypes::MULTIPLY, EltwiseTypes::SUBTRACT },
{ EltwiseTypes::DIVIDE, EltwiseTypes::POWER, EltwiseTypes::ADD },
};
INSTANTIATE_TEST_CASE_P(smoke_EltwiseChain, EltwiseChainTest,
::testing::Combine(
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(inputPrecisions),
::testing::ValuesIn(eltwiseOps),
::testing::Values(false),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inputShapesFQ {
{
{{1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}, {1, 2, 2, 3}},
{{2, 33, 5, 5}, {2, 33, 5, 5}, {2, 33, 1, 5}, {2, 33, 5, 5}},
{{2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}, {2, 33, 5, 17}},
{{2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}, {2, 33, 5, 256}},
{{2, 5, 7, 5}, {2, 5, 1, 5}, {2, 5, 7, 5}, {2, 5, 7, 5}},
{{2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}, {2, 17, 7, 5}},
{{2, 256, 7, 5}, {2, 256, 7, 5}, {2, 256, 1, 5}, {2, 256, 7, 5}},
{{1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}, {1, 36, 34, 34}},
{{1, 12, 1, 1, 6}, {1, 12, 5, 1, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 1, 1}},
{{1, 12, 1, 1, 6}, {1, 12, 5, 5, 6}, {3, 12, 1, 5, 1}, {3, 12, 5, 5, 1}},
{{1, 12, 1, 1, 1}, {1, 12, 5, 1, 7}, {3, 12, 1, 5, 7}, {3, 12, 5, 1, 7}},
{{1, 7, 1, 1, 12}, {1, 7, 5, 1, 12}, {3, 7, 1, 5, 12}, {3, 7, 5, 1, 12}},
{{1, 7, 1, 1, 12, 3, 7}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 3, 7}, {3, 7, 5, 1, 12, 3, 7}},
{{1, 7, 1, 1, 12, 3, 1}, {1, 7, 5, 1, 12, 3, 7}, {3, 7, 1, 5, 12, 1, 7}, {3, 7, 5, 1, 12, 3, 1}}
}
};
std::vector<std::vector<InferenceEngine::Precision>> inputPrecisionsFQ {
{ Precision::FP32, Precision::FP32, Precision::FP32, Precision::FP32 }
};
INSTANTIATE_TEST_CASE_P(smoke_EltwiseChainWithFQ, EltwiseChainTest,
::testing::Combine(
::testing::ValuesIn(inputShapesFQ),
::testing::ValuesIn(inputPrecisionsFQ),
::testing::ValuesIn(eltwiseOps),
::testing::Values(true),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
EltwiseChainTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -93,10 +93,25 @@ void EltwiseLayerTest::SetUp() {
FAIL() << "Unsupported Secondary operation type";
}
auto secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
std::shared_ptr<ngraph::Node> secondaryInput;
if (eltwiseType == ngraph::helpers::EltwiseTypes::DIVIDE ||
eltwiseType == ngraph::helpers::EltwiseTypes::FLOOR_MOD ||
eltwiseType == ngraph::helpers::EltwiseTypes::MOD) {
std::vector<float> data(ngraph::shape_size(shape_input_secondary));
data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(shape_input_secondary));
for (float &i : data) {
if (i == 0) {
i = 1;
}
}
secondaryInput = ngraph::builder::makeConstant(ngPrc, shape_input_secondary, data);
} else {
secondaryInput = ngraph::builder::makeInputLayer(ngPrc, secondaryInputType, shape_input_secondary);
if (secondaryInputType == ngraph::helpers::InputLayerType::PARAMETER) {
input.push_back(std::dynamic_pointer_cast<ngraph::opset3::Parameter>(secondaryInput));
}
}
auto eltwise = ngraph::builder::makeEltwise(input[0], secondaryInput, eltwiseType);
function = std::make_shared<ngraph::Function>(eltwise, input, "Eltwise");
}

View File

@ -564,6 +564,9 @@ std::ostream& operator<<(std::ostream & os, ngraph::helpers::EltwiseTypes type)
case ngraph::helpers::EltwiseTypes::FLOOR_MOD:
os << "FloorMod";
break;
case ngraph::helpers::EltwiseTypes::MOD:
os << "Mod";
break;
default:
throw std::runtime_error("NOT_SUPPORTED_OP_TYPE");
}

View File

@ -261,655 +261,6 @@ std::string select_op(eltwise_test_params::opType op) {
return str_op;
}
class MKLDNNGraphEltwise3InputsTests: public TestsCommon,
public WithParamInterface<eltwise_test_params> {
std::string model_t = R"V0G0N(
<net name="EltwiseOnly" version="3" precision="FP32" batch="1">
<layers>
<layer name="in1" type="Input" precision="FP32" id="1">
<output>
<port id="1">__SRC_DIMS_1__
</port>
</output>
</layer>
<layer name="in2" type="Input" precision="FP32" id="2">
<output>
<port id="2">__SRC_DIMS_2__
</port>
</output>
</layer>
<layer name="in3" type="Input" precision="FP32" id="3">
<output>
<port id="3">__SRC_DIMS_3__
</port>
</output>
</layer>
<layer name="con" id="4" type="Eltwise" precision="FP32">
<data operation="_OP_" _COEFF_/>
<input>
<port id="1">__SRC_DIMS_1__
</port>
<port id="2">__SRC_DIMS_2__
</port>
<port id="3">__SRC_DIMS_3__
</port>
</input>
<output>
<port id="4">__SRC_DIMS__
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="1" to-layer="4" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="4" to-port="2"/>
<edge from-layer="3" from-port="3" to-layer="4" to-port="3"/>
</edges>
</net>
)V0G0N";
protected:
std::string getModel(eltwise_test_params p) {
std::string model = model_t;
std::string op = select_op(p.op);
std::string src_dims1;
for (auto &dim : p.dims1) {
src_dims1 += "\n <dim>";
src_dims1 += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
std::string src_dims2;
for (auto &dim : p.dims2) {
src_dims2 += "\n <dim>";
src_dims2 += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);
std::string src_dims3;
for (auto &dim : p.dims3) {
src_dims3 += "\n <dim>";
src_dims3 += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS_3__", src_dims3);
std::string src_dims;
std::vector<size_t> dims = p.dims1;
for (int i = 0; i < dims.size(); i++) {
dims[i] = std::max(p.dims1[i], p.dims2[i]);
dims[i] = std::max(dims[i], p.dims3[i]);
}
for (auto &dim : dims) {
src_dims += "\n <dim>";
src_dims += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
std::string scale;
if (!p.scales.empty()) {
scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
}
REPLACE_WITH_STR(model, "_OP_", op);
REPLACE_WITH_STR(model, "_COEFF_", scale);
return model;
}
virtual void TearDown() {
}
virtual void SetUp() {
try {
TestsCommon::SetUp();
eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
std::string model = getModel(p);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNGraphTestClass graph;
graph.CreateGraph(network);
auto& nodes = graph.getNodes();
for (int i = 0; i < nodes.size(); i++) {
if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
}
ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
}
}
InferenceEngine::SizeVector dims_src1 = p.dims1;
InferenceEngine::Layout layout1 = InferenceEngine::ANY;
switch (p.dims1.size()) {
case 4:
layout1 = InferenceEngine::NCHW;
break;
case 5:
layout1 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::SizeVector dims_src2 = p.dims2;
InferenceEngine::Layout layout2 = InferenceEngine::ANY;
switch (p.dims2.size()) {
case 4:
layout2 = InferenceEngine::NCHW;
break;
case 5:
layout2 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::SizeVector dims_src3 = p.dims3;
InferenceEngine::Layout layout3 = InferenceEngine::ANY;
switch (p.dims3.size()) {
case 4:
layout3 = InferenceEngine::NCHW;
break;
case 5:
layout3 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, layout1});
src1->allocate();
InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
if (srcPtr1 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
CommonTestUtils::fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, layout2});
src2->allocate();
InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
if (srcPtr2 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
CommonTestUtils::fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);
InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src3, layout3});
src3->allocate();
InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());
if (srcPtr3 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
CommonTestUtils::fill_data_sine(src3->buffer(), src3->size(), 0.1, 0.9, 3);
InferenceEngine::BlobMap srcs;
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in3", src3));
InferenceEngine::OutputsDataMap out;
out = network.getOutputsInfo();
InferenceEngine::BlobMap outputBlobs;
std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
InferenceEngine::TBlob<float>::Ptr output;
output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
output->allocate();
outputBlobs[item.first] = output;
graph.Infer(srcs, outputBlobs);
InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
dst_ref.allocate();
std::vector<InferenceEngine::TBlob<float>> src_vec = {*srcPtr1, *srcPtr2, *srcPtr3};
ref_eltwise(src_vec, dst_ref, p);
compare(*output, dst_ref, 0.0005f);
} catch (const InferenceEngine::details::InferenceEngineException &e) {
FAIL() << e.what();
}
}
};
TEST_P(MKLDNNGraphEltwise3InputsTests, TestsEltwise) {}
INSTANTIATE_TEST_CASE_P(
TestsEltwise, MKLDNNGraphEltwise3InputsTests,
::testing::Values(
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 32, 16, 16, 16},{1, 32, 16, 16, 16},{1, 32, 16, 16, 16}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
ASSERT_EQ(3, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(1).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(2).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout());
}
} },
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
));
class MKLDNNGraphEltwise2InputsTests: public TestsCommon,
public WithParamInterface<eltwise_test_params> {
std::string model_t = R"V0G0N(
<net name="EltwiseOnly" version="2" precision="FP32">
<layers>
<layer name="in1" type="Input" precision="FP32" id="1">
<output>
<port id="1">__SRC_DIMS_1__
</port>
</output>
</layer>
<layer name="in2" type="Input" precision="FP32" id="2">
<output>
<port id="2">__SRC_DIMS_2__
</port>
</output>
</layer>
<layer name="con" id="3" type="Eltwise" precision="FP32">
<data operation="_OP_" _COEFF_/>
<input>
<port id="1">__SRC_DIMS_1__
</port>
<port id="2">__SRC_DIMS_2__
</port>
</input>
<output>
<port id="3">__SRC_DIMS__
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="1" to-layer="3" to-port="1"/>
<edge from-layer="2" from-port="2" to-layer="3" to-port="2"/>
</edges>
</net>
)V0G0N";
protected:
std::string getModel(eltwise_test_params p) {
std::string model = model_t;
std::string op = select_op(p.op);
std::string src_dims1 = "";
for (auto &dim : p.dims1) {
src_dims1 += "\n <dim>";
src_dims1 += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
std::string src_dims2 = "";
for (auto &dim : p.dims2) {
src_dims2 += "\n <dim>";
src_dims2 += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);
std::string src_dims;
std::vector<size_t> dims = (p.dims1.size() >= p.dims2.size()) ? p.dims1 : p.dims2;
int i = dims.size() - 1, j = p.dims1.size() - 1, k = p.dims2.size() - 1;
for (; j >= 0 && k >= 0; i--, j--, k-- ) {
dims[i] = std::max(p.dims1[j], p.dims2[k]);
}
for (auto &dim : dims) {
src_dims += "\n <dim>";
src_dims += std::to_string(dim) + "</dim>";
}
REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
std::string scale;
if (!p.scales.empty()) {
scale = std::string("coeff=\"") + to_string_c_locale(p.scales) + std::string("\"");
}
REPLACE_WITH_STR(model, "_OP_", op);
REPLACE_WITH_STR(model, "_COEFF_", scale);
return model;
}
virtual void TearDown() {
}
virtual void SetUp() {
try {
TestsCommon::SetUp();
eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
std::string model = getModel(p);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNGraphTestClass graph;
graph.CreateGraph(network);
auto& nodes = graph.getNodes();
for (int i = 0; i < nodes.size(); i++) {
if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
}
ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
}
}
InferenceEngine::SizeVector dims_src1 = p.dims1;
InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, InferenceEngine::TensorDesc::getLayoutByDims(p.dims1) });
src1->allocate();
InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
if (srcPtr1 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
CommonTestUtils::fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
InferenceEngine::SizeVector dims_src2 = p.dims2;
InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, InferenceEngine::TensorDesc::getLayoutByDims(p.dims2) });
src2->allocate();
InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
if (srcPtr2 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
CommonTestUtils::fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);
InferenceEngine::BlobMap srcs;
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
InferenceEngine::OutputsDataMap out;
out = network.getOutputsInfo();
InferenceEngine::BlobMap outputBlobs;
std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
InferenceEngine::TBlob<float>::Ptr output;
output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
output->allocate();
outputBlobs[item.first] = output;
graph.Infer(srcs, outputBlobs);
InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
dst_ref.allocate();
std::vector<InferenceEngine::TBlob<float>> src_vec = {*srcPtr1, *srcPtr2};
ref_eltwise(src_vec, dst_ref, p);
compare(*output, dst_ref, 0.0005f);
} catch (const InferenceEngine::details::InferenceEngineException &e) {
FAIL() << e.what();
}
}
};
TEST_P(MKLDNNGraphEltwise2InputsTests, TestsEltwise) {}
INSTANTIATE_TEST_CASE_P(
TestsEltwise, MKLDNNGraphEltwise2InputsTests,
::testing::Values(
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Less, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Less_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Greater, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Greater_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Equal, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Not_equal, "", 3, MKLDNNPlugin::impl_desc_type::ref}
));
INSTANTIATE_TEST_CASE_P(
TestsBroadcasting, MKLDNNGraphEltwise2InputsTests,
::testing::Values(
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Prod, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Max, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Min, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sub, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Div, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 1, MKLDNNPlugin::impl_desc_type::ref},
// batch broadcasting
eltwise_test_params{{1, 3, 224},{224, 3, 1},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{2, 3, 1, 2},{1, 3, 2, 1},{}, eltwise_test_params::opType::Sub, "", 1, MKLDNNPlugin::impl_desc_type::ref}
));
INSTANTIATE_TEST_CASE_P(
TestsDiffDims, MKLDNNGraphEltwise2InputsTests,
::testing::Values(
eltwise_test_params{{},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3},{3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3},{3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3, 3},{},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref}
));
class MKLDNNGraphEltwiseDynBatchTests: public MKLDNNGraphEltwise3InputsTests {
protected:
virtual void SetUp() {
try {
TestsCommon::SetUp();
eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
std::string model = getModel(p);
size_t MB = p.dims1[0];
if (MB < 2)
MB = 2;
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
auto implNet = dynamic_cast<InferenceEngine::details::CNNNetworkImpl *>(&((InferenceEngine::ICNNNetwork&)network));
ASSERT_NE(nullptr, implNet) << "Failed to cast ICNNNetwork to CNNNetworkImpl";
InferenceEngine::ResponseDesc resp;
InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp);
ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg;
MKLDNNGraphTestClass graph;
graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}});
graph.CreateGraph(network);
InferenceEngine::SizeVector dims_src1 = p.dims1;
InferenceEngine::Layout layout1 = InferenceEngine::ANY;
switch (p.dims1.size()) {
case 4:
layout1 = InferenceEngine::NCHW;
break;
case 5:
layout1 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::SizeVector dims_src2 = p.dims2;
InferenceEngine::Layout layout2 = InferenceEngine::ANY;
switch (p.dims2.size()) {
case 4:
layout2 = InferenceEngine::NCHW;
break;
case 5:
layout2 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::SizeVector dims_src3 = p.dims3;
InferenceEngine::Layout layout3 = InferenceEngine::ANY;
switch (p.dims3.size()) {
case 4:
layout3 = InferenceEngine::NCHW;
break;
case 5:
layout3 = InferenceEngine::NCDHW;
break;
}
InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src1, layout1});
src1->allocate();
InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
if (srcPtr1 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
fill_data(src1->buffer(), src1->size());
InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src2, layout2});
src2->allocate();
InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
if (srcPtr2 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
fill_data(src2->buffer(), src2->size());
InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float>({InferenceEngine::Precision::FP32, dims_src3, layout3});
src3->allocate();
InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());
if (srcPtr3 == nullptr)
FAIL() << "Cannot cast blob to TBlob<float>.";
fill_data(src3->buffer(), src3->size());
InferenceEngine::BlobMap srcs;
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in3", src3));
InferenceEngine::OutputsDataMap out;
out = network.getOutputsInfo();
InferenceEngine::BlobMap outputBlobs;
std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
InferenceEngine::TBlob<float>::Ptr output;
output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
output->allocate();
outputBlobs[item.first] = output;
auto checkDepthwise = [](const MKLDNNPlugin::MKLDNNNodePtr& node) {
return node->getType() == MKLDNNPlugin::Eltwise;
};
graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkDepthwise);
graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkDepthwise);
} catch (const InferenceEngine::details::InferenceEngineException &e) {
FAIL() << e.what();
}
}
};
TEST_P(MKLDNNGraphEltwiseDynBatchTests, TestsDynBatchEltwise) {}
// TODO: rewrite to ngraph to have reshape functionality
INSTANTIATE_TEST_CASE_P(
DISABLED_TestsDynBatchEltwise, MKLDNNGraphEltwiseDynBatchTests,
::testing::Values(
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Pow, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
));
struct precisions_test_2params {
struct {
std::string precision0;
@ -1022,7 +373,7 @@ INSTANTIATE_TEST_CASE_P(
TestsEltwise2Precisions, MKLDNNGraphEltwise2PrecisionsTests,
::testing::Values(
precisions_test_2params{ {"FP32", "FP32"}, 4, 0 },
precisions_test_2params{ { "U8", "FP32"}, 5, 1 },
precisions_test_2params{ {"FP32", "U8"}, 5, 1 },
precisions_test_2params{ { "U8", "U8"}, 6, 2 }
precisions_test_2params{ { "U8", "FP32"}, 4, 0 },
precisions_test_2params{ {"FP32", "U8"}, 4, 0 },
precisions_test_2params{ { "U8", "U8"}, 4, 0 }
));

View File

@ -116,13 +116,12 @@ protected:
graph.CreateGraph(network);
auto& nodes = graph.getNodes();
for (int i = 0; i < nodes.size(); i++) {
if (nodes[i]->getType() == MKLDNNPlugin::Power) {
if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
}
ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
}
}
@ -174,25 +173,16 @@ INSTANTIATE_TEST_CASE_P(
power_test_params{
{1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
}}},
power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 },
power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 },
@ -306,7 +296,7 @@ protected:
outputBlobs[item.first] = output;
auto checkPower = [](const MKLDNNPlugin::MKLDNNNodePtr& node) {
return node->getType() == MKLDNNPlugin::Power;
return node->getType() == MKLDNNPlugin::Eltwise;
};
graph.checkDynBatch(srcs, outputBlobs, MB, MB, checkPower);
graph.checkDynBatch(srcs, outputBlobs, 1, MB, checkPower);
@ -325,25 +315,16 @@ INSTANTIATE_TEST_CASE_P(
power_test_params{
{1, 3, 13, 13}, 1, 2, 0.5f, 3, MKLDNNPlugin::impl_desc_type::unknown, {
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
},
[](MKLDNNPlugin::PrimitiveDescInfo impl) {
ASSERT_EQ(MKLDNNPlugin::impl_desc_type::unknown, impl.getImplementationType());
ASSERT_EQ(1, impl.getConfig().inConfs.size());
ASSERT_EQ(1, impl.getConfig().outConfs.size());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().inConfs.at(0).desc.getLayout());
ASSERT_EQ(InferenceEngine::Layout::BLOCKED, impl.getConfig().outConfs.at(0).desc.getLayout());
}}},
power_test_params{{1, 1, 23, 23}, 3, 8, 2, 3 },
power_test_params{{1, 8, 23, 23}, 8, 2, 1, 3 },

View File

@ -257,14 +257,14 @@ protected:
ASSERT_EQ(nodes.size(), 3);
ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
} else {
ASSERT_EQ(nodes.size(), 5);
ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution);
ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
}

View File

@ -186,10 +186,9 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReorders) {
for (auto &node : nodes) {
if (node->getType() == MKLDNNPlugin::Reorder) {
reorders_num++;
ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
}
}
ASSERT_EQ(reorders_num, 1);
ASSERT_EQ(reorders_num, 3);
}
TEST_F(MKLDNNGraphStructureTests, TestRedundantReorderBeforeConvWithC_3) {
@ -3781,7 +3780,7 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) {
weights->allocate();
fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
@ -4020,7 +4019,7 @@ TEST_F(MKLDNNGraphStructureTests, TestFailedPartPlateRecognitionBarrier0001) {
fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
@ -4629,7 +4628,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionDWConvolutionSumFusing) {
memset((float *) weights->buffer(), 0, weights->size());
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
network = core.ReadNetwork(model, weights_ptr);
@ -5127,7 +5126,7 @@ TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWithConcat) {
weights->allocate();
fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
@ -5412,7 +5411,7 @@ TEST_F(MKLDNNGraphStructureTests, TestRefPoolingWithConcat) {
weights->allocate();
fill_data((float *) weights->buffer(), weights->size() / sizeof(float));
InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
@ -5566,7 +5565,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2DepthwiseOpFusing) {
ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution);
ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
ASSERT_TRUE(nodes[2].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
@ -5704,7 +5703,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConvolutionWith2EltwiseOpFusing) {
ASSERT_EQ(nodes.size(), 4);
ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Activation));
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder);
ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Output);
@ -5846,7 +5845,7 @@ TEST_F(MKLDNNGraphStructureTests, TestGemmConvolutionWith2DepthwiseOpFusing) {
ASSERT_EQ(nodes.size(), 3);
ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution);
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Depthwise));
ASSERT_TRUE(nodes[1].get()->isFusedWith(MKLDNNPlugin::Type::Eltwise));
ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
InferenceEngine::TensorDesc src_desc(InferenceEngine::Precision::FP32, {1, 8, 300, 600}, InferenceEngine::NCHW);

View File

@ -27,6 +27,7 @@
#include <nodes/mkldnn_input_node.h>
#include <functional>
#include <cmath>
#include <legacy/details/ie_cnn_network_tools.h>
#define GARB_VAL(x) ((x + 100.0f + sin(x)) / (x + 150.f))
@ -212,13 +213,66 @@ public:
return graphNodes;
}
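// Helper for the updated test graph: rewrites ScaleShift/PReLU internal blobs ("weights"/"biases") into explicit Const input layers.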
void MoveInternalBlobsToConstLayers(InferenceEngine::details::CNNNetworkImpl* netImpl) {
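// Wraps 'blob' in a Const layer, registers its output Data in the network, and appends it as an extra input of 'layer'.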
auto createConstInputTo = [&](InferenceEngine::CNNLayerPtr layer, InferenceEngine::Blob::Ptr blob, std::string name) {
InferenceEngine::LayerParams attrs = {layer.get()->name + "_const_" + name, "Const", InferenceEngine::Precision::FP32};
auto constLayer = std::make_shared<InferenceEngine::CNNLayer>(attrs);
constLayer->blobs["custom"] = blob;
std::vector<size_t> constDims(layer->insData[0].lock()->getDims().size(), 1);
if (constDims.size() > 1)
constDims[1] = blob.get()->size();
else
constDims[0] = blob.get()->size();
const InferenceEngine::TensorDesc& td = {InferenceEngine::Precision::FP32, constDims, InferenceEngine::TensorDesc::getLayoutByDims(constDims)};
InferenceEngine::DataPtr newEdgeAfterLayer(new InferenceEngine::Data(constLayer->name, td));
newEdgeAfterLayer->setName(constLayer->name);
getCreatorLayer(newEdgeAfterLayer) = constLayer;
getInputTo(newEdgeAfterLayer).clear();
netImpl->addData(constLayer->name.c_str(), newEdgeAfterLayer);
IE_SUPPRESS_DEPRECATED_START
netImpl->addLayer(constLayer);
IE_SUPPRESS_DEPRECATED_END
constLayer->outData.push_back(newEdgeAfterLayer);
getInputTo(newEdgeAfterLayer)[layer->name] = layer;
layer->insData.push_back(newEdgeAfterLayer);
};
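// Walk the layers in topological order: ScaleShift gets Const inputs for its weights and biases, PReLU for its slope weights.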
auto all_layers = InferenceEngine::details::CNNNetSortTopologically(*netImpl);
for (auto &layer : all_layers) {
if (layer->type == "ScaleShift" && layer->insData.size() == 1) {
InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr)
createConstInputTo(layer, scalesBlob, "weights");
InferenceEngine::Blob::Ptr shiftBlob = layer->blobs["biases"];
if (shiftBlob != nullptr)
createConstInputTo(layer, shiftBlob, "biases");
} else if (layer->type == "PReLU" && layer->insData.size() == 1) {
InferenceEngine::Blob::Ptr scalesBlob = layer->blobs["weights"];
if (scalesBlob != nullptr)
createConstInputTo(layer, scalesBlob, "weights");
}
}
}
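// For ngraph-based networks, convert to the legacy CNNNetworkImpl representation and materialize internal blobs before building the graph.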
void CreateGraph(InferenceEngine::ICNNNetwork &network, const MKLDNNPlugin::MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache = {}) {
if (network.getFunction()) {
auto convertedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(network);
MoveInternalBlobsToConstLayers(convertedNetwork.get());
MKLDNNGraph::CreateGraph(static_cast<InferenceEngine::ICNNNetwork&>(*convertedNetwork),
extMgr, cache);
} else {
InferenceEngine::details::CNNNetworkImpl* netImpl = dynamic_cast<InferenceEngine::details::CNNNetworkImpl*>(&network);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}
MoveInternalBlobsToConstLayers(netImpl);
MKLDNNGraph::CreateGraph(network, extMgr, cache);
}
}
@ -227,9 +281,15 @@ public:
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache;
if (network.getFunction()) {
auto convertedNetwork = std::make_shared<InferenceEngine::details::CNNNetworkImpl>(network);
MoveInternalBlobsToConstLayers(convertedNetwork.get());
MKLDNNGraph::CreateGraph(static_cast<InferenceEngine::ICNNNetwork&>(*convertedNetwork),
extensionManager, cache);
} else {
InferenceEngine::details::CNNNetworkImpl* netImpl = dynamic_cast<InferenceEngine::details::CNNNetworkImpl*>(&network);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}
MoveInternalBlobsToConstLayers(netImpl);
MKLDNNGraph::CreateGraph(network, extensionManager, cache);
}
}

@ -1 +1 @@
Subproject commit 4b239023043318899e1c0a3b79158a68b7efe6e4
Subproject commit d7d8ed46078b637794bc91215e1a982bb0f1683a

View File

@ -115,11 +115,6 @@ xfail_issue_38084 = xfail_test(reason="RuntimeError: AssertionFailed: layer->get
xfail_issue_38085 = xfail_test(reason="RuntimeError: Interpolate operation should be converted to Interp")
xfail_issue_38086 = xfail_test(reason="RuntimeError: Quantize layer input '<value>' doesn't have blobs")
xfail_issue_38087 = xfail_test(reason="RuntimeError: Cannot cast to tensor desc. Format is unsupported!")
xfail_issue_38088 = xfail_test(reason="RuntimeError: Check '((axis >= axis_range_min) && "
"(axis <= axis_range_max))' failed at "
"/openvino/ngraph/core/src/validation_util.cpp:913: "
"Split Parameter axis <value> out of the tensor rank range <value>.")
xfail_issue_38089 = xfail_test(reason="RuntimeError: Node 2 contains empty child edge for index 0")
xfail_issue_38090 = xfail_test(reason="AssertionError: Items types are not equal")
xfail_issue_38091 = xfail_test(reason="AssertionError: Mismatched elements")
xfail_issue_38699 = xfail_test(reason="RuntimeError: nGraph does not support the following ONNX operations:"

View File

@ -22,7 +22,6 @@ from tests import (xfail_issue_34323,
skip_segfault,
xfail_issue_34327,
xfail_issue_36485,
xfail_issue_35923,
xfail_issue_36486,
xfail_issue_34314,
xfail_issue_36487)
@ -418,7 +417,6 @@ def test_grn_operator():
assert np.allclose(result, expected)
@xfail_issue_35923
def test_prelu_operator():
runtime = get_runtime()

View File

@ -38,7 +38,6 @@ from tests import (BACKEND_NAME,
xfail_issue_33616,
xfail_issue_38086,
xfail_issue_38087,
xfail_issue_35923,
xfail_issue_36483,
xfail_issue_34323,
xfail_issue_35915,
@ -46,8 +45,6 @@ from tests import (BACKEND_NAME,
xfail_issue_36476,
xfail_issue_36478,
xfail_issue_36437,
xfail_issue_38088,
xfail_issue_38089,
xfail_issue_38090,
xfail_issue_38091,
xfail_issue_35929,
@ -220,9 +217,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_quantizelinear_cpu"),
(xfail_issue_38087,
"OnnxBackendNodeModelTest.test_convtranspose_1d_cpu"),
(xfail_issue_35923,
"OnnxBackendNodeModelTest.test_prelu_broadcast_cpu",
"OnnxBackendNodeModelTest.test_prelu_example_cpu"),
(xfail_issue_36483,
"OnnxBackendNodeModelTest.test_ceil_cpu",
"OnnxBackendNodeModelTest.test_ceil_example_cpu"),
@ -286,10 +280,6 @@ tests_expected_to_fail = [
"OnnxBackendNodeModelTest.test_argmin_keepdims_example_select_last_index_cpu",
"OnnxBackendNodeModelTest.test_argmin_keepdims_random_select_last_index_cpu",
"OnnxBackendNodeModelTest.test_pow_types_float32_uint32_cpu"),
(xfail_issue_38088,
"OnnxBackendPyTorchConvertedModelTest.test_GLU_cpu"),
(xfail_issue_38089,
"OnnxBackendPyTorchConvertedModelTest.test_GLU_dim_cpu"),
(xfail_issue_38090,
"OnnxBackendNodeModelTest.test_where_long_example_cpu",
"OnnxBackendNodeModelTest.test_mod_int64_fmod_cpu",

View File

@ -18,7 +18,6 @@ import onnx
import pytest
from tests.test_onnx.utils import run_node
from tests import xfail_issue_35915
@pytest.mark.parametrize(
@ -27,9 +26,9 @@ from tests import xfail_issue_35915
pytest.param("And", np.logical_and, np.bool),
pytest.param("Or", np.logical_or, np.bool),
pytest.param("Xor", np.logical_xor, np.bool),
pytest.param("Equal", np.equal, np.int32, marks=xfail_issue_35915),
pytest.param("Greater", np.greater, np.int32, marks=xfail_issue_35915),
pytest.param("Less", np.less, np.int32, marks=xfail_issue_35915),
pytest.param("Equal", np.equal, np.int32),
pytest.param("Greater", np.greater, np.int32),
pytest.param("Less", np.less, np.int32),
],
)
def test_logical(onnx_op, numpy_func, data_type):

View File

@ -18,7 +18,7 @@ import onnx
import pytest
from tests.test_onnx.utils import run_node
from tests import xfail_issue_35918, xfail_issue_35923, xfail_issue_35924
from tests import xfail_issue_35918, xfail_issue_35924
def import_and_compute(op_type, input_data, **node_attrs):
@ -71,7 +71,6 @@ def test_leaky_relu():
assert_onnx_import_equals_callable("LeakyRelu", leaky_relu, [[-3, -2, -1], [1, 2, 3]])
@xfail_issue_35923
@pytest.mark.parametrize(
"x, slope",
[