[CPU] MVN, FQ, Convert dynamic nodes (#7817)

Maxim Andronov 2021-10-28 10:52:14 +03:00 committed by GitHub
parent 6908023a42
commit 6416b73855
28 changed files with 1085 additions and 536 deletions

View File

@ -486,7 +486,7 @@ void MKLDNNGraph::InitEdges() {
std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(),
auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
convertName, this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);

View File

@ -1609,7 +1609,30 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph
std::vector<float> scalesBuffer;
std::vector<float> shiftsBuffer;
parent->fillScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get(), scalesBuffer, shiftsBuffer, 1);
auto parentEltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(parent);
if (!parentEltwise) {
IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node";
}
std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get());
const auto &outputShape = child->getOutputShapeAtPort(0);
VectorDims outputDims = outputShape.getDims();
const size_t channelPos = outputDims.size() > 1 ? 1 : 0;
if (outputShape.isDynamic()) {
if (outputDims[channelPos] == Shape::UNDEFINED_DIM) {
if (scalesBuffer.size() > 1) {
outputDims[channelPos] = scalesBuffer.size();
} else if (shiftsBuffer.size() > 1) {
outputDims[channelPos] = shiftsBuffer.size();
} else {
return false;
}
}
}
scalesBuffer = makeAlignedBuffer(outputDims[channelPos], scalesBuffer, 1);
shiftsBuffer = makeAlignedBuffer(outputDims[channelPos], shiftsBuffer, 1);
for (int i = 0; i < scalesBuffer.size(); i++)
if (scalesBuffer[i] == 0.f)

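Note: this hunk (and the Eltwise / FakeQuantize changes below) relies on the makeAlignedBuffer helper from utils/cpu_utils.hpp, whose body is not part of this diff. A minimal sketch of the assumed behavior, reconstructed from the alignment logic the removed fillScalesAndShifts used to perform (broadcast a per-tensor value across the channel dimension and zero-pad up to the requested alignment); it reuses the existing rnd_up utility:

// Sketch only: assumed behavior of makeAlignedBuffer (utils/cpu_utils.hpp), not the actual implementation.
// targetSize - number of channels the post-op buffer must cover
// buffer     - per-tensor (size 1) or per-channel values
// align      - pad the result to a multiple of this value; -1 means no padding beyond targetSize
inline std::vector<float> makeAlignedBuffer(size_t targetSize, const std::vector<float>& buffer, int align = -1) {
    const size_t alignedSize = (align == -1) ? targetSize : rnd_up(targetSize, static_cast<size_t>(align));
    std::vector<float> alignedBuffer(alignedSize, 0.0f);
    for (size_t i = 0; i < targetSize; i++)
        alignedBuffer[i] = (buffer.size() == 1) ? buffer[0] : buffer[i];
    return alignedBuffer;
}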
View File

@ -503,8 +503,9 @@ void MKLDNNNode::execute(mkldnn::stream strm) {
}
void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
if (needShapeInfer())
if (needShapeInfer()) {
redefineOutputMemory(shapeInfer());
}
if (needPrepareParams()) {
IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
" since the input shapes are not defined.";
@ -1045,7 +1046,7 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}
@ -1192,7 +1193,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
if (i == fusingPort)
continue;
auto& weightShape = getInputShapeAtPort(i).getDims();
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape))
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
return false;
}
return true;
@ -1213,6 +1214,66 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|| isConvertablePowerStatic();
}
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::vector<float> scales, shifts;
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
auto constBlob = constInputNode->getMemoryPtr();
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
buffer.resize(elementsCount);
cpu_convert(constBlob->GetPtr(),
&buffer[0],
MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
Precision::FP32,
elementsCount);
};
const auto constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
} else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (!power) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
}
scales.push_back(power->getBeta());
shifts.push_back(power->getGamma());
} else {
IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
}
switch (getAlgorithm()) {
case EltwiseAdd: {
scales.resize(shifts.size(), 1.0f);
break;
}
case EltwiseSubtract: {
scales.resize(shifts.size(), 1.0f);
std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
break;
}
case EltwiseMultiply: {
shifts.resize(scales.size(), 0.0f);
break;
}
case EltwiseDivide: {
shifts.resize(scales.size(), 0.0f);
std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
break;
}
default: break;
}
return {scales, shifts};
}
bool MKLDNNNode::inputShapesDefined() const {
for (size_t i = 0; i < getParentEdges().size(); i++) {
if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
@ -1307,86 +1368,6 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
return false;
}
void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, int align) {
scales.clear();
shifts.clear();
const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
auto constBlob = constInputNode->getMemoryPtr();
const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
buffer.resize(elementsCount);
cpu_convert(constBlob->GetPtr(),
&buffer[0],
MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
Precision::FP32,
elementsCount);
};
const size_t constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
} else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
} else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
if (!power) {
IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
}
scales.push_back(power->getBeta());
shifts.push_back(power->getGamma());
} else {
IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
}
const size_t bufferSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
if (align == -1) {
align = bufferSize;
}
const size_t bufferSizeAligned = rnd_up(bufferSize, static_cast<size_t>(align));
size_t initSize = scales.size();
if (initSize > 0) {
scales.resize(bufferSizeAligned, 0);
if (initSize == 1) {
std::fill(scales.begin() + 1, scales.begin() + bufferSize, scales[0]);
}
}
initSize = shifts.size();
if (initSize > 0) {
shifts.resize(bufferSizeAligned, 0);
if (initSize == 1) {
std::fill(shifts.begin() + 1, shifts.begin() + bufferSize, shifts[0]);
}
}
switch (getAlgorithm()) {
case EltwiseAdd: {
scales.resize(bufferSizeAligned, 1.0f);
break;
}
case EltwiseSubtract: {
scales.resize(bufferSizeAligned, 1.0f);
std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
break;
}
case EltwiseMultiply: {
shifts.resize(bufferSizeAligned, 0.0f);
break;
}
case EltwiseDivide: {
shifts.resize(bufferSizeAligned, 0.0f);
std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
break;
}
default: break;
}
}
void MKLDNNNode::createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& op) {
ngraph::OutputVector inputsForShapeInfer;
for (size_t i = 0; i < inputShapes.size(); i++) {

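For clarity, an illustrative use of the new getScalesAndShifts (the node names and constant values below are hypothetical): for an EltwiseSubtract node whose constant input holds {0.5f, 0.25f}, the switch above default-fills the scales and negates the shifts so that Subtract maps onto scale-shift semantics:

// Illustrative only (hypothetical node and values), derived from the switch statement above.
std::vector<float> scales, shifts;
std::tie(scales, shifts) = subtractNode->getScalesAndShifts(parentNode.get());
// scales == {1.0f, 1.0f}     - default-filled to match shifts.size()
// shifts == {-0.5f, -0.25f}  - negated, so the fused op computes x * 1.0f + (-0.5f) etc.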
View File

@ -556,10 +556,18 @@ public:
return outputShapes[port];
}
/**
* @brief Returns scales and shifts if the node can be executed as ScaleShift, otherwise raises an exception
* If the node has only a scale or only a shift value, the missing buffer is filled with default values,
* e.g. EltwiseAdd: fill shifts from the constant input, fill scales with the default value 1.0f
* @param parentNode
* node from which the data comes
* @return pair of scales and shifts
*/
std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
protected:
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
// TODO [mandrono]: place outside of the node API
void fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, const int align = -1);
void setType(Type type) {
this->type = type;
@ -578,7 +586,7 @@ protected:
* The seed node should call this routine and pass its post operations list as a parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false);
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const { return nullptr; }
typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>

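As a reference for the new appendPostOps signature, a hedged sketch of how a seed node is expected to call it after this change; it simply mirrors the convolution / deconvolution call sites later in this commit (ops and attr are the seed node's usual oneDNN post-op containers):

// Sketch mirroring the call sites introduced by this commit, not a new API:
// the seed node now passes its output dims and the required buffer alignment explicitly.
mkldnn::post_ops ops;
for (auto &fusedNode : fusedWith) {
    if (auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedNode.get())) {
        constexpr int align = 16;  // per-channel buffers are padded for the JIT injectors
        eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
    }
}
attr.set_post_ops(ops);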
View File

@ -19,6 +19,7 @@
#include "cpu/x64/cpu_isa_traits.hpp"
#include "utils/general_utils.h"
#include <ngraph/opsets/opset1.hpp>
#include "utils/cpu_utils.hpp"
// WA for xbyak.h
#ifdef _WIN32
@ -1127,16 +1128,19 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (eltwiseNode->isSpecialConvolutionAddFusing())
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0);
else
eltwiseNode->appendPostOps(ops);
} else {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
}
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
fakeQuantizeNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
continue;
}

View File

@ -355,7 +355,7 @@ void MKLDNNConcatNode::createPrimitive() {
IE_THROW() << "Source memory from " << parent->getName() << " didn't allocate for node "
<< getName() << ".";
}
// DnnlBlockedMemoryDesc
auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
auto& dims = getInputShapeAtPort(i).getStaticDims();
for (size_t j = 0; j < dims.size(); j++) {

View File

@ -20,6 +20,7 @@
#include "common/cpu_convert.h"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -338,7 +339,8 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
@ -351,7 +353,9 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
constexpr int align = -1;
// no need to fill post op dims for FQ, it only makes sense for binary FQ
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());

View File

@ -15,11 +15,6 @@ using namespace InferenceEngine;
bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
if (!convert) {
errorMessage = "Only opset1 Convert operation is supported";
@ -41,14 +36,20 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, co
}
}
MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
return std::vector<VectorDims>{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
}
MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
: MKLDNNNode("Convert", nodeName, eng, cache) {
inputShapes.emplace_back(dims);
inputShapes.push_back(shape);
addOriginalInputPrecision(inPrc);
outputShapes.emplace_back(dims);
outputShapes.push_back(shape);
addOriginalOutputPrecision(outPrc);
isDynamic = shape.isDynamic();
errorPrefix = "Convert node with name '" + getName() + "'";
}

View File

@ -14,13 +14,14 @@ namespace MKLDNNPlugin {
class MKLDNNConvertNode : public MKLDNNNode {
public:
MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
void getSupportedDescriptors() override;
void initSupportedPrimitiveDescriptors() override;
void createPrimitive() override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool created() const override;
bool canBeInPlace() const override {
return false;
@ -38,6 +39,9 @@ public:
const MemoryDesc& getInput() const { return *input; }
const MemoryDesc& getOutput() const { return *output; }
std::vector<VectorDims> shapeInfer() const override;
bool needPrepareParams() const override { return false; }
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
static bool isSupportedDesc(const MemoryDesc &desc);

View File

@ -18,6 +18,7 @@
#include <nodes/common/cpu_memcpy.h>
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -273,7 +274,9 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());

View File

@ -23,6 +23,7 @@
#include "emitters/jit_bf16_emitters.hpp"
#include <mkldnn_selective_build.h>
#include "utils/general_utils.h"
#include "utils/cpu_utils.hpp"
#include "ngraph/ngraph.hpp"
#include <ngraph/opsets/opset1.hpp>
@ -1007,9 +1008,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const {
// TODO [DS]: used only in FuseConvolutionSumAndConvolutionSumActivation
// fix when this transformation is reimplemented for dynamic shapes
bool MKLDNNEltwiseNode::isWithBroadcast() {
auto oDims = outputShapes[0].getStaticDims();
auto oDims = getOutputShapeAtPort(0).getStaticDims();
for (size_t i = 0; i < inputShapes.size(); i++) {
auto iDims = inputShapes[i].getStaticDims();
auto iDims = getInputShapeAtPort(i).getStaticDims();
if (iDims != oDims)
return true;
}
@ -1039,7 +1040,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
return;
// if dim rank is greater than the maximum possible, we should use the reference execution
canUseOptimizedImpl = mayiuse(x64::sse41) && inputShapes[0].getRank() <= MAX_ELTWISE_DIM_RANK;
canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;
size_t expectedInputsNum = getOpInputsNum();
for (auto& postOp : fusedWith) {
@ -1246,10 +1247,6 @@ std::vector<VectorDims> MKLDNNEltwiseNode::shapeInfer() const {
}
void MKLDNNEltwiseNode::prepareParams() {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
if (memPtrs.empty()) {
for (auto i = 0; i < inputNum; i++)
memPtrs.push_back(getParentEdgeAt(i)->getMemoryPtr());
@ -1520,7 +1517,7 @@ void MKLDNNEltwiseNode::offset_in_calc(VectorDims& offset, VectorDims& dims_in,
}
}
void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void MKLDNNEltwiseNode::executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const {
parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4],
[&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
@ -1535,7 +1532,7 @@ void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise
});
}
void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out, const size_t schedulerWorkAmount) const {
parallel_nt(0, [&](const int ithr, const int nthr) {
size_t start = 0, end = 0;
@ -1690,19 +1687,14 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd &&
getInputShapeAtPort(0) == getInputShapeAtPort(1);
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
if ((parentNode->getType() == FullyConnected) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
fillScalesAndShifts(parentNode.get(), scales, shifts);
} else {
fillScalesAndShifts(parentNode.get(), scales, shifts, 16);
}
scalesSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
}
MKLDNNNode::fuseInto(parentNode);
}
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
switch (getMKLDNNAlgorithm()) {
case mkldnn::algorithm::eltwise_relu:
@ -1730,18 +1722,21 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
default: IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
auto outShape = outputShapes[0].getStaticDims();
auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
std::vector<size_t> binaryDims(postOpDims.size(), 1);
binaryDims[chIdx] = postOpDims[chIdx];
std::vector<size_t> binaryShape(outShape.size(), 1);
binaryShape[chIdx] = outShape[chIdx];
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());
if (initBinaryMemory) {
@ -1752,19 +1747,19 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwiseMultiply:
case EltwiseDivide:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
break;
case EltwiseMulAdd:
case EltwisePowerStatic:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
@ -1777,14 +1772,14 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scales.empty() || shifts.empty())
if (scalesBuffer.empty() || shiftsBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]);
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scales.empty())
if (scalesBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr);
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
@ -1810,7 +1805,7 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
return true;
};
if (!mayiuse(x64::sse41) || inputShapes[0].getRank() > MAX_ELTWISE_DIM_RANK)
if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
return false;
if (!isSuitableNode(this)) {

View File

@ -75,7 +75,7 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;
@ -116,7 +116,7 @@ private:
void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override;
const jit_eltwise_params& getJep() const override;
std::shared_ptr<jit_uni_eltwise_kernel> pKernel;
std::unique_ptr<jit_uni_eltwise_kernel> pKernel;
size_t schedulerWorkAmount = 0;
};
@ -149,15 +149,16 @@ private:
std::vector<float> scales = {};
std::vector<float> shifts = {};
size_t scalesSize = 0;
std::vector<float> scalesBuffer = {};
std::vector<float> shiftsBuffer = {};
std::vector<MKLDNNMemoryPtr> memPtrs = {};
static std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>> initializers;
void executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const;
void executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out, const size_t schedulerWorkAmount) const;
void executeReference(const jit_eltwise_params &jep, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out,
const size_t fullWorkAmount) const;

View File

@ -22,6 +22,7 @@
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/ngraph_utils.hpp"
#include "common/cpu_memcpy.h"
// Quantization range validation is switched off by default in order to avoid regressions on the user side
// #define VALIDATE_QUANTIZATION_RANGES
@ -825,23 +826,19 @@ private:
bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(op);
if (!fq) {
errorMessage = "Only opset1 FakeQuantize operation is supported";
return false;
}
if (fq->get_input_shape(0).size() < 2 || fq->get_input_shape(0).size() > 5) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(fq->get_input_shape(0).size());
const auto dataRank = fq->get_input_partial_shape(0).rank().get_length();
if (dataRank < 2 || dataRank > 5) {
errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(dataRank);
return false;
}
for (size_t i = 1; i < fq->get_input_size(); i++) {
if (fq->get_input_shape(i).size() > 5) {
errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_shape(i).size());
if (fq->get_input_partial_shape(i).rank().get_length() > 5) {
errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_partial_shape(i).rank().get_length());
return false;
}
}
@ -853,7 +850,7 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ng
}
for (size_t i = 1; i < fq->get_input_size(); i++) {
size_t count_not_unit_axis = 0;
auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), fq->get_input_shape(0).size());
auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), dataRank);
if (ngraph::shape_size(shape) != 1) {
size_t not_unit_axis = 0;
@ -892,12 +889,12 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
if (levels <= 1)
IE_THROW() << errorPrefix << "supports 'levels' attribute greater than or equal to 2";
if (fq->get_input_size() != 5)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << fq->get_input_size();
if (fq->get_output_size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size();
if (inputShapes.size() != 5)
IE_THROW() << errorPrefix << "has incorrect number of input edges: " << inputShapes.size();
if (outputShapes.size() != 1)
IE_THROW() << errorPrefix << "has incorrect number of output edges: " << outputShapes.size();
auto initAxisIdx = [&](const ngraph::Shape& inputDims) {
auto initAxisIdx = [&](const VectorDims& inputDims) {
size_t axisIdx = 0;
for (int i = 1; i < inputDims.size(); i++) {
if (inputDims[i] > 1) {
@ -908,11 +905,11 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
return axisIdx;
};
const size_t dataNDims = fq->get_input_shape(0).size();
axis = dataNDims == 1 ? 0 : 1;
const size_t dataRank = getInputShapeAtPort(0).getRank();
axis = dataRank == 1 ? 0 : 1;
int axisSize = -1;
const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataNDims);
const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataRank);
auto inputLowAxis = initAxisIdx(ilShape);
isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1);
if (!isInputLowBroadcasted) {
@ -920,7 +917,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = ilShape[inputLowAxis];
}
const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataNDims);
const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataRank);
auto inputHighAxis = initAxisIdx(ihShape);
isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1);
if (!isInputHighBroadcasted) {
@ -928,7 +925,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = ihShape[inputHighAxis];
}
const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataNDims);
const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataRank);
auto outputLowAxis = initAxisIdx(olShape);
isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1);
if (!isOutputLowBroadcasted) {
@ -936,7 +933,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
axisSize = olShape[outputLowAxis];
}
const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataNDims);
const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataRank);
auto outputHighAxis = initAxisIdx(ohShape);
isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1);
if (!isOutputHighBroadcasted) {
@ -949,11 +946,9 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
auto outputLowAxisSize = ngraph::is_scalar(olShape) ? 1 : olShape[outputLowAxis];
auto outputHighAxisSize = ngraph::is_scalar(ohShape) ? 1 : ohShape[outputHighAxis];
int axisRealSize = static_cast<int>(fq->get_input_shape(0)[axis]);
size_t axisPaddedSize = static_cast<size_t>(rnd_up(fq->get_input_shape(0)[axis], 16));
if (axisSize != -1 && axisSize != axisRealSize)
if (axisSize != -1 && !dimsEqualWeak(axisSize, getInputShapeAtPort(0).getDims()[axis])) {
IE_THROW() << errorPrefix << "has different quantization axis size on 'data' and 'range' inputs";
}
const auto inputLowNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(fq->get_input_node_shared_ptr(1));
auto inputLowData = inputLowNode->cast_vector<float>();
@ -995,12 +990,24 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
if (binarization) {
algorithm = FQBinarization;
binarizationThresholds.resize(axisPaddedSize);
binarizationOutputMask.resize(axisPaddedSize);
if (isInputLowBroadcasted) {
binarizationThresholds.push_back(inputLowData[0]);
} else {
IE_ASSERT(axisSize != -1);
binarizationThresholds.resize(rnd_up(axisSize, 16));
for (int i = 0; i < axisSize; i++) {
binarizationThresholds[i] = inputLowData[i];
}
}
for (int i = 0; i < axisRealSize; i++) {
binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i];
binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000;
if (isOutputHighBroadcasted) {
binarizationOutputMask.push_back(outputHighData[0] == 1.f ? 0xffffffff : 0x00000000);
} else {
IE_ASSERT(axisSize != -1);
binarizationOutputMask.resize(rnd_up(axisSize, 16));
for (int i = 0; i < axisSize; i++) {
binarizationOutputMask[i] = outputHighData[i] == 1.f ? 0xffffffff : 0x00000000;
}
}
} else {
auto allElementsAreEqual = [&](const std::vector<float> &data, size_t size) {
@ -1117,13 +1124,14 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
std::vector<LayoutType> MKLDNNFakeQuantizeNode::getDataFormats() const {
// Special case for first FQ in the network
if (getInputShapeAtPort(0).getStaticDims()[getAxis()] == 3) {
const auto &dims = getInputShapeAtPort(0).getDims();
if (dims[getAxis()] == 3) {
return { LayoutType::ncsp };
} else {
if (isBinarization()) {
return { LayoutType::nspc };
} else {
if (one_of(getInputShapeAtPort(0).getRank(), 4, 5)) {
if (one_of(dims.size(), 4, 5)) {
if (getAxis() == 1) {
auto blkFormat = mayiuse(cpu::x64::avx512_common) ? LayoutType::nCsp16c : LayoutType::nCsp8c;
return { blkFormat, LayoutType::nspc, LayoutType::ncsp };
@ -1235,81 +1243,139 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() {
}
}
void MKLDNNFakeQuantizeNode::createPrimitive() {
auto config = getSelectedPrimitiveDescriptor()->getConfig();
auto inDims = config.inConfs[0].desc->getShape().getStaticDims();
jqp.c = inDims.size() > 1 ? inDims[1] : 1;
jqp.src_prc = config.inConfs[0].desc->getPrecision();
jqp.wei_prc = Precision::FP32;
jqp.dst_prc = config.outConfs[0].desc->getPrecision();
auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.s_str = srcDesc->getStrides();
auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.d_str = dstDesc->getStrides();
jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
jqp.op_type = getAlgorithm();
bool MKLDNNFakeQuantizeNode::needPrepareParams() const {
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (mayiuse(cpu::x64::avx512_common)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(jqp));
} else if (mayiuse(cpu::x64::avx2)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(jqp));
} else if (mayiuse(cpu::x64::sse41)) {
if (isBinarization())
quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(jqp));
else
quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(jqp));
if (internalBlobMemory.empty() || (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref && inputShapesModified())) {
return true;
}
const auto axisSize = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[getAxis()];
const auto newPaddedSize = rnd_up(axisSize, 16);
const auto currPaddedSize = rnd_up(currentAxisSize, 16);
return newPaddedSize != currPaddedSize || (isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) &&
axisSize != currentAxisSize);
}
void MKLDNNFakeQuantizeNode::prepareParams() {
const size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
const size_t newPaddedSize = rnd_up(axisSize, 16);
IE_ASSERT(newPaddedSize != 0);
if (internalBlobMemory.empty() || newPaddedSize != rnd_up(currentAxisSize, 16) ||
(isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) && axisSize != currentAxisSize)) {
DnnlBlockedMemoryDesc weightsDataDesc(Shape(VectorDims{newPaddedSize}), memory::data_type::f32, memory::format_tag::x);
if (isBinarization()) {
constexpr size_t numBinFqIntBlob = 2;
bool needUpdThr = false, needUpdMask = false;
if (isInputLowBroadcasted && axisSize != currentAxisSize) {
binarizationThresholds.resize(newPaddedSize);
std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + axisSize, binarizationThresholds[0]);
std::fill(binarizationThresholds.begin() + axisSize, binarizationThresholds.end(), 0);
needUpdThr = true;
}
if (isOutputHighBroadcasted && axisSize != currentAxisSize) {
binarizationOutputMask.resize(newPaddedSize);
std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + axisSize, binarizationOutputMask[0]);
std::fill(binarizationOutputMask.begin() + axisSize, binarizationOutputMask.end(), 0);
needUpdMask = true;
}
if (internalBlobMemory.empty() || needUpdThr) {
auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
if (internalBlobMemory.empty()) {
internalBlobMemory.push_back(binarizationThresholdsDataMem);
} else {
internalBlobMemory[0] = binarizationThresholdsDataMem;
}
}
if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) {
auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) {
internalBlobMemory.push_back(binarizationMaskDataMem);
} else {
internalBlobMemory[1] = binarizationMaskDataMem;
}
}
} else if (levels != 2) {
constexpr size_t numFqIntBlob = 6;
auto pushInternalBlob = [&](std::vector<float>& data, size_t idx) {
auto memory = std::make_shared<MKLDNNMemory>(getEngine());
bool needOverwrite = getInputShapeAtPort(0).getDims()[getAxis()] == Shape::UNDEFINED_DIM && data.size() == 1;
if (needOverwrite) {
memory->Create(weightsDataDesc);
float *ptr = reinterpret_cast<float *>(memory->GetPtr());
std::fill(ptr, ptr + newPaddedSize, data[0]);
} else {
if (data.size() == 1) {
data.resize(newPaddedSize, data[0]);
} else {
data.resize(newPaddedSize);
}
memory->Create(weightsDataDesc, &data[0]);
}
if (internalBlobMemory.size() != numFqIntBlob) {
internalBlobMemory.push_back(memory);
} else if (needOverwrite) {
internalBlobMemory[idx] = memory;
}
};
pushInternalBlob(cropLow, 0);
pushInternalBlob(cropHigh, 1);
pushInternalBlob(inputScale, 2);
pushInternalBlob(inputShift, 3);
pushInternalBlob(outputScale, 4);
pushInternalBlob(outputShift, 5);
} else {
IE_THROW() << "Can't fill internal blob for FakeQuantize node with name: " << getName();
}
}
if (quantize_kernel)
quantize_kernel->create_ker();
currentAxisSize = axisSize;
size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
size_t axisPaddedSize = rnd_up(axisSize, 16);
auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
if (!selectedPrimitiveDescriptor)
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
const auto& config = getSelectedPrimitiveDescriptor()->getConfig();
DnnlBlockedMemoryDesc weightsDataDesc(Shape(InferenceEngine::SizeVector{axisPaddedSize}), memory::data_type::f32, memory::format_tag::x);
const auto& inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims();
if (isBinarization()) {
auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
internalBlobMemory.push_back(binarizationThresholdsDataMem);
jit_quantize_params jqp = {};
jqp.c = inDims.size() > 1 ? inDims[1] : 1;
auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
internalBlobMemory.push_back(binarizationMaskDataMem);
} else if (levels != 2) {
auto pushInternalBlob = [&](std::vector<float>& data) {
if (data.size() == 1)
data.resize(axisPaddedSize, data[0]);
else
data.resize(axisPaddedSize);
auto memory = std::make_shared<MKLDNNMemory>(getEngine());
memory->Create(weightsDataDesc, &data[0]);
internalBlobMemory.push_back(memory);
};
jqp.src_prc = config.inConfs[0].desc->getPrecision();
jqp.wei_prc = Precision::FP32;
jqp.dst_prc = config.outConfs[0].desc->getPrecision();
pushInternalBlob(cropLow);
pushInternalBlob(cropHigh);
pushInternalBlob(inputScale);
pushInternalBlob(inputShift);
pushInternalBlob(outputScale);
pushInternalBlob(outputShift);
auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.s_str = srcDesc->getStrides();
auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
jqp.d_str = dstDesc->getStrides();
jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
jqp.op_type = getAlgorithm();
execPtr = std::make_shared<FakeQuantizeJitExecutor>(jqp);
}
}
void MKLDNNFakeQuantizeNode::createPrimitive() {
if (inputShapesDefined()) {
prepareParams();
updateLastInputDims();
}
}
@ -1322,8 +1388,8 @@ void MKLDNNFakeQuantizeNode::executeReference() {
auto srcDims = srcMemory->getStaticDims();
auto dstDims = dstMemory->getStaticDims();
auto s_str = jqp.s_str;
auto d_str = jqp.d_str;
auto s_str = srcMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();
auto d_str = dstMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();
const int N = srcDims[0];
const int C = srcDims.size() > 1 ? srcDims[1] : 1;
@ -1331,7 +1397,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1;
const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1;
if (jqp.op_type == FQBinarization) {
if (isBinarization()) {
size_t tmp = s_str[s_str.size() - 1];
for (int i = s_str.size() - 1; i > 1; i--) {
s_str[i] = s_str[i - 1];
@ -1430,7 +1496,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
}
}
void MKLDNNFakeQuantizeNode::executeBinarization() {
void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@ -1442,6 +1508,7 @@ void MKLDNNFakeQuantizeNode::executeBinarization() {
auto src_dims = srcMemory->getStaticDims();
const auto &jqp = pKernel->jqp_;
std::vector<size_t> s_str = jqp.s_str;
size_t tmp = s_str[s_str.size() - 1];
for (int i = s_str.size() - 1; i > 1; i--) {
@ -1465,11 +1532,11 @@ void MKLDNNFakeQuantizeNode::executeBinarization() {
arg.output_mask = &output_mask[0];
arg.work_amount = (size_t)C;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
}
void MKLDNNFakeQuantizeNode::executeQuantization() {
void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
@ -1490,6 +1557,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5))
? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8;
const auto &jqp = pKernel->jqp_;
auto src_type_size = jqp.src_prc.size();
auto dst_type_size = jqp.dst_prc.size();
@ -1536,7 +1604,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
arg.block_size = (size_t) blk_size;
arg.work_amount = (size_t)H;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
} else {
parallel_nd(N, CB, D, H, [&](int n, int cb, int d, int h) {
@ -1564,7 +1632,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() {
arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c);
arg.work_amount = (size_t) W;
(*quantize_kernel)(&arg);
(*pKernel)(&arg);
});
}
}
@ -1575,29 +1643,40 @@ void MKLDNNFakeQuantizeNode::execute(mkldnn::stream strm) {
IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
if (jqp.op_type == FQBinarization)
executeBinarization();
else
executeQuantization();
execPtr->exec(*this);
} else {
executeReference();
}
}
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
// MKLDNN quantization_injectors assume that the quantization data memory is always aligned to 16,
// the length of an AVX512 vector register, which is also enough for the AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-reads and performance penalties due to denormals.
const size_t bufferAlignment = 16;
if (getAlgorithm() == FQBinarization) {
const auto realAxisSize = postOpDims[postOpDims.size() > 1 ? 1 : 0];
const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
if (!isPostOpDataInitialized) {
size_t paddedSize = rnd_up(binarizationThresholds.size(), bufferAlignment);
binarizationThresholds.resize(paddedSize, 0);
binarizationOutputMask.resize(paddedSize, 0);
binarizationThresholds.resize(axisPaddedSize, 0);
binarizationOutputMask.resize(axisPaddedSize, 0);
if (isInputLowBroadcasted) {
std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
}
if (isOutputHighBroadcasted) {
std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
std::fill(binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask.end(), 0);
}
}
ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);
if (!isInputLowBroadcasted && !isOutputHighBroadcasted) {
isPostOpDataInitialized = true;
}
} else {
if (!isPostOpDataInitialized) {
if (cropLow.size() > 1)
@ -1626,10 +1705,10 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin
if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) {
auto outShape = outputShapes[0].getStaticDims();
auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
const auto rank = getOutputShapeAtPort(0).getRank();
auto chIdx = rank > 1 ? 1 : 0;
std::vector<size_t> binaryShape(outShape.size(), 1);
std::vector<size_t> binaryShape(rank, 1);
binaryShape[chIdx] = dataSize;
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
@ -1654,10 +1733,45 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin
} else {
ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData);
}
}
if (!isPostOpDataInitialized)
isPostOpDataInitialized = true;
}
}
MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) {
bool isBinarization = _jqp.op_type == FQBinarization;
if (mayiuse(cpu::x64::avx512_common)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(_jqp));
} else if (mayiuse(cpu::x64::avx2)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(_jqp));
} else if (mayiuse(cpu::x64::sse41)) {
if (isBinarization)
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(_jqp));
else
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(_jqp));
} else {
IE_THROW() << "Can't create jit fake quantize kernel";
}
if (pKernel) {
pKernel->create_ker();
}
}
void MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::exec(const MKLDNNFakeQuantizeNode& node) {
if (!pKernel)
IE_THROW() << "Can't execute, kernel for fake quantize node is not compiled";
if (pKernel->jqp_.op_type == FQBinarization) {
node.executeBinarization(pKernel);
} else {
node.executeQuantization(pKernel);
}
}
bool MKLDNNFakeQuantizeNode::created() const {

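A small worked example of the re-prepare condition added above (the axis sizes are illustrative): internal blobs are rebuilt only when the 16-padded axis size changes, or, for binarization with broadcasted input low / output high constants, when the raw axis size changes:

// Illustrative values only, following needPrepareParams() above.
const size_t lastAxisSize = 24;                   // axis size used when the blobs were last built
const size_t newAxisSize = 30;                    // axis size observed on the current inference
const bool paddedSizeChanged =
        rnd_up(newAxisSize, 16) != rnd_up(lastAxisSize, 16);  // 32 != 32 -> false, blobs are reused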
View File

@ -73,11 +73,15 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
size_t getAxis() const { return axis; }
bool isBinarization() const { return getAlgorithm() == Algorithm::FQBinarization; }
bool needPrepareParams() const override;
void prepareParams() override;
const float* getBinarizationTresholdsPtr() const { return &binarizationThresholds[0]; }
const float* getBinarizationOutputMaskPtr() const { return reinterpret_cast<const float*>(&binarizationOutputMask[0]); }
size_t getBinarizationTresholdsSize() const { return binarizationThresholds.size(); }
@ -117,7 +121,8 @@ public:
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }
void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = -1, bool initAsBinary = false,
bool initBinaryMemory = false) override;
static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;
@ -129,11 +134,24 @@ public:
MKLDNNMemoryPtr outputShiftMemory;
private:
struct FakeQuantizeExecutor {
virtual void exec(const MKLDNNFakeQuantizeNode& node) = 0;
virtual ~FakeQuantizeExecutor() = default;
};
using executorPtr = std::shared_ptr<FakeQuantizeExecutor>;
executorPtr execPtr = nullptr;
struct FakeQuantizeJitExecutor : public FakeQuantizeExecutor {
FakeQuantizeJitExecutor(const jit_quantize_params &_jqp);
void exec(const MKLDNNFakeQuantizeNode& node) override;
std::unique_ptr<jit_uni_quantize_kernel> pKernel;
};
void init() override;
std::vector<LayoutType> getDataFormats() const;
void executeReference();
void executeBinarization();
void executeQuantization();
void executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
void executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
size_t levels = 0;
@ -170,15 +188,12 @@ private:
bool isOutputLowBroadcasted = false;
bool isOutputHighBroadcasted = false;
size_t currentAxisSize = 0;
size_t axis = 0;
InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32;
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
jit_quantize_params jqp = {};
std::shared_ptr<jit_uni_quantize_kernel> quantize_kernel = nullptr;
std::string errorPrefix;
};

View File

@ -14,6 +14,7 @@
#include "utils/general_utils.h"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -189,7 +190,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// no need to fill post op dims for FQ, it only makes sense for binary FQ
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, -1, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
@ -209,7 +211,9 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// TODO [DS]: change to shape from memory
constexpr int align = -1;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());

View File

@ -25,6 +25,7 @@
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset4.hpp>
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -2394,7 +2395,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

View File

@ -24,6 +24,7 @@
#include "utils/general_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "mkldnn_extension_utils.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -89,7 +90,8 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights
for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
continue;
}

View File

@ -24,6 +24,7 @@
#include <ngraph/opsets/opset6.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -604,11 +605,6 @@ private:
bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}
if (op->get_output_partial_shape(0).rank().is_dynamic()) {
errorMessage = "Unsupported dynamic input rank.";
return false;
@ -680,7 +676,6 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
IE_THROW(NotImplemented) << errorMessage;
}
const ngraph::Shape& inDataShape = op->input_value(0).get_shape();
if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v6::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
@ -689,27 +684,25 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
epsMode_ = OUTSIDE_SQRT;
}
acrossChannels_ = false;
const auto& inDataShapeSize = inDataShape.size();
initAcrossChannels_ = false;
const auto& inDataShapeSize = getInputShapeAtPort(0).getRank();
if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1)
acrossChannels_ = true;
initAcrossChannels_ = true;
} else if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v0::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
epsMode_ = INSIDE_SQRT;
acrossChannels_ = mvnOp->get_across_channels();
initAcrossChannels_ = mvnOp->get_across_channels();
}
execAcrossChannels_ = initAcrossChannels_;
}
void MKLDNNMVNNode::getSupportedDescriptors() {
}
void MKLDNNMVNNode::getSupportedDescriptors() {}
void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;
setPostOps(attr, true);
Precision inputPrecision = getOriginalInputPrecisionAtPort(0);
Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
if (!mayiuse(avx512_core)) {
@ -729,7 +722,8 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
src_data_size = inputPrecision.size();
dst_data_size = outputPrecision.size();
bool canBeInplace = (src_data_size == dst_data_size) &&
// TODO [DS]: inplace
bool canBeInplace = !isDynamicNode() && (src_data_size == dst_data_size) &&
(getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) &&
!getParentEdgeAt(0)->getParent()->isConstant();
@ -788,7 +782,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
pushDesc(LayoutType::ncsp, impl_type);
}
void MKLDNNMVNNode::createPrimitive() {
void MKLDNNMVNNode::prepareParams() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -800,74 +794,87 @@ void MKLDNNMVNNode::createPrimitive() {
const SizeVector in_dims = srcMemPtr->getStaticDims();
transformTo5DCase(in_dims);
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = acrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;
if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));
setPostOps(attr, true);
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (mayiuse(cpu::x64::sse41)) {
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = execAcrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;
if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
}
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));
if (mvn_kernel)
mvn_kernel->create_ker();
jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();
if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}
}
void MKLDNNMVNNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}
if (mvn_kernel)
mvn_kernel->create_ker();
if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();
if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}
void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {
switch (shape.size()) {
// for 1 and 2 rank, if acrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// for 1 and 2 rank, if initAcrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// otherwise there are not enough data in spatial dimension to process in one kernel.
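// Illustrative example (hypothetical dims): a rank-2 input {3, 8} with initAcrossChannels_ == true is
// remapped to shape5D = (1, 3, 1, 8, 1) and execAcrossChannels_ is reset to false, so each original
// instance becomes a "channel" and is normalized over its own 8 values by the regular per-channel path.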
case 1 : // C
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, 1, 1, 1, shape[0]);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(1, shape[0], 1, 1, 1);
break;
}
case 2 : // NC
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, shape[0], 1, shape[1], 1);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(shape[0], shape[1], 1, 1, 1);
@ -882,6 +889,8 @@ void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {
void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
mkldnn::post_ops ops;
VectorDims postOpDims(5);
std::tie(postOpDims[0], postOpDims[1], postOpDims[2], postOpDims[3], postOpDims[4]) = shape5D;
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
@ -891,7 +900,8 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@ -906,22 +916,21 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) {
uint8_t *dst_data = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
uint8_t *src_data = reinterpret_cast<uint8_t*>(srcMemPtr->GetPtr());
auto dim = srcMemPtr->getStaticDims();
if (mayiuse(cpu::x64::sse41)) {
if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) {
IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform.";
}
if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) {
mvn_pln(src_data, dst_data, dim);
mvn_pln(src_data, dst_data);
} else {
mvn_blk(src_data, dst_data, dim);
mvn_blk(src_data, dst_data);
}
} else {
mvn_ref(src_data, dst_data, dim);
mvn_ref(src_data, dst_data);
}
}
void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // blk size in vmm
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -943,7 +952,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Calculate mean value for one instance in batch
// Parallel sum for each channel
float C3inv = 1.f / static_cast<float>(C3);
@ -1056,7 +1065,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}
void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data) {
const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
size_t N = 0; size_t C = 0; size_t D = 0; size_t H = 0; size_t W = 0;
@ -1068,7 +1077,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Parallel sum for each channel for mean
float C3inv = 1.f / static_cast<float>(C3);
float mean_temp = 0.0f;
@ -1154,7 +1163,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}
void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // channel blk for memory layout
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -1176,7 +1185,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
size_t C5 = C * D * H * W;
size_t threads_num = parallel_get_num_threads();
size_t aux_buffer_size = acrossChannels_ ? blk_size : rnd_up(C, blk_size);
size_t aux_buffer_size = execAcrossChannels_ ? blk_size : rnd_up(C, blk_size);
std::vector<float> mean_buffer(aux_buffer_size * threads_num);
std::vector<float> variance_buffer(aux_buffer_size * threads_num);
@ -1185,7 +1194,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
for (size_t b = 0lu; b < N; b++) {
size_t b_offset = is_nhwc ? b * C5 : b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// mean for this instance in batch
float C5inv = 1.f / static_cast<float>(C5);
float mean_temp = 0.0f;
@ -1213,7 +1222,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
arg.src_stride = src_stride_size;
arg.work_amount = static_cast<size_t>(W);
arg.oc_off = static_cast<size_t>(cb * blk_size * sizeof(float)); // for tail process
(*mvn_mean_kernel)(&arg); // for W * blk
(*mvn_mean_kernel)(&arg); // for W * blk
size_t min_cb = (std::min)(blk_size, C - cb * blk_size);
for (int i = 0; i < min_cb; i++)
@ -1401,7 +1410,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const {
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu);
if ((inputRank == 1 && !unaryEltwise) ||
(inputRank == 2 && !unaryEltwise && acrossChannels_)) {
(inputRank == 2 && !unaryEltwise && initAcrossChannels_)) {
return false;
}

View File

@ -80,12 +80,13 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool canBeInPlace() const override {
return false;
}
inline bool getAcrossChannels() const {
return acrossChannels_;
return initAcrossChannels_;
}
inline bool getNormalizeVariance() const {
@ -94,12 +95,14 @@ public:
bool canFuse(const MKLDNNNodePtr& node) const override;
void prepareParams() override;
private:
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data);
void mvn_blk(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_blk(const uint8_t *src_data, uint8_t *dst_data);
void mvn_ref(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_ref(const uint8_t *src_data, uint8_t *dst_data);
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false);
@ -107,7 +110,8 @@ private:
std::tuple<size_t, size_t, size_t, size_t, size_t> shape5D;
bool acrossChannels_ = false;
bool initAcrossChannels_ = false;
bool execAcrossChannels_ = false;
bool normalizeVariance_ = true;
float epsValue_ = 1e-9f;
// Defines way to add epsilon: inside sqrt or outside.
@ -122,8 +126,6 @@ private:
mkldnn::primitive_attr attr;
std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_mean_kernel;
std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_variance_kernel;
std::shared_ptr<jit_uni_mvn_kernel> mvn_kernel;

View File

@ -22,6 +22,7 @@
#include <ngraph/opsets/opset1.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"
using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -811,7 +812,9 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr &attr, bool initWe
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

View File

@ -102,10 +102,6 @@ void MKLDNNReorderNode::createPrimitive() {
void MKLDNNReorderNode::prepareParams() {
if (!isOptimized) {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())

View File

@ -124,10 +124,6 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() {
}
void MKLDNNSelectNode::prepareParams() {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
}
const auto &_conditionDims = getParentEdgesAtPort(CONDITION)[0]->getMemory().getStaticDims();
const auto &_thenDims = getParentEdgesAtPort(THEN)[0]->getMemory().getStaticDims();
const auto &_elseDims = getParentEdgesAtPort(ELSE)[0]->getMemory().getStaticDims();

View File

@ -32,9 +32,13 @@ inline std::vector<size_t> getNormalizedDimsBySize(const InferenceEngine::SizeVe
* shape onto which the second shape should be broadcastable
* @param secondInputDims
* shape which should be broadcastable
* @param weakComparison
* flag which specifies how the C dims are compared when a value is undefined (weak or strong comparison)
* @return true if broadcastable, false otherwise.
*/
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims) {
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims,
bool weakComparison = false) {
bool (*dimsEqual)(size_t, size_t) = weakComparison ? static_cast<bool (*)(size_t, size_t)>(dimsEqualWeak) : dimsEqualStrong;
if (secondInputDims.size() > firstInputDims.size())
return false;
if (std::accumulate(secondInputDims.begin(), secondInputDims.end(), 1, std::multiplies<size_t>()) == 1)
@ -42,7 +46,7 @@ inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVect
std::vector<size_t> normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size());
for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) {
if ((i == 1 && normalizedSecondInputDims[i] != firstInputDims[1]) || (i != 1 && normalizedSecondInputDims[i] != 1))
if ((i == 1 && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[1])) || (i != 1 && normalizedSecondInputDims[i] != 1))
return false;
}
return true;
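For illustration (hypothetical dims), the weak comparison is what lets the per-channel check pass while the channel dim is still dynamic:
// firstInputDims = {2, Shape::UNDEFINED_DIM, 3, 4}, secondInputDims = {1, 16, 1, 1}
// weakComparison == false: dimsEqualStrong(16, UNDEFINED_DIM) -> false, not broadcastable
// weakComparison == true:  dimsEqualWeak(16, UNDEFINED_DIM)   -> true,  per-channel broadcastable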
@ -90,4 +94,34 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
return precision;
}
/**
* @brief Returns a buffer aligned to targetSize.
* If the buffer has size 1, its value is broadcast to the first targetSize elements.
* If the aligned buffer size exceeds targetSize, the remaining elements are filled with zeros.
* @param targetSize
* target buffer size
* @param buffer
* buffer to be aligned
* @param align
* alignment for targetSize
* @return aligned buffer
*/
inline std::vector<float> makeAlignedBuffer(size_t targetSize, const std::vector<float> &buffer, int align = -1) {
if (buffer.empty()) {
IE_THROW() << "Can't align buffer, becuase buffer is empty";
}
auto alignedBuffer = buffer;
if (align == -1) {
align = targetSize;
}
const size_t bufferSizeAligned = rnd_up(targetSize, align);
alignedBuffer.resize(bufferSizeAligned, 0);
if (buffer.size() == 1) {
std::fill(alignedBuffer.begin() + 1, alignedBuffer.begin() + targetSize, buffer[0]);
}
return alignedBuffer;
}
} // namespace MKLDNNPlugin
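A minimal usage sketch (illustrative values, assuming only the helper added above) of the broadcast and zero-padding behaviour:
#include "utils/cpu_utils.hpp"
#include <cassert>
#include <vector>

static void alignedBufferExample() {
    std::vector<float> scales{2.f};                                 // per-tensor scale from a fused op
    auto aligned = MKLDNNPlugin::makeAlignedBuffer(5, scales, 16);  // broadcast to 5 channels, pad to align of 16
    assert(aligned.size() == 16);                                   // rnd_up(5, 16) == 16
    assert(aligned[4] == 2.f && aligned[5] == 0.f);                 // {2, 2, 2, 2, 2, 0, ..., 0}
}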

View File

@ -143,6 +143,15 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CanSetInBlobWithDifferentPrecision/netPRC=BIN.*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=(I4|U4).*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=BIN.*)",
// Issue: 69086
// need to add support for BIN -> FP32 conversion
// if the output precision is set to BIN, the output blob is created with UNSPECIFIED precision
R"(.*smoke_FakeQuantizeLayerCPUTest.*bin.*)",
// Issue: 69088
// bad accuracy
R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos.
*IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",
};
#define FIX_62820 0

View File

@ -2,33 +2,106 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <shared_test_classes/single_layer/conversion.hpp>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
using namespace ngraph;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
namespace CPULayerTestsDefinitions {
class ConvertCPULayerTest : public ConversionLayerTest {};
using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;
using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition, // input shapes
InferenceEngine::Precision, // input precision
InferenceEngine::Precision, // output precision
CPUSpecificParams>;
class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;
std::ostringstream result;
if (!shapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
}
result << "TS=";
for (const auto& shape : shapes.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "inputPRC=" << inPrc.name() << "_";
result << "targetPRC=" << outPrc.name() << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);
return result.str();
}
protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());
for (size_t i = 0; i < shapes.second.size(); i++) {
targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
}
inputDynamicShapes = shapes.first;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);
function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
}
};
TEST_P(ConvertCPULayerTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
ConversionParamsTuple params = GetParam();
inPrc = std::get<2>(params);
outPrc = std::get<3>(params);
Run();
CheckPluginRelatedResults(executableNetwork, "Convert");
}
namespace {
const std::vector<ngraph::helpers::ConversionTypes> conversionOpTypes = {
ngraph::helpers::ConversionTypes::CONVERT,
ngraph::helpers::ConversionTypes::CONVERT_LIKE,
std::vector<convertLayerShapeDefinition> inShapes_4D = {
{{}, {{1, 2, 3, 4}}},
{
// dynamic
{{-1, -1, -1, -1}},
// target
{
{2, 4, 4, 1},
{2, 17, 5, 4},
{1, 2, 3, 4}
}
},
{
// dynamic
{{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
// target
{
{2, 17, 5, 4},
{5, 2, 3, 2},
{1, 10, 4, 1},
}
}
};
const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};
// List of precisions natively supported by mkldnn.
const std::vector<Precision> precisions = {
Precision::U8,
@ -38,26 +111,19 @@ const std::vector<Precision> precisions = {
Precision::BF16
};
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_From_BF16, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::Values(Precision::BF16),
::testing::ValuesIn(precisions),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
std::vector<CPUSpecificParams> memForm4D = {
CPUSpecificParams({nchw}, {nchw}, {}, {}),
CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_To_BF16, ConvertCPULayerTest,
INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(precisions),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
::testing::ValuesIn(precisions),
::testing::ValuesIn(memForm4D)),
ConvertCPULayerTest::getTestCaseName);
} // namespace CPULayerTestsDefinitions

View File

@ -12,15 +12,18 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using inputShapes = std::tuple<std::vector<ngraph::PartialShape>, // dynamic input shapes
std::vector<ngraph::Shape>, // target input shapes
std::vector<SizeVector>>; // range input shapes
using fqSpecificParams = std::tuple<int64_t, // 'data' input low bounds
int64_t, // 'data' input high bounds
std::vector<float>, // output low
std::vector<float>, // output high
std::vector<SizeVector>, // 'range' inputs shapes
size_t>; // levels
using fqLayerTestParamsSet = std::tuple<fqSpecificParams,
SizeVector, // 'data' input shape
inputShapes, // input shapes
Precision, // input precision
std::pair<std::vector<float>, std::vector<float>>, // il and ih values
bool, // should be decomposed
@ -31,30 +34,39 @@ class FakeQuantizeLayerCPUTest : public testing::WithParamInterface<fqLayerTestP
public:
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;
std::vector<ngraph::PartialShape> dynamicShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(dynamicShapes, targetShapes, ranges) = testShapes;
int64_t inDataLowBounds, inDataHighBounds;
std::vector<float> inputLow, inputHigh, outputLow, outputHigh;
std::vector<SizeVector> inRangesShapes;
size_t levels;
inputLow = inputRangesValues.first;
inputHigh = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, levels) = fqParams;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_";
if (!dynamicShapes.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(dynamicShapes) << "_";
}
result << "TS=";
for (const auto& shape : targetShapes) {
result << "(" << CommonTestUtils::vec2str(shape) << ")_";
}
result << "RS=";
for (const auto& data : ranges) {
result << "(" << CommonTestUtils::vec2str(data) << ")_";
}
result << "inPrec=" << inPrec.name() << "_";
std::string rs = "";
for (size_t i = 0; i < inRangesShapes.size(); i++) {
rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_";
}
result << "RS=" << rs;
result << "LOW_BOUNDS=" << inDataLowBounds << "_";
result << "HIGH_BOUNDS=" << inDataHighBounds << "_";
result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_";
@ -75,7 +87,9 @@ public:
const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo();
auto input = inDataMap.begin();
Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds);
const auto td = input->second->getTensorDesc();
Blob::Ptr blob = FuncTestUtils::createAndFillBlob(InferenceEngine::TensorDesc(td.getPrecision(), targetStaticShapes[index][0], td.getLayout()),
inDataHighBounds - inDataLowBounds, inDataLowBounds);
inferRequest.SetBlob(input->second->name(), blob);
inputs.push_back(blob);
@ -88,30 +102,37 @@ protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::vector<SizeVector> inRangesShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(inputDynamicShapes, targetShapes, ranges) = testShapes;
for (size_t i = 0; i < targetShapes.size(); i++) {
targetStaticShapes.push_back(std::vector<ov::Shape>{targetShapes[i]});
}
size_t levels;
std::vector<std::vector<float>> rangesBounds(RANGES_INPUT_NUMBER);
rangesBounds[0] = inputRangesValues.first;
rangesBounds[1] = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], levels) = fqParams;
auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
ParameterVector params = builder::makeParams(ngInPrec, {inDataShape});
ParameterVector params = builder::makeParams(ngInPrec, {targetStaticShapes[0][0]});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset5::Parameter>(params));
auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty());
auto il = builder::makeConstant(ngInPrec, ranges[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, ranges[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, ranges[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, ranges[3], rangesBounds[3], rangesBounds[3].empty());
auto fq = std::make_shared<opset5::FakeQuantize>(paramOuts[0], il, ih, ol, oh, levels);
layerName = shouldBeDecomposed ? "" : "FakeQuantize";
@ -120,9 +141,7 @@ protected:
selectedType = getPrimitiveType() + "_" + inPrec.name();
}
fq->get_rt_info() = getCPUInfo();
function = std::make_shared<Function>(fq, params, "FakeQuantizeCPU");
function = makeNgraphFunction(ngInPrec, params, fq, "FakeQuantizeCPU");
}
private:
@ -132,6 +151,7 @@ private:
};
TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, layerName);
@ -149,6 +169,12 @@ const std::vector<std::pair<std::vector<float>, std::vector<float>>> input_range
const std::vector<float> outputLow{5.0f}, outputHigh{25.0f};
const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(levels));
namespace fqImpl {
std::vector<CPUSpecificParams> memForm4D_jit = {
@ -157,19 +183,31 @@ std::vector<CPUSpecificParams> memForm4D_jit = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};
const std::vector<std::vector<SizeVector>> rangesShapes4D_jit = {
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_jit = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 12, 1, 1}, {4, 1, 8, 2}, {1, 16, 6, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {1, 16, 1, 1}, {7, 16, 1, 2}, {1, 16, 6, 1}},
{{1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}}
},
};
const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_jit),
::testing::ValuesIn(levels));
const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -181,18 +219,21 @@ std::vector<CPUSpecificParams> memForm4D_ref = {
CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"})
};
const std::vector<std::vector<SizeVector>> rangesShapes4D_ref = {
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_ref = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {4, 1, 1, 1}, {4, 16, 1, 2}, {4, 16, 6, 1}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
};
const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_ref),
::testing::ValuesIn(levels));
const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -206,19 +247,31 @@ std::vector<CPUSpecificParams> memForm5D_jit = {
CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {})
};
const std::vector<std::vector<SizeVector>> rangesShapes5D_jit = {
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_jit = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 4, 5, 6, 7}, {1, 12, 1, 1, 1}, {4, 1, 8, 2, 7}, {1, 16, 6, 5, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{4, 16, 6, 7, 8}, {1, 16, 1, 1, 1}, {7, 16, 1, 2, 5}, {1, 16, 6, 1, 7}},
{{1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}}
},
};
const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_jit),
::testing::ValuesIn(levels));
const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -231,18 +284,21 @@ std::vector<CPUSpecificParams> memForm5D_ref = {
CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"})
};
const std::vector<std::vector<SizeVector>> rangesShapes5D_ref = {
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_ref = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 16, 6, 7, 8}, {3, 16, 1, 1, 1}, {3, 16, 1, 2, 5}, {3, 16, 6, 1, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
};
const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_ref),
::testing::ValuesIn(levels));
const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -250,32 +306,115 @@ const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);
const auto specificParamsBin = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(std::vector<float>{0.0f}),
::testing::Values(std::vector<float>{1.0f}),
::testing::Values(2));
const auto testParamsBin4D = ::testing::Combine(specificParamsBin,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::Values(std::pair<std::vector<float>, std::vector<float>>{{3.0f}, {3.f}}),
::testing::Values(false),
::testing::Values(CPUSpecificParams()));
INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_4D_bin, FakeQuantizeLayerCPUTest, testParamsBin4D, FakeQuantizeLayerCPUTest::getTestCaseName);
} // namespace fqImpl
const std::vector<SizeVector> dataShapes = {
{4, 5, 6, 7},
{3, 4, 5, 6, 7},
{2, 3, 4, 5, 6, 7},
};
const std::vector<std::vector<SizeVector>> rangesShapes = {
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
};
namespace fqDecompos {
const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes),
::testing::ValuesIn(levels));
std::vector<inputShapes> decomposeShapes = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 5, 6, 7}, {7, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{8, 4, 5, 6, 7}, {1, 1, 5, 6, 7}, {1, 1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
};
const auto testParams = ::testing::Combine(specificParams,
::testing::ValuesIn(dataShapes),
::testing::ValuesIn(decomposeShapes),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(true),

View File

@ -12,8 +12,16 @@ using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using basicCpuMvnParams = std::tuple<
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>, // Input shapes
InferenceEngine::Precision, // Input precision
ngraph::AxisSet, // Reduction axes
bool, // Across channels
bool, // Normalize variance
double>; // Epsilon
typedef std::tuple<
LayerTestsDefinitions::mvn1Params,
basicCpuMvnParams,
CPUSpecificParams,
fusingSpecificParams,
Precision, // CNNNetwork input precision
@ -24,16 +32,35 @@ class MvnLayerCPUTest : public testing::WithParamInterface<MvnLayerCPUTestParamS
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public:
static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) {
LayerTestsDefinitions::mvn1Params basicParamsSet;
basicCpuMvnParams basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
Precision inputPrecision, outputPrecision;
std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::Mvn1LayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvn1Params>(
basicParamsSet, 0));
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChanels, normalizeVariance;
double eps;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet;
std::ostringstream result;
if (!inputShapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
}
result << "TS=";
for (const auto& shape : inputShapes.second) {
result << "(" << CommonTestUtils::vec2str(shape) << ")_";
}
result << "Precision=" << netPrecision.name() << "_";
if (!axes.empty()) {
result << "ReductionAccess=" << CommonTestUtils::vec2str(axes.to_vector()) << "_";
} else {
result << "AcrossChannels=" << (acrossChanels ? "TRUE" : "FALSE") << "_";
}
result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_";
result << "Epsilon=" << eps;
result << "_" << "CNNInpPrc=" << inputPrecision.name();
result << "_" << "CNNOutPrc=" << outputPrecision.name();
@ -45,7 +72,9 @@ public:
}
protected:
void SetUp() override {
LayerTestsDefinitions::mvn1Params basicParamsSet;
targetDevice = CommonTestUtils::DEVICE_CPU;
basicCpuMvnParams basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam();
@ -53,14 +82,20 @@ protected:
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
InferenceEngine::SizeVector inputShapes;
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
InferenceEngine::Precision netPrecision;
ngraph::AxisSet axes;
bool acrossChanels, normalizeVariance;
double eps;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps, targetDevice) = basicParamsSet;
std::tie(inputShapes, netPrecision, axes, acrossChanels, normalizeVariance, eps) = basicParamsSet;
for (size_t i = 0; i < inputShapes.second.size(); i++) {
targetStaticShapes.push_back({inputShapes.second[i]});
}
inputDynamicShapes = inputShapes.first;
auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto param = ngraph::builder::makeParams(netPrc, {inputShapes});
auto param = ngraph::builder::makeParams(netPrc, {targetStaticShapes[0].front()});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChanels, normalizeVariance, eps);
if (!axes.empty()) {
@ -82,40 +117,141 @@ TEST_P(MvnLayerCPUTest, CompareWithRefs) {
}
namespace {
const std::vector<std::vector<size_t>> inputShapes_1D = {
{5},
{16},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_1D = {
{ {}, {{5}}},
{ {}, {{16}}},
{
// dynamic
{{-1}},
// target
{
{2},
{16},
{1}
}
},
{
// dynamic
{{{1, 20}}},
// target
{
{1},
{16},
{4}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_2D = {
{1, 32},
{16, 64},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_2D = {
{ {}, {{1, 32}}},
{ {}, {{16, 64}}},
{
// dynamic
{{-1, -1}},
// target
{
{2, 16},
{4, 16},
{1, 16}
}
},
{
// dynamic
{{{1, 5}, {1, 20}}},
// target
{
{1, 1},
{2, 16},
{4, 16}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_3D = {
{1, 32, 17},
{1, 37, 9},
{1, 16, 4},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_3D = {
{ {}, {{1, 32, 17}}},
{ {}, {{1, 37, 9}}},
{ {}, {{1, 16, 4}}},
{
// dynamic
{{-1, -1, -1}},
// target
{
{2, 16, 6},
{4, 16, 2},
{1, 16, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 7}}},
// target
{
{1, 1, 1},
{2, 16, 6},
{4, 16, 2}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_4D = {
{1, 16, 5, 8},
{2, 19, 5, 10},
{7, 32, 2, 8},
{5, 8, 3, 5},
{1, 2, 7, 5},
{1, 4, 5, 5},
{1, 7, 3, 5},
{1, 15, 9, 5},
{4, 41, 6, 9}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_4D = {
{ {}, {{1, 16, 5, 8}}},
{ {}, {{2, 19, 5, 10}}},
{ {}, {{7, 32, 2, 8}}},
{ {}, {{5, 8, 3, 5}}},
{ {}, {{1, 2, 7, 5}}},
{ {}, {{1, 4, 5, 5}}},
{ {}, {{1, 7, 3, 5}}},
{ {}, {{1, 15, 9, 5}}},
{ {}, {{4, 41, 6, 9}}},
{
// dynamic
{{-1, -1, -1, -1}},
// target
{
{2, 16, 10, 6},
{4, 16, 2, 2},
{1, 16, 8, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 10}, {1, 7}}},
// target
{
{1, 1, 1, 1},
{2, 16, 10, 6},
{4, 16, 2, 2}
}
}
};
const std::vector<std::vector<size_t>> inputShapes_5D = {
{1, 32, 8, 1, 6},
{1, 9, 1, 15, 9},
{6, 64, 6, 1, 18},
{2, 31, 2, 9, 1},
{10, 16, 5, 10, 6}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_5D = {
{ {}, {{1, 32, 8, 1, 6}}},
{ {}, {{1, 9, 1, 15, 9}}},
{ {}, {{6, 64, 6, 1, 18}}},
{ {}, {{2, 31, 2, 9, 1}}},
{ {}, {{10, 16, 5, 10, 6}}},
{
// dynamic
{{-1, -1, -1, -1, -1}},
// target
{
{2, 16, 5, 10, 6},
{4, 16, 7, 2, 2},
{1, 16, 11, 8, 4}
}
},
{
// dynamic
{{{1, 5}, {1, 20}, {1, 7}, {1, 10}, {1, 7}}},
// target
{
{1, 1, 1, 1, 1},
{2, 16, 5, 10, 6},
{4, 16, 7, 2, 2}
}
}
};
const std::vector<bool> acrossChannels = {
@ -162,6 +298,7 @@ std::vector<fusingSpecificParams> fusingParamsSet {
fusingFakeQuantizePerTensorRelu,
/* another patterns */
fusingScaleShift,
fusingAddPerTensor
};
const auto Mvn3D = ::testing::Combine(
@ -171,8 +308,7 @@ const auto Mvn3D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -187,8 +323,7 @@ const auto Mvn4D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -203,8 +338,7 @@ const auto Mvn5D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -228,8 +362,7 @@ const auto Mvn1D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),
@ -245,8 +378,7 @@ const auto Mvn2D = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(false),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
@ -262,8 +394,7 @@ const auto Mvn2DTrans = ::testing::Combine(
::testing::ValuesIn(emptyReductionAxes),
::testing::Values(true),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(epsilon)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),

View File

@ -23,7 +23,9 @@ public:
std::tie(shapes, broadcast) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
if (!shapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
}
result << "TS=";
for (const auto& shape : shapes.second) {
result << "(";