[CPU] MVN, FQ, Convert dynamic nodes (#7817)

parent 6908023a42
commit 6416b73855
@@ -486,7 +486,7 @@ void MKLDNNGraph::InitEdges() {
                 std::string convertName = edge->getParent()->getName() + "_" +
                                           inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();

-                auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape().getStaticDims(), inDesc.getPrecision(), outDesc.getPrecision(),
+                auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(),
                                                                        convertName, this->getEngine(), this->weightsCache);
                 convertNode->setDescs(inDesc, outDesc);
                 InsertNode(edge, convertNode, true);
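For context, a minimal sketch (not taken from this diff) of why the constructor now receives a Shape instead of static dims — a Shape built from an ngraph::PartialShape can carry undefined dimensions, which getStaticDims() cannot represent. The exact Shape constructor is an assumption based on its use elsewhere in the plugin:

    // Hypothetical illustration; assumes Shape is constructible from ngraph::PartialShape.
    Shape shape(ngraph::PartialShape{-1, 3, -1, -1});  // batch and spatial dims unknown
    assert(shape.isDynamic());                         // a dynamic Shape is now accepted
    // shape.getStaticDims() would throw here, which is why the old SizeVector-based
    // signature could not describe a dynamic Convert node.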
@@ -1609,7 +1609,30 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph

     std::vector<float> scalesBuffer;
     std::vector<float> shiftsBuffer;
-    parent->fillScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get(), scalesBuffer, shiftsBuffer, 1);
+    auto parentEltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(parent);
+    if (!parentEltwise) {
+        IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node";
+    }
+
+    std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get());
+
+    const auto &outputShape = child->getOutputShapeAtPort(0);
+    VectorDims outputDims = outputShape.getDims();
+    const size_t channelPos = outputDims.size() > 1 ? 1 : 0;
+    if (outputShape.isDynamic()) {
+        if (outputDims[channelPos] == Shape::UNDEFINED_DIM) {
+            if (scalesBuffer.size() > 1) {
+                outputDims[channelPos] = scalesBuffer.size();
+            } else if (shiftsBuffer.size() > 1) {
+                outputDims[channelPos] = shiftsBuffer.size();
+            } else {
+                return false;
+            }
+        }
+    }
+
+    scalesBuffer = makeAlignedBuffer(outputDims[channelPos], scalesBuffer, 1);
+    shiftsBuffer = makeAlignedBuffer(outputDims[channelPos], shiftsBuffer, 1);

     for (int i = 0; i < scalesBuffer.size(); i++)
         if (scalesBuffer[i] == 0.f)
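The channel count resolved above feeds makeAlignedBuffer, which broadcasts a per-tensor value to per-channel size and pads the result. A rough sketch of the behavior implied by the call sites (the real helper lives in utils/cpu_utils.hpp; this is an assumption-based reimplementation, not code from the commit):

    // Sketch: grow `buffer` to targetSize (broadcasting a single value if needed),
    // then zero-pad the length up to a multiple of `align`.
    std::vector<float> makeAlignedBufferSketch(size_t targetSize, std::vector<float> buffer, int align) {
        const size_t aligned = rnd_up(targetSize, static_cast<size_t>(align));
        if (buffer.size() == 1)
            buffer.resize(targetSize, buffer[0]);  // per-tensor value broadcast per channel
        buffer.resize(aligned, 0.f);               // tail padding up to the alignment boundary
        return buffer;
    }

With align == 1, as in the optimizer call above, the buffer is only broadcast, never padded.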
@@ -503,8 +503,9 @@ void MKLDNNNode::execute(mkldnn::stream strm) {
 }

 void MKLDNNNode::executeDynamic(mkldnn::stream strm) {
-    if (needShapeInfer())
+    if (needShapeInfer()) {
         redefineOutputMemory(shapeInfer());
+    }
     if (needPrepareParams()) {
         IE_ASSERT(inputShapesDefined()) << "Can't prepare params for " << getTypeStr() << " node with name: " << getName() <<
             " since the input shapes are not defined.";
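Taken together with the executeDynamicImpl overrides added in the node headers below, the dynamic execution flow is: infer shapes, resize output memory, rebuild shape-dependent parameters, then run. A schematic sketch (not code from the diff; the final executeDynamicImpl call is assumed from the Convert and FakeQuantize headers in this change):

    // void MKLDNNNode::executeDynamic(mkldnn::stream strm) -- schematically:
    if (needShapeInfer()) {
        redefineOutputMemory(shapeInfer());   // 1. compute and apply new output dims
    }
    if (needPrepareParams()) {
        prepareParams();                      // 2. rebuild kernels/blobs for the new shapes
    }
    executeDynamicImpl(strm);                 // 3. nodes like Convert map this to execute()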
@@ -1045,7 +1046,7 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
     }
 }

-void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
+void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
     IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
 }

@@ -1192,7 +1193,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
         if (i == fusingPort)
             continue;
         auto& weightShape = getInputShapeAtPort(i).getDims();
-        if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape))
+        if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
             return false;
     }
     return true;
@@ -1213,6 +1214,66 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
            || isConvertablePowerStatic();
 }

+std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
+    std::vector<float> scales, shifts;
+
+    const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
+        auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
+        auto constBlob = constInputNode->getMemoryPtr();
+        const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
+        buffer.resize(elementsCount);
+        cpu_convert(constBlob->GetPtr(),
+                    &buffer[0],
+                    MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
+                    Precision::FP32,
+                    elementsCount);
+    };
+
+    const auto constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
+
+    if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
+        fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
+    } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
+        fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
+    } else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
+        fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
+        fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
+    } else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
+        const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
+        if (!power) {
+            IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
+        }
+        scales.push_back(power->getBeta());
+        shifts.push_back(power->getGamma());
+    } else {
+        IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
+    }
+
+    switch (getAlgorithm()) {
+        case EltwiseAdd: {
+            scales.resize(shifts.size(), 1.0f);
+            break;
+        }
+        case EltwiseSubtract: {
+            scales.resize(shifts.size(), 1.0f);
+            std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
+            break;
+        }
+        case EltwiseMultiply: {
+            shifts.resize(scales.size(), 0.0f);
+            break;
+        }
+        case EltwiseDivide: {
+            shifts.resize(scales.size(), 0.0f);
+            std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
+            break;
+        }
+        default: break;
+    }
+
+    return {scales, shifts};
+}
+
 bool MKLDNNNode::inputShapesDefined() const {
     for (size_t i = 0; i < getParentEdges().size(); i++) {
         if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined())
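A usage sketch with concrete values (illustrative only; `eltwiseNode` and `dataParent` are placeholder names for a fusible Eltwise node and its non-constant input):

    std::vector<float> scales, shifts;
    std::tie(scales, shifts) = eltwiseNode->getScalesAndShifts(dataParent);
    // EltwiseSubtract, constant {2, 4}  ->  scales {1, 1},  shifts {-2, -4}
    // EltwiseDivide,   constant {2}     ->  scales {0.5},   shifts {0}

That is, every supported algorithm is normalized to one canonical form, x * scale + shift, which is what the ScaleShift-style post ops consume.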
@@ -1307,86 +1368,6 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
     return false;
 }

-void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, int align) {
-    scales.clear();
-    shifts.clear();
-    const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
-        auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
-        auto constBlob = constInputNode->getMemoryPtr();
-        const auto elementsCount = constBlob->GetDescWithType<BlockedMemoryDesc>()->getPaddedElementsCount();
-        buffer.resize(elementsCount);
-        cpu_convert(constBlob->GetPtr(),
-                    &buffer[0],
-                    MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()),
-                    Precision::FP32,
-                    elementsCount);
-    };
-
-    const size_t constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0;
-
-    if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
-        fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales);
-    } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
-        fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts);
-    } else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
-        fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
-        fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
-    } else if (one_of(getAlgorithm(), EltwisePowerStatic)) {
-        const auto power = dynamic_cast<const MKLDNNEltwiseNode *>(this);
-        if (!power) {
-            IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode";
-        }
-        scales.push_back(power->getBeta());
-        shifts.push_back(power->getGamma());
-    } else {
-        IE_THROW() << "Can't fill scale and shifts for node: " << getName() << " with type: " << NameFromType(getType());
-    }
-
-    const size_t bufferSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
-    if (align == -1) {
-        align = bufferSize;
-    }
-    const size_t bufferSizeAligned = rnd_up(bufferSize, static_cast<size_t>(align));
-
-    size_t initSize = scales.size();
-    if (initSize > 0) {
-        scales.resize(bufferSizeAligned, 0);
-        if (initSize == 1) {
-            std::fill(scales.begin() + 1, scales.begin() + bufferSize, scales[0]);
-        }
-    }
-
-    initSize = shifts.size();
-    if (initSize > 0) {
-        shifts.resize(bufferSizeAligned, 0);
-        if (initSize == 1) {
-            std::fill(shifts.begin() + 1, shifts.begin() + bufferSize, shifts[0]);
-        }
-    }
-
-    switch (getAlgorithm()) {
-        case EltwiseAdd: {
-            scales.resize(bufferSizeAligned, 1.0f);
-            break;
-        }
-        case EltwiseSubtract: {
-            scales.resize(bufferSizeAligned, 1.0f);
-            std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; });
-            break;
-        }
-        case EltwiseMultiply: {
-            shifts.resize(bufferSizeAligned, 0.0f);
-            break;
-        }
-        case EltwiseDivide: {
-            shifts.resize(bufferSizeAligned, 0.0f);
-            std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; });
-            break;
-        }
-        default: break;
-    }
-}
-
 void MKLDNNNode::createShapeInferSubgraph(const std::shared_ptr<ngraph::Node>& op) {
     ngraph::OutputVector inputsForShapeInfer;
     for (size_t i = 0; i < inputShapes.size(); i++) {
@@ -556,10 +556,18 @@ public:
         return outputShapes[port];
     }

+    /**
+     * @brief Returns scales and shifts if the node can be executed as ScaleShift, otherwise raises an exception
+     * If the node has only a scale or only a shift value, the missing values are filled with defaults,
+     * i.e. EltwiseAdd: fill shifts from the constant input, fill scales with the default value 1.0f
+     * @param parentNode
+     * node from which data comes
+     * @return pair of scales and shifts
+     */
+    std::pair<std::vector<float>, std::vector<float>> getScalesAndShifts(const MKLDNNNode *parentNode) const;
+
 protected:
     bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
-    // TODO [mandrono]: place outside of the node API
-    void fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector<float> &scales, std::vector<float> &shifts, const int align = -1);

     void setType(Type type) {
         this->type = type;
@@ -578,7 +586,7 @@ protected:
      * Seed node should call this routine and pass its post operations list as parameter.
      * @param ops List of fused post operations
      */
-    virtual void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false);
+    virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
     virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const { return nullptr; }

     typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
@@ -19,6 +19,7 @@
 #include "cpu/x64/cpu_isa_traits.hpp"
 #include "utils/general_utils.h"
 #include <ngraph/opsets/opset1.hpp>
+#include "utils/cpu_utils.hpp"

 // WA for xbyak.h
 #ifdef _WIN32
@@ -1127,16 +1128,19 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
     for (auto &node : fusedWith) {
         auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
         if (eltwiseNode) {
-            if (eltwiseNode->isSpecialConvolutionAddFusing())
+            if (eltwiseNode->isSpecialConvolutionAddFusing()) {
                 ops.append_sum(1.0);
-            else
-                eltwiseNode->appendPostOps(ops);
+            } else {
+                // TODO [DS]: change to shape from memory
+                constexpr int align = 16;
+                eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
+            }
             continue;
         }

         auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
         if (fakeQuantizeNode) {
-            fakeQuantizeNode->appendPostOps(ops);
+            fakeQuantizeNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
             continue;
         }

@@ -355,7 +355,7 @@ void MKLDNNConcatNode::createPrimitive() {
             IE_THROW() << "Source memory from " << parent->getName() << " didn't allocate for node "
                        << getName() << ".";
         }
-        // DnnlBlockedMemoryDesc
+
         auto desc = srcMemPtr->GetDescWithType<DnnlMemoryDesc>()->getDnnlDesc();
         auto& dims = getInputShapeAtPort(i).getStaticDims();
         for (size_t j = 0; j < dims.size(); j++) {
@@ -20,6 +20,7 @@
 #include "common/cpu_convert.h"
 #include <memory_desc/cpu_memory_desc_utils.h>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
+#include "utils/cpu_utils.hpp"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -338,7 +339,8 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
             if (eltwiseNode->isSpecialConvolutionAddFusing()) {
                 ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
             } else {
-                eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
+                constexpr int align = 16;
+                eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
                 if (initBinaryMemory) {
                     if (eltwiseNode->scalesMemory)
                         binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
@@ -351,7 +353,9 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe

         auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
         if (fakeQuantizeNode) {
-            fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
+            constexpr int align = -1;
+            // no need to fill post ops dims for FQ, it only makes sense for binary FQ
+            fakeQuantizeNode->appendPostOps(ops, VectorDims{}, align, initAsBinary, initBinaryMemory);
             if (initBinaryMemory) {
                 if (fakeQuantizeNode->cropHighMemory)
                     binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
@@ -15,11 +15,6 @@ using namespace InferenceEngine;

 bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (isDynamicNgraphNode(op)) {
-            errorMessage = "Doesn't support op with dynamic shapes";
-            return false;
-        }
-
         const auto convert = std::dynamic_pointer_cast<const ngraph::opset1::Convert>(op);
         if (!convert) {
             errorMessage = "Only opset1 Convert operation is supported";
@@ -41,14 +36,20 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, co
     }
 }

-MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
+std::vector<VectorDims> MKLDNNConvertNode::shapeInfer() const {
+    return std::vector<VectorDims>{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()};
+}
+
+MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
                                      const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
         : MKLDNNNode("Convert", nodeName, eng, cache) {
-    inputShapes.emplace_back(dims);
+    inputShapes.push_back(shape);
     addOriginalInputPrecision(inPrc);
-    outputShapes.emplace_back(dims);
+    outputShapes.push_back(shape);
     addOriginalOutputPrecision(outPrc);

+    isDynamic = shape.isDynamic();
+
     errorPrefix = "Convert node with name '" + getName() + "'";
 }

@@ -14,13 +14,14 @@ namespace MKLDNNPlugin {
 class MKLDNNConvertNode : public MKLDNNNode {
 public:
     MKLDNNConvertNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
-    MKLDNNConvertNode(const InferenceEngine::SizeVector &dims, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
+    MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc,
                       const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);

     void getSupportedDescriptors() override;
     void initSupportedPrimitiveDescriptors() override;
     void createPrimitive() override;
     void execute(mkldnn::stream strm) override;
+    void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
     bool created() const override;
     bool canBeInPlace() const override {
         return false;
@@ -38,6 +39,9 @@ public:
     const MemoryDesc& getInput() const { return *input; }
     const MemoryDesc& getOutput() const { return *output; }

+    std::vector<VectorDims> shapeInfer() const override;
+    bool needPrepareParams() const override { return false; }
+
     static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

     static bool isSupportedDesc(const MemoryDesc &desc);
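Convert's dynamic support is thus the cheapest possible: the op is element-wise, so shape inference just mirrors the input, and there is no shape-dependent state to rebuild. In short (a restatement of the overrides above, not new behavior):

    // shapeInfer():          output dims == the actual input dims of this iteration
    // needPrepareParams():   false -- no kernels or internal blobs depend on the shape
    // executeDynamicImpl():  forwards to execute(), which handles any defined shape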
@@ -18,6 +18,7 @@
 #include <nodes/common/cpu_memcpy.h>
 #include <memory_desc/cpu_memory_desc_utils.h>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
+#include "utils/cpu_utils.hpp"

 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -273,7 +274,9 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
     for (auto &node : fusedWith) {
         auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
         if (eltwiseNode) {
-            eltwiseNode->appendPostOps(ops);
+            // TODO [DS]: change to shape from memory
+            constexpr int align = 16;
+            eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
             continue;
         }
         auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
@@ -23,6 +23,7 @@
 #include "emitters/jit_bf16_emitters.hpp"
 #include <mkldnn_selective_build.h>
 #include "utils/general_utils.h"
+#include "utils/cpu_utils.hpp"

 #include "ngraph/ngraph.hpp"
 #include <ngraph/opsets/opset1.hpp>
@@ -1007,9 +1008,9 @@ size_t MKLDNNEltwiseNode::getOpInputsNum() const {
 // TODO [DS]: used only in FuseConvolutionSumAndConvolutionSumActivation
 // fix when this transformation is reimplemented for dynamic shapes
 bool MKLDNNEltwiseNode::isWithBroadcast() {
-    auto oDims = outputShapes[0].getStaticDims();
+    auto oDims = getOutputShapeAtPort(0).getStaticDims();
     for (size_t i = 0; i < inputShapes.size(); i++) {
-        auto iDims = inputShapes[i].getStaticDims();
+        auto iDims = getInputShapeAtPort(i).getStaticDims();
         if (iDims != oDims)
             return true;
     }
@@ -1039,7 +1040,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
         return;

     // if dim rank is greater than the maximum possible, we should use the reference execution
-    canUseOptimizedImpl = mayiuse(x64::sse41) && inputShapes[0].getRank() <= MAX_ELTWISE_DIM_RANK;
+    canUseOptimizedImpl = mayiuse(x64::sse41) && getInputShapeAtPort(0).getRank() <= MAX_ELTWISE_DIM_RANK;

     size_t expectedInputsNum = getOpInputsNum();
     for (auto& postOp : fusedWith) {
@@ -1246,10 +1247,6 @@ std::vector<VectorDims> MKLDNNEltwiseNode::shapeInfer() const {
 }

 void MKLDNNEltwiseNode::prepareParams() {
-    if (!inputShapesDefined()) {
-        IE_THROW() << "Can't prepare params for eltwise node with name: " << getName();
-    }
-
     if (memPtrs.empty()) {
         for (auto i = 0; i < inputNum; i++)
             memPtrs.push_back(getParentEdgeAt(i)->getMemoryPtr());
@@ -1520,7 +1517,7 @@ void MKLDNNEltwiseNode::offset_in_calc(VectorDims& offset, VectorDims& dims_in,
     }
 }

-void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
+void MKLDNNEltwiseNode::executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
                                            const VectorDims &dims_out) const {
     parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4],
                    [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
@@ -1535,7 +1532,7 @@ void MKLDNNEltwiseNode::executeOptimized6D(const std::shared_ptr<jit_uni_eltwise
     });
 }

-void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
+void MKLDNNEltwiseNode::executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
                                                 const VectorDims &dims_out, const size_t schedulerWorkAmount) const {
     parallel_nt(0, [&](const int ithr, const int nthr) {
         size_t start = 0, end = 0;
@@ -1690,19 +1687,14 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
     specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd &&
             getInputShapeAtPort(0) == getInputShapeAtPort(1);
     if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
-        if ((parentNode->getType() == FullyConnected) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
-                                                                EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
-            fillScalesAndShifts(parentNode.get(), scales, shifts);
-        } else {
-            fillScalesAndShifts(parentNode.get(), scales, shifts, 16);
-        }
-        scalesSize = static_cast<size_t>(outputShapes[0].getStaticDims()[outputShapes[0].getRank() > 1 ? 1 : 0]);
+        std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
     }
     MKLDNNNode::fuseInto(parentNode);
 }

-void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
+void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
     const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";

     if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
         switch (getMKLDNNAlgorithm()) {
             case mkldnn::algorithm::eltwise_relu:
@@ -1730,18 +1722,21 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
             default: IE_THROW() << errorPrefix << "as post operation is not supported";
         }
     } else {
+        const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
+        scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
+        if (getAlgorithm() != EltwisePrelu) {
+            shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
+        }

         if (initAsBinary) {
             auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
                 if (data.empty())
                     IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";

-                auto outShape = outputShapes[0].getStaticDims();
-                auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
-
-                std::vector<size_t> binaryShape(outShape.size(), 1);
-                binaryShape[chIdx] = outShape[chIdx];
+                std::vector<size_t> binaryDims(postOpDims.size(), 1);
+                binaryDims[chIdx] = postOpDims[chIdx];

-                DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
+                DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
                 ops.append_binary(alg, memoryDesc.getDnnlDesc());

                 if (initBinaryMemory) {
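A concrete example of the binaryDims construction above (illustrative values): with postOpDims = {N, C, H, W} = {1, 16, 56, 56} and chIdx = 1, the per-channel binary post-op memory is described as {1, 16, 1, 1}, which oneDNN broadcasts over batch and spatial dimensions:

    std::vector<size_t> postOpDims{1, 16, 56, 56};
    std::vector<size_t> binaryDims(postOpDims.size(), 1);
    binaryDims[1] = postOpDims[1];  // -> {1, 16, 1, 1}: one value per channel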
@@ -1752,19 +1747,19 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
             switch (getAlgorithm()) {
                 case EltwiseAdd:
                 case EltwiseSubtract:
-                    appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
+                    appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
                     break;
                 case EltwiseMultiply:
                 case EltwiseDivide:
-                    appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
+                    appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
                     break;
                 case EltwiseMulAdd:
                 case EltwisePowerStatic:
-                    appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
-                    appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
+                    appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
+                    appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
                    break;
                case EltwisePrelu:
-                    appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
+                    appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
                    break;
                default:
                    IE_THROW() << errorPrefix << "as post operation is not supported";
@@ -1777,14 +1772,14 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary,
             case EltwiseDivide:
             case EltwiseMulAdd:
             case EltwisePowerStatic:
-                if (scales.empty() || shifts.empty())
+                if (scalesBuffer.empty() || shiftsBuffer.empty())
                     IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
-                ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]);
+                ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
                 break;
             case EltwisePrelu:
-                if (scales.empty())
+                if (scalesBuffer.empty())
                     IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
-                ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr);
+                ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
                 break;
             default:
                 IE_THROW() << errorPrefix << "as post operation is not supported";
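Here the legacy depthwise path consumes the buffers prepared by makeAlignedBuffer above: append_depthwise takes raw float pointers, so the vectors must already be per-channel sized and padded to `align`. Schematically (illustrative sizes; the exact padding rationale is an assumption based on the alignment comment in the FakeQuantize code below):

    // postOpDims {1, 10, ...}, align 16  ->  scalesBuffer/shiftsBuffer hold 16 floats:
    // entries [0..9] carry per-channel data, entries [10..15] are zero padding so a
    // vectorized injector can safely read a full register width past the last channel.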
@@ -1810,7 +1805,7 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
         return true;
     };

-    if (!mayiuse(x64::sse41) || inputShapes[0].getRank() > MAX_ELTWISE_DIM_RANK)
+    if (!mayiuse(x64::sse41) || getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK)
         return false;

     if (!isSuitableNode(this)) {
@@ -75,7 +75,7 @@ public:
     bool created() const override;
     bool canBeInPlace() const override;
     bool canFuse(const MKLDNNNodePtr& node) const override;
-    void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
+    void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
     void fuseInto(MKLDNNNodePtr& parentNode) override;
     InferenceEngine::Precision getRuntimePrecision() const override;

@@ -116,7 +116,7 @@ private:
         void exec(const MKLDNNEltwiseNode& node, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override;
         const jit_eltwise_params& getJep() const override;

-        std::shared_ptr<jit_uni_eltwise_kernel> pKernel;
+        std::unique_ptr<jit_uni_eltwise_kernel> pKernel;
         size_t schedulerWorkAmount = 0;
     };

@@ -149,15 +149,16 @@ private:

     std::vector<float> scales = {};
     std::vector<float> shifts = {};
-    size_t scalesSize = 0;
+    std::vector<float> scalesBuffer = {};
+    std::vector<float> shiftsBuffer = {};

     std::vector<MKLDNNMemoryPtr> memPtrs = {};

     static std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>> initializers;

-    void executeOptimized6D(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
+    void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
                             const VectorDims &dims_out) const;
-    void executeOptimizedGeneric(const std::shared_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
+    void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
                                  const VectorDims &dims_out, const size_t schedulerWorkAmount) const;
     void executeReference(const jit_eltwise_params &jep, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out,
                           const size_t fullWorkAmount) const;
@@ -22,6 +22,7 @@
 #include <memory_desc/cpu_memory_desc_utils.h>
 #include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "utils/ngraph_utils.hpp"
+#include "common/cpu_memcpy.h"

 // Quantization ranges validation is switched off by default in order to avoid regressions on user side
 // #define VALIDATE_QUANTIZATION_RANGES
@@ -825,23 +826,19 @@ private:

 bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
     try {
-        if (isDynamicNgraphNode(op)) {
-            errorMessage = "Doesn't support op with dynamic shapes";
-            return false;
-        }
-
         const auto fq = std::dynamic_pointer_cast<const ngraph::opset1::FakeQuantize>(op);
         if (!fq) {
             errorMessage = "Only opset1 FakeQuantize operation is supported";
             return false;
         }
-        if (fq->get_input_shape(0).size() < 2 || fq->get_input_shape(0).size() > 5) {
-            errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(fq->get_input_shape(0).size());
+        const auto dataRank = fq->get_input_partial_shape(0).rank().get_length();
+        if (dataRank < 2 || dataRank > 5) {
+            errorMessage = "Doesn't support 'data' input with rank: " + std::to_string(dataRank);
             return false;
         }
         for (size_t i = 1; i < fq->get_input_size(); i++) {
-            if (fq->get_input_shape(i).size() > 5) {
-                errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_shape(i).size());
+            if (fq->get_input_partial_shape(i).rank().get_length() > 5) {
+                errorMessage = "Doesn't support 'range' input with rank: " + std::to_string(fq->get_input_partial_shape(i).rank().get_length());
                 return false;
             }
         }
@@ -853,7 +850,7 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ng
         }
         for (size_t i = 1; i < fq->get_input_size(); i++) {
             size_t count_not_unit_axis = 0;
-            auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), fq->get_input_shape(0).size());
+            auto shape = getNormalizedDimsBySize(fq->get_input_shape(i), dataRank);

             if (ngraph::shape_size(shape) != 1) {
                 size_t not_unit_axis = 0;
@@ -892,12 +889,12 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
     if (levels <= 1)
         IE_THROW() << errorPrefix << "supports 'levels' attribute greater than or equal to 2";

-    if (fq->get_input_size() != 5)
-        IE_THROW() << errorPrefix << "has incorrect number of input edges: " << fq->get_input_size();
-    if (fq->get_output_size() != 1)
-        IE_THROW() << errorPrefix << "has incorrect number of output edges: " << fq->get_output_size();
+    if (inputShapes.size() != 5)
+        IE_THROW() << errorPrefix << "has incorrect number of input edges: " << inputShapes.size();
+    if (outputShapes.size() != 1)
+        IE_THROW() << errorPrefix << "has incorrect number of output edges: " << outputShapes.size();

-    auto initAxisIdx = [&](const ngraph::Shape& inputDims) {
+    auto initAxisIdx = [&](const VectorDims& inputDims) {
         size_t axisIdx = 0;
         for (int i = 1; i < inputDims.size(); i++) {
             if (inputDims[i] > 1) {
@@ -908,11 +905,11 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
         return axisIdx;
     };

-    const size_t dataNDims = fq->get_input_shape(0).size();
-    axis = dataNDims == 1 ? 0 : 1;
+    const size_t dataRank = getInputShapeAtPort(0).getRank();
+    axis = dataRank == 1 ? 0 : 1;
     int axisSize = -1;

-    const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataNDims);
+    const auto ilShape = getNormalizedDimsBySize(fq->get_input_shape(1), dataRank);
     auto inputLowAxis = initAxisIdx(ilShape);
     isInputLowBroadcasted = (ngraph::is_scalar(ilShape) || ilShape[inputLowAxis] == 1);
     if (!isInputLowBroadcasted) {
@@ -920,7 +917,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
         axisSize = ilShape[inputLowAxis];
     }

-    const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataNDims);
+    const auto ihShape = getNormalizedDimsBySize(fq->get_input_shape(2), dataRank);
     auto inputHighAxis = initAxisIdx(ihShape);
     isInputHighBroadcasted = (ngraph::is_scalar(ihShape) || ihShape[inputHighAxis] == 1);
     if (!isInputHighBroadcasted) {
@@ -928,7 +925,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
         axisSize = ihShape[inputHighAxis];
     }

-    const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataNDims);
+    const auto olShape = getNormalizedDimsBySize(fq->get_input_shape(3), dataRank);
     auto outputLowAxis = initAxisIdx(olShape);
     isOutputLowBroadcasted = (ngraph::is_scalar(olShape) || olShape[outputLowAxis] == 1);
     if (!isOutputLowBroadcasted) {
@@ -936,7 +933,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
         axisSize = olShape[outputLowAxis];
     }

-    const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataNDims);
+    const auto ohShape = getNormalizedDimsBySize(fq->get_input_shape(4), dataRank);
     auto outputHighAxis = initAxisIdx(ohShape);
     isOutputHighBroadcasted = (ngraph::is_scalar(ohShape) || ohShape[outputHighAxis] == 1);
     if (!isOutputHighBroadcasted) {
@@ -949,11 +946,9 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
     auto outputLowAxisSize = ngraph::is_scalar(olShape) ? 1 : olShape[outputLowAxis];
     auto outputHighAxisSize = ngraph::is_scalar(ohShape) ? 1 : ohShape[outputHighAxis];

-    int axisRealSize = static_cast<int>(fq->get_input_shape(0)[axis]);
-    size_t axisPaddedSize = static_cast<size_t>(rnd_up(fq->get_input_shape(0)[axis], 16));
-
-    if (axisSize != -1 && axisSize != axisRealSize)
+    if (axisSize != -1 && !dimsEqualWeak(axisSize, getInputShapeAtPort(0).getDims()[axis])) {
         IE_THROW() << errorPrefix << "has different quantization axis size on 'data' and 'range' inputs";
+    }

     const auto inputLowNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(fq->get_input_node_shared_ptr(1));
     auto inputLowData = inputLowNode->cast_vector<float>();
@@ -995,12 +990,24 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
     if (binarization) {
         algorithm = FQBinarization;

-        binarizationThresholds.resize(axisPaddedSize);
-        binarizationOutputMask.resize(axisPaddedSize);
+        if (isInputLowBroadcasted) {
+            binarizationThresholds.push_back(inputLowData[0]);
+        } else {
+            IE_ASSERT(axisSize != -1);
+            binarizationThresholds.resize(rnd_up(axisSize, 16));
+            for (int i = 0; i < axisSize; i++) {
+                binarizationThresholds[i] = inputLowData[i];
+            }
+        }

-        for (int i = 0; i < axisRealSize; i++) {
-            binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i];
-            binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000;
-        }
+        if (isOutputHighBroadcasted) {
+            binarizationOutputMask.push_back(outputHighData[0] == 1.f ? 0xffffffff : 0x00000000);
+        } else {
+            IE_ASSERT(axisSize != -1);
+            binarizationOutputMask.resize(rnd_up(axisSize, 16));
+            for (int i = 0; i < axisSize; i++) {
+                binarizationOutputMask[i] = outputHighData[i] == 1.f ? 0xffffffff : 0x00000000;
+            }
+        }
     } else {
         auto allElementsAreEqual = [&](const std::vector<float> &data, size_t size) {
@@ -1117,13 +1124,14 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod

 std::vector<LayoutType> MKLDNNFakeQuantizeNode::getDataFormats() const {
     // Special case for first FQ in the network
-    if (getInputShapeAtPort(0).getStaticDims()[getAxis()] == 3) {
+    const auto &dims = getInputShapeAtPort(0).getDims();
+    if (dims[getAxis()] == 3) {
         return { LayoutType::ncsp };
     } else {
         if (isBinarization()) {
             return { LayoutType::nspc };
         } else {
-            if (one_of(getInputShapeAtPort(0).getRank(), 4, 5)) {
+            if (one_of(dims.size(), 4, 5)) {
                 if (getAxis() == 1) {
                     auto blkFormat = mayiuse(cpu::x64::avx512_common) ? LayoutType::nCsp16c : LayoutType::nCsp8c;
                     return { blkFormat, LayoutType::nspc, LayoutType::ncsp };
@@ -1235,81 +1243,139 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() {
     }
 }

-void MKLDNNFakeQuantizeNode::createPrimitive() {
-    auto config = getSelectedPrimitiveDescriptor()->getConfig();
-
-    auto inDims = config.inConfs[0].desc->getShape().getStaticDims();
-    jqp.c = inDims.size() > 1 ? inDims[1] : 1;
-
-    jqp.src_prc = config.inConfs[0].desc->getPrecision();
-    jqp.wei_prc = Precision::FP32;
-    jqp.dst_prc = config.outConfs[0].desc->getPrecision();
-
-    auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
-    jqp.s_str = srcDesc->getStrides();
-
-    auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
-    jqp.d_str = dstDesc->getStrides();
-
-    jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
-
-    jqp.op_type = getAlgorithm();
-
-    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
-    if (!selectedPrimitiveDescriptor)
-        IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
-
-    if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
-        if (mayiuse(cpu::x64::avx512_common)) {
-            if (isBinarization())
-                quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(jqp));
-            else
-                quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(jqp));
-        } else if (mayiuse(cpu::x64::avx2)) {
-            if (isBinarization())
-                quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(jqp));
-            else
-                quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(jqp));
-        } else if (mayiuse(cpu::x64::sse41)) {
-            if (isBinarization())
-                quantize_kernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(jqp));
-            else
-                quantize_kernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(jqp));
-        }
-    }
-    if (quantize_kernel)
-        quantize_kernel->create_ker();
-
-    size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
-    size_t axisPaddedSize = rnd_up(axisSize, 16);
-
-    DnnlBlockedMemoryDesc weightsDataDesc(Shape(InferenceEngine::SizeVector{axisPaddedSize}), memory::data_type::f32, memory::format_tag::x);
-
-    if (isBinarization()) {
-        auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
-        binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
-        internalBlobMemory.push_back(binarizationThresholdsDataMem);
-
-        auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
-        binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
-        internalBlobMemory.push_back(binarizationMaskDataMem);
-    } else if (levels != 2) {
-        auto pushInternalBlob = [&](std::vector<float>& data) {
-            if (data.size() == 1)
-                data.resize(axisPaddedSize, data[0]);
-            else
-                data.resize(axisPaddedSize);
-            auto memory = std::make_shared<MKLDNNMemory>(getEngine());
-            memory->Create(weightsDataDesc, &data[0]);
-            internalBlobMemory.push_back(memory);
-        };
-
-        pushInternalBlob(cropLow);
-        pushInternalBlob(cropHigh);
-        pushInternalBlob(inputScale);
-        pushInternalBlob(inputShift);
-        pushInternalBlob(outputScale);
-        pushInternalBlob(outputShift);
-    }
-}
+bool MKLDNNFakeQuantizeNode::needPrepareParams() const {
+    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
+    if (!selectedPrimitiveDescriptor)
+        IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
+
+    if (internalBlobMemory.empty() || (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref && inputShapesModified())) {
+        return true;
+    }
+
+    const auto axisSize = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[getAxis()];
+    const auto newPaddedSize = rnd_up(axisSize, 16);
+    const auto currPaddedSize = rnd_up(currentAxisSize, 16);
+
+    return newPaddedSize != currPaddedSize || (isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) &&
+                                               axisSize != currentAxisSize);
+}
+
+void MKLDNNFakeQuantizeNode::prepareParams() {
+    const size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()];
+    const size_t newPaddedSize = rnd_up(axisSize, 16);
+    IE_ASSERT(newPaddedSize != 0);
+
+    if (internalBlobMemory.empty() || newPaddedSize != rnd_up(currentAxisSize, 16) ||
+            (isBinarization() && (isInputLowBroadcasted || isOutputHighBroadcasted) && axisSize != currentAxisSize)) {
+        DnnlBlockedMemoryDesc weightsDataDesc(Shape(VectorDims{newPaddedSize}), memory::data_type::f32, memory::format_tag::x);
+
+        if (isBinarization()) {
+            constexpr size_t numBinFqIntBlob = 2;
+            bool needUpdThr = false, needUpdMask = false;
+            if (isInputLowBroadcasted && axisSize != currentAxisSize) {
+                binarizationThresholds.resize(newPaddedSize);
+                std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + axisSize, binarizationThresholds[0]);
+                std::fill(binarizationThresholds.begin() + axisSize, binarizationThresholds.end(), 0);
+                needUpdThr = true;
+            }
+
+            if (isOutputHighBroadcasted && axisSize != currentAxisSize) {
+                binarizationOutputMask.resize(newPaddedSize);
+                std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + axisSize, binarizationOutputMask[0]);
+                std::fill(binarizationOutputMask.begin() + axisSize, binarizationOutputMask.end(), 0);
+                needUpdMask = true;
+            }
+
+            if (internalBlobMemory.empty() || needUpdThr) {
+                auto binarizationThresholdsDataMem = std::make_shared<MKLDNNMemory>(getEngine());
+                binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr());
+                if (internalBlobMemory.empty()) {
+                    internalBlobMemory.push_back(binarizationThresholdsDataMem);
+                } else {
+                    internalBlobMemory[0] = binarizationThresholdsDataMem;
+                }
+            }
+
+            if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) {
+                auto binarizationMaskDataMem = std::make_shared<MKLDNNMemory>(getEngine());
+                binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr());
+                if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) {
+                    internalBlobMemory.push_back(binarizationMaskDataMem);
+                } else {
+                    internalBlobMemory[1] = binarizationMaskDataMem;
+                }
+            }
+        } else if (levels != 2) {
+            constexpr size_t numFqIntBlob = 6;
+
+            auto pushInternalBlob = [&](std::vector<float>& data, size_t idx) {
+                auto memory = std::make_shared<MKLDNNMemory>(getEngine());
+                bool needOverwrite = getInputShapeAtPort(0).getDims()[getAxis()] == Shape::UNDEFINED_DIM && data.size() == 1;
+                if (needOverwrite) {
+                    memory->Create(weightsDataDesc);
+                    float *ptr = reinterpret_cast<float *>(memory->GetPtr());
+                    std::fill(ptr, ptr + newPaddedSize, data[0]);
+                } else {
+                    if (data.size() == 1) {
+                        data.resize(newPaddedSize, data[0]);
+                    } else {
+                        data.resize(newPaddedSize);
+                    }
+                    memory->Create(weightsDataDesc, &data[0]);
+                }
+
+                if (internalBlobMemory.size() != numFqIntBlob) {
+                    internalBlobMemory.push_back(memory);
+                } else if (needOverwrite) {
+                    internalBlobMemory[idx] = memory;
+                }
+            };
+
+            pushInternalBlob(cropLow, 0);
+            pushInternalBlob(cropHigh, 1);
+            pushInternalBlob(inputScale, 2);
+            pushInternalBlob(inputShift, 3);
+            pushInternalBlob(outputScale, 4);
+            pushInternalBlob(outputShift, 5);
+        } else {
+            IE_THROW() << "Can't fill internal blob for FakeQuantize node with name: " << getName();
+        }
+    }
+    currentAxisSize = axisSize;
+
+    auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor();
+    if (!selectedPrimitiveDescriptor)
+        IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";
+    if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
+        const auto& config = getSelectedPrimitiveDescriptor()->getConfig();
+
+        const auto& inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims();
+
+        jit_quantize_params jqp = {};
+        jqp.c = inDims.size() > 1 ? inDims[1] : 1;
+
+        jqp.src_prc = config.inConfs[0].desc->getPrecision();
+        jqp.wei_prc = Precision::FP32;
+        jqp.dst_prc = config.outConfs[0].desc->getPrecision();
+
+        auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
+        jqp.s_str = srcDesc->getStrides();
+
+        auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType<BlockedMemoryDesc>();
+        jqp.d_str = dstDesc->getStrides();
+
+        jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5);
+
+        jqp.op_type = getAlgorithm();
+
+        execPtr = std::make_shared<FakeQuantizeJitExecutor>(jqp);
+    }
+}
+
+void MKLDNNFakeQuantizeNode::createPrimitive() {
+    if (inputShapesDefined()) {
+        prepareParams();
+        updateLastInputDims();
+    }
+}

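The re-preparation condition compares 16-padded channel counts, so a dynamic batch alone never rebuilds the blobs; only a channel-count change that crosses a padding boundary does. For example (rnd_up semantics assumed from its use above):

    // rnd_up(17, 16) == 32, rnd_up(15, 16) == 16  ->  padded sizes differ, blobs rebuilt
    // rnd_up(15, 16) == 16, rnd_up(9, 16)  == 16  ->  same padded size, blobs reused
    //   (unless broadcast binarization data must be re-expanded to the new axis size)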
@@ -1322,8 +1388,8 @@ void MKLDNNFakeQuantizeNode::executeReference() {
     auto srcDims = srcMemory->getStaticDims();
     auto dstDims = dstMemory->getStaticDims();

-    auto s_str = jqp.s_str;
-    auto d_str = jqp.d_str;
+    auto s_str = srcMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();
+    auto d_str = dstMemory->GetDescWithType<BlockedMemoryDesc>()->getStrides();

     const int N = srcDims[0];
     const int C = srcDims.size() > 1 ? srcDims[1] : 1;
@@ -1331,7 +1397,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
     const int H = srcDims.size() == 3 ? srcDims[2] : srcDims.size() > 3 ? srcDims[srcDims.size() - 2] : 1;
     const int W = srcDims.size() > 3 ? srcDims[srcDims.size() - 1] : 1;

-    if (jqp.op_type == FQBinarization) {
+    if (isBinarization()) {
         size_t tmp = s_str[s_str.size() - 1];
         for (int i = s_str.size() - 1; i > 1; i--) {
             s_str[i] = s_str[i - 1];
@@ -1430,7 +1496,7 @@ void MKLDNNFakeQuantizeNode::executeReference() {
     }
 }

-void MKLDNNFakeQuantizeNode::executeBinarization() {
+void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
     auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
     auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();

@@ -1442,6 +1508,7 @@ void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr<jit_uni_q

     auto src_dims = srcMemory->getStaticDims();

+    const auto &jqp = pKernel->jqp_;
     std::vector<size_t> s_str = jqp.s_str;
     size_t tmp = s_str[s_str.size() - 1];
     for (int i = s_str.size() - 1; i > 1; i--) {
@@ -1465,11 +1532,11 @@ void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr<jit_uni_q
         arg.output_mask = &output_mask[0];
         arg.work_amount = (size_t)C;

-        (*quantize_kernel)(&arg);
+        (*pKernel)(&arg);
     });
 }

-void MKLDNNFakeQuantizeNode::executeQuantization() {
+void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const {
     auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
     auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();

@@ -1490,6 +1557,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_q
     int blk_size = (srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5))
                    ? 1 : mayiuse(cpu::x64::avx512_common) ? 16 : 8;

+    const auto &jqp = pKernel->jqp_;
     auto src_type_size = jqp.src_prc.size();
     auto dst_type_size = jqp.dst_prc.size();

@@ -1536,7 +1604,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_q
             arg.block_size = (size_t) blk_size;
             arg.work_amount = (size_t)H;

-            (*quantize_kernel)(&arg);
+            (*pKernel)(&arg);
         });
     } else {
         parallel_nd(N, CB, D, H, [&](int n, int cb, int d, int h) {
@@ -1564,7 +1632,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr<jit_uni_q
             arg.block_size = (is_blk_format && srcDims.size() != 2) ? (size_t) blk_size : nstl::min(blk_size, C - c);
             arg.work_amount = (size_t) W;

-            (*quantize_kernel)(&arg);
+            (*pKernel)(&arg);
         });
     }
 }
@@ -1575,29 +1643,40 @@ void MKLDNNFakeQuantizeNode::execute(mkldnn::stream strm) {
         IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors.";

     if (selectedPrimitiveDescriptor->getImplementationType() != impl_desc_type::ref) {
-        if (jqp.op_type == FQBinarization)
-            executeBinarization();
-        else
-            executeQuantization();
+        execPtr->exec(*this);
     } else {
         executeReference();
     }
 }

-void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBinary, bool initBinaryMemory) {
+void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
     // MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
     // by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
     // Otherwise it can lead to buffer over-read and performance penalties due to denormals.
     const size_t bufferAlignment = 16;

     if (getAlgorithm() == FQBinarization) {
+        const auto realAxisSize = postOpDims[postOpDims.size() > 1 ? 1 : 0];
+        const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
         if (!isPostOpDataInitialized) {
-            size_t paddedSize = rnd_up(binarizationThresholds.size(), bufferAlignment);
-            binarizationThresholds.resize(paddedSize, 0);
-            binarizationOutputMask.resize(paddedSize, 0);
+            binarizationThresholds.resize(axisPaddedSize, 0);
+            binarizationOutputMask.resize(axisPaddedSize, 0);
+
+            if (isInputLowBroadcasted) {
+                std::fill(binarizationThresholds.begin() + 1, binarizationThresholds.begin() + realAxisSize, binarizationThresholds[0]);
+                std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
+            }
+            if (isOutputHighBroadcasted) {
+                std::fill(binarizationOutputMask.begin() + 1, binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask[0]);
+                std::fill(binarizationOutputMask.begin() + realAxisSize, binarizationOutputMask.end(), 0);
+            }
         }

         ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);

+        if (!isInputLowBroadcasted && !isOutputHighBroadcasted) {
+            isPostOpDataInitialized = true;
+        }
     } else {
         if (!isPostOpDataInitialized) {
             if (cropLow.size() > 1)
@@ -1626,10 +1705,10 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin

     if (initAsBinary) {
         auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) {
-            auto outShape = outputShapes[0].getStaticDims();
-            auto chIdx = outputShapes[0].getRank() > 1 ? 1 : 0;
+            const auto rank = getOutputShapeAtPort(0).getRank();
+            auto chIdx = rank > 1 ? 1 : 0;

-            std::vector<size_t> binaryShape(outShape.size(), 1);
+            std::vector<size_t> binaryShape(rank, 1);
             binaryShape[chIdx] = dataSize;

             DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
@ -1654,10 +1733,45 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, bool initAsBin
|
||||
} else {
|
||||
ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData);
|
||||
}
|
||||
}
|
||||
|
||||
if (!isPostOpDataInitialized)
|
||||
isPostOpDataInitialized = true;
|
||||
}
|
||||
}
|
||||
|
||||
MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) {
|
||||
bool isBinarization = _jqp.op_type == FQBinarization;
|
||||
if (mayiuse(cpu::x64::avx512_common)) {
|
||||
if (isBinarization)
|
||||
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx512_common>(_jqp));
|
||||
else
|
||||
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx512_common>(_jqp));
|
||||
} else if (mayiuse(cpu::x64::avx2)) {
|
||||
if (isBinarization)
|
||||
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::avx2>(_jqp));
|
||||
else
|
||||
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::avx2>(_jqp));
|
||||
} else if (mayiuse(cpu::x64::sse41)) {
|
||||
if (isBinarization)
|
||||
pKernel.reset(new jit_uni_binarization_kernel<cpu::x64::sse41>(_jqp));
|
||||
else
|
||||
pKernel.reset(new jit_uni_quantization_kernel<cpu::x64::sse41>(_jqp));
|
||||
} else {
|
||||
IE_THROW() << "Can't create jit fake quantize kernel";
|
||||
}
|
||||
if (pKernel) {
|
||||
pKernel->create_ker();
|
||||
}
|
||||
}

void MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::exec(const MKLDNNFakeQuantizeNode& node) {
if (!pKernel)
IE_THROW() << "Can't execute, kernel for fake quantize node is not compiled";

if (pKernel->jqp_.op_type == FQBinarization) {
node.executeBinarization(pKernel);
} else {
node.executeQuantization(pKernel);
}
}

bool MKLDNNFakeQuantizeNode::created() const {

@ -73,11 +73,15 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

size_t getAxis() const { return axis; }

bool isBinarization() const { return getAlgorithm() == Algorithm::FQBinarization; }

bool needPrepareParams() const override;
void prepareParams() override;

const float* getBinarizationTresholdsPtr() const { return &binarizationThresholds[0]; }
const float* getBinarizationOutputMaskPtr() const { return reinterpret_cast<const float*>(&binarizationOutputMask[0]); }
size_t getBinarizationTresholdsSize() const { return binarizationThresholds.size(); }
@ -117,7 +121,8 @@ public:
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }

void appendPostOps(mkldnn::post_ops& ops, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = -1, bool initAsBinary = false,
bool initBinaryMemory = false) override;

static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

@ -129,11 +134,24 @@ public:
MKLDNNMemoryPtr outputShiftMemory;

private:
struct FakeQuantizeExecutor {
virtual void exec(const MKLDNNFakeQuantizeNode& node) = 0;
virtual ~FakeQuantizeExecutor() = default;
};
using executorPtr = std::shared_ptr<FakeQuantizeExecutor>;
executorPtr execPtr = nullptr;

struct FakeQuantizeJitExecutor : public FakeQuantizeExecutor {
FakeQuantizeJitExecutor(const jit_quantize_params &_jqp);
void exec(const MKLDNNFakeQuantizeNode& node) override;
std::unique_ptr<jit_uni_quantize_kernel> pKernel;
};

void init() override;
std::vector<LayoutType> getDataFormats() const;
void executeReference();
void executeBinarization();
void executeQuantization();
void executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
void executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;

size_t levels = 0;

@ -170,15 +188,12 @@ private:
bool isOutputLowBroadcasted = false;
bool isOutputHighBroadcasted = false;

size_t currentAxisSize = 0;
size_t axis = 0;

InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32;
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;

jit_quantize_params jqp = {};

std::shared_ptr<jit_uni_quantize_kernel> quantize_kernel = nullptr;

std::string errorPrefix;
};

@ -14,6 +14,7 @@
#include "utils/general_utils.h"
#include <memory_desc/cpu_memory_desc_utils.h>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -189,7 +190,8 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// no need to fill post ops dims for fq, makes sense only for bin fq
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, -1, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
@ -209,7 +211,9 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool ini

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops, initAsBinary, initBinaryMemory);
// TODO [DS]: change to shape from memory
constexpr int align = -1;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());

@ -25,6 +25,7 @@

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset4.hpp>
#include "utils/cpu_utils.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -2394,7 +2395,9 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

@ -24,6 +24,7 @@
#include "utils/general_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "mkldnn_extension_utils.h"
#include "utils/cpu_utils.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -89,7 +90,8 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights

for (auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims());
continue;
}

@ -24,6 +24,7 @@

#include <ngraph/opsets/opset6.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -604,11 +605,6 @@ private:

bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept {
try {
if (isDynamicNgraphNode(op)) {
errorMessage = "Doesn't support op with dynamic shapes";
return false;
}

if (op->get_output_partial_shape(0).rank().is_dynamic()) {
errorMessage = "Unsupported dynamic input rank.";
return false;
@ -680,7 +676,6 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
IE_THROW(NotImplemented) << errorMessage;
}

const ngraph::Shape& inDataShape = op->input_value(0).get_shape();
if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v6::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
@ -689,27 +684,25 @@ MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr<ngraph::Node>& op, const mkld
epsMode_ = OUTSIDE_SQRT;
}

acrossChannels_ = false;
const auto& inDataShapeSize = inDataShape.size();
initAcrossChannels_ = false;
const auto& inDataShapeSize = getInputShapeAtPort(0).getRank();
if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1)
acrossChannels_ = true;
initAcrossChannels_ = true;
} else if (auto mvnOp = ngraph::as_type_ptr<ngraph::op::v0::MVN>(op)) {
normalizeVariance_ = mvnOp->get_normalize_variance();
epsValue_ = mvnOp->get_eps();
epsMode_ = INSIDE_SQRT;
acrossChannels_ = mvnOp->get_across_channels();
initAcrossChannels_ = mvnOp->get_across_channels();
}
execAcrossChannels_ = initAcrossChannels_;
}

void MKLDNNMVNNode::getSupportedDescriptors() {
}
void MKLDNNMVNNode::getSupportedDescriptors() {}

void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
if (!supportedPrimitiveDescriptors.empty())
return;

setPostOps(attr, true);

Precision inputPrecision = getOriginalInputPrecisionAtPort(0);
Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
if (!mayiuse(avx512_core)) {
@ -729,7 +722,8 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
src_data_size = inputPrecision.size();
dst_data_size = outputPrecision.size();

bool canBeInplace = (src_data_size == dst_data_size) &&
// TODO [DS]: inplace
bool canBeInplace = !isDynamicNode() && (src_data_size == dst_data_size) &&
(getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) &&
!getParentEdgeAt(0)->getParent()->isConstant();

@ -788,7 +782,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
pushDesc(LayoutType::ncsp, impl_type);
}

void MKLDNNMVNNode::createPrimitive() {
void MKLDNNMVNNode::prepareParams() {
auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
@ -800,74 +794,87 @@ void MKLDNNMVNNode::createPrimitive() {

const SizeVector in_dims = srcMemPtr->getStaticDims();
transformTo5DCase(in_dims);
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = acrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;

if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));
setPostOps(attr, true);

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (mayiuse(cpu::x64::sse41)) {
auto selectedPD = getSelectedPrimitiveDescriptor();
auto jcp = jit_mvn_config_params();
jcp.src_prc = selectedPD->getConfig().inConfs[0].desc->getPrecision();
jcp.dst_prc = selectedPD->getConfig().outConfs[0].desc->getPrecision();
jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.src_prc));
jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(jcp.dst_prc));
jcp.planar_layout = selectedPD->getConfig().inConfs[0].desc->hasLayoutType(LayoutType::ncsp);
jcp.normalize_variance = normalizeVariance_;
jcp.across_channels = execAcrossChannels_;
int N = 0;
std::tie(N, jcp.C, jcp.D, jcp.H, jcp.W) = shape5D;

if (mayiuse(cpu::x64::avx512_common)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx512_common>(jcp, *attr.get()));

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx512_common>(jcp));
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
}
}
} else if (mayiuse(cpu::x64::avx2)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::avx2>(jcp, *attr.get()));

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::avx2>(jcp));
}
} else if (mayiuse(cpu::x64::sse41)) {
mvn_kernel.reset(new jit_uni_mvn_kernel_f32<cpu::x64::sse41>(jcp, *attr.get()));
if (mvn_kernel)
mvn_kernel->create_ker();

jcp.normalize_variance = false;
mvn_mean_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (normalizeVariance_) {
jcp.normalize_variance = true;
mvn_variance_kernel.reset(new jit_uni_mvn_mean_variance_kernel_f32<cpu::x64::sse41>(jcp));
if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();

if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}
}

void MKLDNNMVNNode::createPrimitive() {
if (inputShapesDefined()) {
if (needPrepareParams())
prepareParams();
updateLastInputDims();
}

if (mvn_kernel)
mvn_kernel->create_ker();

if (mvn_mean_kernel)
mvn_mean_kernel->create_ker();

if (mvn_variance_kernel)
mvn_variance_kernel->create_ker();
}

void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {
switch (shape.size()) {
// for 1 and 2 rank, if acrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// for 1 and 2 rank, if initAcrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure.
// otherwise there is not enough data in the spatial dimension to process in one kernel.
case 1 : // C
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, 1, 1, 1, shape[0]);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(1, shape[0], 1, 1, 1);
break;
}
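// Illustrative example (hypothetical size): a 1D input {8} with
// initAcrossChannels_ == true becomes shape5D == (1, 1, 1, 1, 8), so the whole
// tensor is normalized as one spatial row; with initAcrossChannels_ == false it
// becomes (1, 8, 1, 1, 1) and every element is treated as its own channel.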
case 2 : // NC
if (acrossChannels_) {
if (initAcrossChannels_) {
shape5D = std::make_tuple(1, shape[0], 1, shape[1], 1);
acrossChannels_ = false;
execAcrossChannels_ = false;
break;
} else {
shape5D = std::make_tuple(shape[0], shape[1], 1, 1, 1);
@ -882,6 +889,8 @@ void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) {

void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {
mkldnn::post_ops ops;
VectorDims postOpDims(5);
std::tie(postOpDims[0], postOpDims[1], postOpDims[2], postOpDims[3], postOpDims[4]) = shape5D;
for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
@ -891,7 +900,8 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) {

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, postOpDims, align);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@ -906,22 +916,21 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) {
uint8_t *dst_data = reinterpret_cast<uint8_t*>(dstMemPtr->GetPtr());
uint8_t *src_data = reinterpret_cast<uint8_t*>(srcMemPtr->GetPtr());

auto dim = srcMemPtr->getStaticDims();
if (mayiuse(cpu::x64::sse41)) {
if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) {
IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 or above platform.";
}
if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) {
mvn_pln(src_data, dst_data, dim);
mvn_pln(src_data, dst_data);
} else {
mvn_blk(src_data, dst_data, dim);
mvn_blk(src_data, dst_data);
}
} else {
mvn_ref(src_data, dst_data, dim);
mvn_ref(src_data, dst_data);
}
}

void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // blk size in vmm
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -943,7 +952,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si

for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Calculate mean value for one instance in batch
// Parallel sum for each channel
float C3inv = 1.f / static_cast<float>(C3);
@ -1056,7 +1065,7 @@ void MKLDNNMVNNode::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}

void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data) {
const float *src_data_ptr = reinterpret_cast<const float *>(src_data);
float *dst_data_ptr = reinterpret_cast<float *>(dst_data);
size_t N = 0; size_t C = 0; size_t D = 0; size_t H = 0; size_t W = 0;
@ -1068,7 +1077,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si

for (size_t b = 0lu; b < N; b++) {
size_t cb = b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// Parallel sum for each channel for mean
float C3inv = 1.f / static_cast<float>(C3);
float mean_temp = 0.0f;
@ -1154,7 +1163,7 @@ void MKLDNNMVNNode::mvn_ref(const uint8_t* src_data, uint8_t* dst_data, const Si
}
}

void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const SizeVector& dims) {
void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data) {
size_t blk_size = 1; // channel blk for memory layout
if (mayiuse(cpu::x64::avx512_common)) {
blk_size = 16;
@ -1176,7 +1185,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
size_t C5 = C * D * H * W;

size_t threads_num = parallel_get_num_threads();
size_t aux_buffer_size = acrossChannels_ ? blk_size : rnd_up(C, blk_size);
size_t aux_buffer_size = execAcrossChannels_ ? blk_size : rnd_up(C, blk_size);
std::vector<float> mean_buffer(aux_buffer_size * threads_num);
std::vector<float> variance_buffer(aux_buffer_size * threads_num);

@ -1185,7 +1194,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si

for (size_t b = 0lu; b < N; b++) {
size_t b_offset = is_nhwc ? b * C5 : b * C3;
if (acrossChannels_) {
if (execAcrossChannels_) {
// mean for this instance in batch
float C5inv = 1.f / static_cast<float>(C5);
float mean_temp = 0.0f;
@ -1213,7 +1222,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si
arg.src_stride = src_stride_size;
arg.work_amount = static_cast<size_t>(W);
arg.oc_off = static_cast<size_t>(cb * blk_size * sizeof(float)); // for tail process
(*mvn_mean_kernel)(&arg); // for W * blk
(*mvn_mean_kernel)(&arg); // for W * blk

size_t min_cb = (std::min)(blk_size, C - cb * blk_size);
for (int i = 0; i < min_cb; i++)
@ -1401,7 +1410,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const {
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu);
if ((inputRank == 1 && !unaryEltwise) ||
(inputRank == 2 && !unaryEltwise && acrossChannels_)) {
(inputRank == 2 && !unaryEltwise && initAcrossChannels_)) {
return false;
}

@ -80,12 +80,13 @@ public:
void createPrimitive() override;
bool created() const override;
void execute(mkldnn::stream strm) override;
void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }
bool canBeInPlace() const override {
return false;
}

inline bool getAcrossChannels() const {
return acrossChannels_;
return initAcrossChannels_;
}

inline bool getNormalizeVariance() const {
@ -94,12 +95,14 @@ public:

bool canFuse(const MKLDNNNodePtr& node) const override;

void prepareParams() override;

private:
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_pln(const uint8_t *src_data, uint8_t *dst_data);

void mvn_blk(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_blk(const uint8_t *src_data, uint8_t *dst_data);

void mvn_ref(const uint8_t *src_data, uint8_t *dst_data, const InferenceEngine::SizeVector &dims);
void mvn_ref(const uint8_t *src_data, uint8_t *dst_data);

void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false);

@ -107,7 +110,8 @@ private:

std::tuple<size_t, size_t, size_t, size_t, size_t> shape5D;

bool acrossChannels_ = false;
bool initAcrossChannels_ = false;
bool execAcrossChannels_ = false;
bool normalizeVariance_ = true;
float epsValue_ = 1e-9f;
// Defines way to add epsilon: inside sqrt or outside.
@ -122,8 +126,6 @@ private:

mkldnn::primitive_attr attr;

std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;

std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_mean_kernel;
std::shared_ptr<jit_uni_mvn_mean_variance_kernel> mvn_variance_kernel;
std::shared_ptr<jit_uni_mvn_kernel> mvn_kernel;

@ -22,6 +22,7 @@

#include <ngraph/opsets/opset1.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "utils/cpu_utils.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
@ -811,7 +812,9 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr &attr, bool initWe

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
eltwiseNode->appendPostOps(ops);
// TODO [DS]: change to shape from memory
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}

@ -102,10 +102,6 @@ void MKLDNNReorderNode::createPrimitive() {

void MKLDNNReorderNode::prepareParams() {
if (!isOptimized) {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for reorder node with name: " << getName();
}

auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())

@ -124,10 +124,6 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() {
}

void MKLDNNSelectNode::prepareParams() {
if (!inputShapesDefined()) {
IE_THROW() << "Can't prepare params for select node with name: " << getName();
}

const auto &_conditionDims = getParentEdgesAtPort(CONDITION)[0]->getMemory().getStaticDims();
const auto &_thenDims = getParentEdgesAtPort(THEN)[0]->getMemory().getStaticDims();
const auto &_elseDims = getParentEdgesAtPort(ELSE)[0]->getMemory().getStaticDims();

@ -32,9 +32,13 @@ inline std::vector<size_t> getNormalizedDimsBySize(const InferenceEngine::SizeVe
* shape to which the second shape should be broadcastable
* @param secondInputDims
* shape which should be broadcastable
* @param weakComparison
* flag which specifies how C dims are compared when the value is undefined (weak or strong)
* @return true if broadcastable, false otherwise.
*/
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims) {
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims,
bool weakComparison = false) {
bool (*dimsEqual)(size_t, size_t) = weakComparison ? static_cast<bool (*)(size_t, size_t)>(dimsEqualWeak) : dimsEqualStrong;
if (secondInputDims.size() > firstInputDims.size())
return false;
if (std::accumulate(secondInputDims.begin(), secondInputDims.end(), 1, std::multiplies<size_t>()) == 1)
@ -42,7 +46,7 @@ inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVect

std::vector<size_t> normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size());
for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) {
if ((i == 1 && normalizedSecondInputDims[i] != firstInputDims[1]) || (i != 1 && normalizedSecondInputDims[i] != 1))
if ((i == 1 && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[1])) || (i != 1 && normalizedSecondInputDims[i] != 1))
return false;
}
return true;
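// Illustration of the weak-comparison mode (hypothetical shapes): with
// firstInputDims == {1, Shape::UNDEFINED_DIM, 16, 16} and
// secondInputDims == {1, 32, 1, 1}, a call with weakComparison == true returns
// true, since dimsEqualWeak treats the undefined channel dim as compatible,
// while the default strong comparison would return false.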
@ -90,4 +94,34 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine:
return precision;
}

/**
* @brief Returns a buffer aligned to targetSize.
* If the buffer has size 1, its value is broadcast across targetSize elements.
* If the aligned buffer size is greater than targetSize, the remaining values are filled with zeros.
* @param targetSize
* target size of the buffer
* @param buffer
* buffer to be aligned
* @param align
* alignment for targetSize
* @return aligned buffer
*/
inline std::vector<float> makeAlignedBuffer(size_t targetSize, const std::vector<float> &buffer, int align = -1) {
if (buffer.empty()) {
IE_THROW() << "Can't align buffer, because buffer is empty";
}

auto alignedBuffer = buffer;
if (align == -1) {
align = targetSize;
}
const size_t bufferSizeAligned = rnd_up(targetSize, align);

alignedBuffer.resize(bufferSizeAligned, 0);
if (buffer.size() == 1) {
std::fill(alignedBuffer.begin() + 1, alignedBuffer.begin() + targetSize, buffer[0]);
}
return alignedBuffer;
}
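// A minimal usage sketch of the helper above (hypothetical values, illustration
// only): a scalar scale broadcast to 8 channels, zero-padded up to align == 16.
inline void makeAlignedBufferExampleSketch() {
    std::vector<float> scales = {0.5f};
    auto aligned = makeAlignedBuffer(8, scales, 16);
    // aligned.size() == 16; aligned[0..7] == 0.5f, aligned[8..15] == 0.0f
    (void)aligned;
}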

} // namespace MKLDNNPlugin

@ -143,6 +143,15 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CanSetInBlobWithDifferentPrecision/netPRC=BIN.*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=(I4|U4).*)",
R"(.*CanSetOutBlobWithDifferentPrecision/netPRC=BIN.*)",

// Issue: 69086
// need to add support convert BIN -> FP32
// if we set output precision as BIN, when we create output blob precision looks like UNSPECIFIED
R"(.*smoke_FakeQuantizeLayerCPUTest.*bin.*)",
// Issue: 69088
// bad accuracy
R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos.*IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)",
};

#define FIX_62820 0

@ -2,33 +2,106 @@
// SPDX-License-Identifier: Apache-2.0
//

#include <shared_test_classes/single_layer/conversion.hpp>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"

using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
using namespace ngraph;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

class ConvertCPULayerTest : public ConversionLayerTest {};
using convertLayerShapeDefinition = std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>;

using convertLayerTestParamsSet = std::tuple<convertLayerShapeDefinition, // input shapes
InferenceEngine::Precision, // input precision
InferenceEngine::Precision, // output precision
CPUSpecificParams>;

class ConvertCPULayerTest : public testing::WithParamInterface<convertLayerTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase {
public:
static std::string getTestCaseName(testing::TestParamInfo<convertLayerTestParamsSet> obj) {
convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = obj.param;

std::ostringstream result;
if (!shapes.first.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
}
result << "TS=";
for (const auto& shape : shapes.second) {
result << CommonTestUtils::vec2str(shape) << "_";
}
result << "inputPRC=" << inPrc.name() << "_";
result << "targetPRC=" << outPrc.name() << "_";
result << CPUTestsBase::getTestCaseName(cpuParams);

return result.str();
}

protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;

convertLayerShapeDefinition shapes;
InferenceEngine::Precision inPrc, outPrc;
CPUSpecificParams cpuParams;
std::tie(shapes, inPrc, outPrc, cpuParams) = GetParam();

std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

selectedType = std::string("unknown_") + (inPrc == InferenceEngine::Precision::U8 ? "I8" : inPrc.name());

for (size_t i = 0; i < shapes.second.size(); i++) {
targetStaticShapes.push_back(std::vector<ngraph::Shape>{shapes.second[i]});
}
inputDynamicShapes = shapes.first;

auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);
auto targetPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
auto params = ngraph::builder::makeParams(ngPrc, {targetStaticShapes[0][0]});
auto conversion = ngraph::builder::makeConversion(params.front(), targetPrc, helpers::ConversionTypes::CONVERT);

function = makeNgraphFunction(ngPrc, params, conversion, "ConversionCPU");
}
};

TEST_P(ConvertCPULayerTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()

ConversionParamsTuple params = GetParam();
inPrc = std::get<2>(params);
outPrc = std::get<3>(params);

Run();

CheckPluginRelatedResults(executableNetwork, "Convert");
}

namespace {
const std::vector<ngraph::helpers::ConversionTypes> conversionOpTypes = {
ngraph::helpers::ConversionTypes::CONVERT,
ngraph::helpers::ConversionTypes::CONVERT_LIKE,
std::vector<convertLayerShapeDefinition> inShapes_4D = {
{{}, {{1, 2, 3, 4}}},
{
// dynamic
{{-1, -1, -1, -1}},
// target
{
{2, 4, 4, 1},
{2, 17, 5, 4},
{1, 2, 3, 4}
}
},
{
// dynamic
{{{1, 5}, {2, 22}, {2, 9}, {1, 4}}},
// target
{
{2, 17, 5, 4},
{5, 2, 3, 2},
{1, 10, 4, 1},
}
}
};
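// Note (illustrative): -1 marks a fully dynamic dimension and {1, 5} a bounded
// interval; each "target" list holds the static shapes the dynamic case is
// actually inferred with during the test run.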

const std::vector<std::vector<size_t>> inShape = {{1, 2, 3, 4}};

// List of precisions natively supported by mkldnn.
const std::vector<Precision> precisions = {
Precision::U8,
@ -38,26 +111,19 @@ const std::vector<Precision> precisions = {
Precision::BF16
};

INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_From_BF16, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::Values(Precision::BF16),
::testing::ValuesIn(precisions),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
std::vector<CPUSpecificParams> memForm4D = {
CPUSpecificParams({nchw}, {nchw}, {}, {}),
CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nChw8c}, {nChw8c}, {}, {}),
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};

INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest_To_BF16, ConvertCPULayerTest,
INSTANTIATE_TEST_SUITE_P(smoke_ConvertCPULayerTest, ConvertCPULayerTest,
::testing::Combine(
::testing::ValuesIn(conversionOpTypes),
::testing::Values(inShape),
::testing::ValuesIn(inShapes_4D),
::testing::ValuesIn(precisions),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
ConversionLayerTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
::testing::ValuesIn(precisions),
::testing::ValuesIn(memForm4D)),
ConvertCPULayerTest::getTestCaseName);

} // namespace CPULayerTestsDefinitions
@ -12,15 +12,18 @@ using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {

using inputShapes = std::tuple<std::vector<ngraph::PartialShape>, // dynamic input shapes
std::vector<ngraph::Shape>, // target input shapes
std::vector<SizeVector>>; // range input shapes

using fqSpecificParams = std::tuple<int64_t, // 'data' input low bounds
int64_t, // 'data' input high bounds
std::vector<float>, // output low
std::vector<float>, // output high
std::vector<SizeVector>, // 'range' inputs shapes
size_t>; // levels

using fqLayerTestParamsSet = std::tuple<fqSpecificParams,
SizeVector, // 'data' input shape
inputShapes, // input shapes
Precision, // input precision
std::pair<std::vector<float>, std::vector<float>>, // il and ih values
bool, // should be decomposed
@ -31,30 +34,39 @@ class FakeQuantizeLayerCPUTest : public testing::WithParamInterface<fqLayerTestP
public:
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = obj.param;

std::vector<ngraph::PartialShape> dynamicShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(dynamicShapes, targetShapes, ranges) = testShapes;

int64_t inDataLowBounds, inDataHighBounds;
std::vector<float> inputLow, inputHigh, outputLow, outputHigh;
std::vector<SizeVector> inRangesShapes;
size_t levels;
inputLow = inputRangesValues.first;
inputHigh = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, outputLow, outputHigh, levels) = fqParams;

std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inDataShape) << "_";
if (!dynamicShapes.empty()) {
result << "IS=" << CommonTestUtils::partialShape2str(dynamicShapes) << "_";
}
result << "TS=";
for (const auto& shape : targetShapes) {
result << "(" << CommonTestUtils::vec2str(shape) << ")_";
}
result << "RS=";
for (const auto& data : ranges) {
result << "(" << CommonTestUtils::vec2str(data) << ")_";
}
result << "inPrec=" << inPrec.name() << "_";

std::string rs = "";
for (size_t i = 0; i < inRangesShapes.size(); i++) {
rs += CommonTestUtils::vec2str(inRangesShapes[i]) + "_";
}
result << "RS=" << rs;
result << "LOW_BOUNDS=" << inDataLowBounds << "_";
result << "HIGH_BOUNDS=" << inDataHighBounds << "_";
result << "IL=" << CommonTestUtils::vec2str(inputLow) << "_";
@ -75,7 +87,9 @@ public:
const InputsDataMap &inDataMap = cnnNetwork.getInputsInfo();
auto input = inDataMap.begin();

Blob::Ptr blob = FuncTestUtils::createAndFillBlob(input->second->getTensorDesc(), inDataHighBounds - inDataLowBounds, inDataLowBounds);
const auto td = input->second->getTensorDesc();
Blob::Ptr blob = FuncTestUtils::createAndFillBlob(InferenceEngine::TensorDesc(td.getPrecision(), targetStaticShapes[index][0], td.getLayout()),
inDataHighBounds - inDataLowBounds, inDataLowBounds);
inferRequest.SetBlob(input->second->name(), blob);
inputs.push_back(blob);

@ -88,30 +102,37 @@ protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
fqSpecificParams fqParams;
SizeVector inDataShape;
inputShapes testShapes;
Precision inPrec;
std::pair<std::vector<float>, std::vector<float>> inputRangesValues;
bool shouldBeDecomposed;
CPUSpecificParams cpuParams;
std::tie(fqParams, inDataShape, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();
std::tie(fqParams, testShapes, inPrec, inputRangesValues, shouldBeDecomposed, cpuParams) = this->GetParam();

std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;

std::vector<SizeVector> inRangesShapes;
std::vector<ngraph::Shape> targetShapes;
std::vector<SizeVector> ranges;
std::tie(inputDynamicShapes, targetShapes, ranges) = testShapes;

for (size_t i = 0; i < targetShapes.size(); i++) {
targetStaticShapes.push_back(std::vector<ov::Shape>{targetShapes});
}

size_t levels;
std::vector<std::vector<float>> rangesBounds(RANGES_INPUT_NUMBER);
rangesBounds[0] = inputRangesValues.first;
rangesBounds[1] = inputRangesValues.second;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], inRangesShapes, levels) = fqParams;
std::tie(inDataLowBounds, inDataHighBounds, rangesBounds[2], rangesBounds[3], levels) = fqParams;

auto ngInPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec);
ParameterVector params = builder::makeParams(ngInPrec, {inDataShape});
ParameterVector params = builder::makeParams(ngInPrec, {targetStaticShapes[0][0]});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<opset5::Parameter>(params));

auto il = builder::makeConstant(ngInPrec, inRangesShapes[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, inRangesShapes[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, inRangesShapes[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, inRangesShapes[3], rangesBounds[3], rangesBounds[3].empty());
auto il = builder::makeConstant(ngInPrec, ranges[0], rangesBounds[0], rangesBounds[0].empty());
auto ih = builder::makeConstant(ngInPrec, ranges[1], rangesBounds[1], rangesBounds[1].empty());
auto ol = builder::makeConstant(ngInPrec, ranges[2], rangesBounds[2], rangesBounds[2].empty());
auto oh = builder::makeConstant(ngInPrec, ranges[3], rangesBounds[3], rangesBounds[3].empty());
auto fq = std::make_shared<opset5::FakeQuantize>(paramOuts[0], il, ih, ol, oh, levels);

layerName = shouldBeDecomposed ? "" : "FakeQuantize";
@ -120,9 +141,7 @@ protected:
selectedType = getPrimitiveType() + "_" + inPrec.name();
}

fq->get_rt_info() = getCPUInfo();

function = std::make_shared<Function>(fq, params, "FakeQuantizeCPU");
function = makeNgraphFunction(ngInPrec, params, fq, "FakeQuantizeCPU");
}

private:
@ -132,6 +151,7 @@ private:
};

TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();

CheckPluginRelatedResults(executableNetwork, layerName);
@ -149,6 +169,12 @@ const std::vector<std::pair<std::vector<float>, std::vector<float>>> input_range

const std::vector<float> outputLow{5.0f}, outputHigh{25.0f};

const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(levels));

namespace fqImpl {

std::vector<CPUSpecificParams> memForm4D_jit = {
@ -157,19 +183,31 @@ std::vector<CPUSpecificParams> memForm4D_jit = {
CPUSpecificParams({nChw16c}, {nChw16c}, {}, {})
};

const std::vector<std::vector<SizeVector>> rangesShapes4D_jit = {
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_jit = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}, {1, 5, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 12, 1, 1}, {4, 1, 8, 2}, {1, 16, 6, 1}},
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {1, 16, 1, 1}, {7, 16, 1, 2}, {1, 16, 6, 1}},
{{1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}, {1, 16, 1, 1}}
},
};

const auto specificParams4D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_jit),
::testing::ValuesIn(levels));
const auto testParams4D_jit = ::testing::Combine(specificParams4D_jit,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -181,18 +219,21 @@ std::vector<CPUSpecificParams> memForm4D_ref = {
CPUSpecificParams({nchw}, {nchw}, {"ref_FP32"}, {"ref_FP32"})
};

const std::vector<std::vector<SizeVector>> rangesShapes4D_ref = {
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
std::vector<inputShapes> rangesShapes4D_ref = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 16, 6, 7}, {4, 1, 1, 1}, {4, 16, 1, 2}, {4, 16, 6, 1}},
{{4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}, {4, 1, 1, 1}}
},
};

const auto specificParams4D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes4D_ref),
::testing::ValuesIn(levels));
const auto testParams4D_ref = ::testing::Combine(specificParams4D_ref,
::testing::Values(SizeVector{4, 5, 6, 7}),
const auto testParams4D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes4D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -206,19 +247,31 @@ std::vector<CPUSpecificParams> memForm5D_jit = {
CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {})
};

const std::vector<std::vector<SizeVector>> rangesShapes5D_jit = {
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_jit = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}, {1, 4, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 4, 5, 6, 7}, {1, 12, 1, 1, 1}, {4, 1, 8, 2, 7}, {1, 16, 6, 5, 1}},
{{1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {1, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{4, 16, 6, 7, 8}, {1, 16, 1, 1, 1}, {7, 16, 1, 2, 5}, {1, 16, 6, 1, 7}},
{{1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 16, 1, 1, 1}}
},
};

const auto specificParams5D_jit = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_jit),
::testing::ValuesIn(levels));
const auto testParams5D_jit = ::testing::Combine(specificParams5D_jit,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_jit = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_jit),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -231,18 +284,21 @@ std::vector<CPUSpecificParams> memForm5D_ref = {
CPUSpecificParams({ncdhw}, {ncdhw}, {"ref_FP32"}, {"ref_FP32"})
};

const std::vector<std::vector<SizeVector>> rangesShapes5D_ref = {
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
std::vector<inputShapes> rangesShapes5D_ref = {
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{3, 16, 6, 7, 8}, {3, 16, 1, 1, 1}, {3, 16, 1, 2, 5}, {3, 16, 6, 1, 7}},
{{3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}, {3, 1, 1, 1, 1}}
},
};

const auto specificParams5D_ref = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes5D_ref),
::testing::ValuesIn(levels));
const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,
::testing::Values(SizeVector{3, 4, 5, 6, 7}),
const auto testParams5D_ref = ::testing::Combine(specificParams,
::testing::ValuesIn(rangesShapes5D_ref),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(false),
@ -250,32 +306,115 @@ const auto testParams5D_ref = ::testing::Combine(specificParams5D_ref,

INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_5D_ref, FakeQuantizeLayerCPUTest, testParams5D_ref, FakeQuantizeLayerCPUTest::getTestCaseName);

const auto specificParamsBin = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(std::vector<float>{0.0f}),
::testing::Values(std::vector<float>{1.0f}),
::testing::Values(2));

const auto testParamsBin4D = ::testing::Combine(specificParamsBin,
::testing::ValuesIn(rangesShapes4D_jit),
::testing::Values(Precision::FP32),
::testing::Values(std::pair<std::vector<float>, std::vector<float>>{{3.0f}, {3.f}}),
::testing::Values(false),
::testing::Values(CPUSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_FakeQuantizeLayerCPUTest_4D_bin, FakeQuantizeLayerCPUTest, testParamsBin4D, FakeQuantizeLayerCPUTest::getTestCaseName);

} // namespace fqImpl

const std::vector<SizeVector> dataShapes = {
{4, 5, 6, 7},
{3, 4, 5, 6, 7},
{2, 3, 4, 5, 6, 7},
};

const std::vector<std::vector<SizeVector>> rangesShapes = {
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
};

namespace fqDecompos {

const auto specificParams = ::testing::Combine(::testing::Values(dataLowBounds),
::testing::Values(dataHighBounds),
::testing::Values(outputLow),
::testing::Values(outputHigh),
::testing::ValuesIn(rangesShapes),
::testing::ValuesIn(levels));
std::vector<inputShapes> decomposeShapes = {
inputShapes{
{},
{{4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}, {4, 5, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 5, 1, 1}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 6, 7}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
inputShapes{
{},
{{2, 3, 4, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1}},
{{4, 5, 6, 7}, {1, 5, 6, 7}, {7, 5, 6, 7}},
{{1, 1, 6, 1}, {1, 5, 6, 7}, {1, 1, 6, 1}, {1, 1, 6, 1}}
},
inputShapes{
{{-1, -1, -1, -1, -1}},
{{8, 4, 5, 6, 7}, {1, 1, 5, 6, 7}, {1, 1, 1, 6, 7}},
{{1, 1, 6, 7}, {1, 1, 6, 7}, {1, 1, 1, 1}, {1, 1, 1, 1}}
},
};

const auto testParams = ::testing::Combine(specificParams,
::testing::ValuesIn(dataShapes),
::testing::ValuesIn(decomposeShapes),
::testing::Values(Precision::FP32),
::testing::ValuesIn(input_ranges),
::testing::Values(true),

@ -12,8 +12,16 @@ using namespace CPUTestUtils;
|
||||
|
||||
namespace CPULayerTestsDefinitions {
|
||||
|
||||
using basicCpuMvnParams = std::tuple<
|
||||
std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>, // Input shapes
|
||||
InferenceEngine::Precision, // Input precision
|
||||
ngraph::AxisSet, // Reduction axes
|
||||
bool, // Across channels
|
||||
bool, // Normalize variance
|
||||
double>; // Epsilon
|
||||

typedef std::tuple<
        LayerTestsDefinitions::mvn1Params,
        basicCpuMvnParams,
        CPUSpecificParams,
        fusingSpecificParams,
        Precision, // CNNNetwork input precision
@ -24,16 +32,35 @@ class MvnLayerCPUTest : public testing::WithParamInterface<MvnLayerCPUTestParamSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public:
    static std::string getTestCaseName(testing::TestParamInfo<MvnLayerCPUTestParamSet> obj) {
        LayerTestsDefinitions::mvn1Params basicParamsSet;
        basicCpuMvnParams basicParamsSet;
        CPUSpecificParams cpuParams;
        fusingSpecificParams fusingParams;
        Precision inputPrecision, outputPrecision;
        std::tie(basicParamsSet, cpuParams, fusingParams, inputPrecision, outputPrecision) = obj.param;

        std::ostringstream result;
        result << LayerTestsDefinitions::Mvn1LayerTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::mvn1Params>(
                basicParamsSet, 0));
        std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
        InferenceEngine::Precision netPrecision;
        ngraph::AxisSet axes;
        bool acrossChannels, normalizeVariance;
        double eps;
        std::tie(inputShapes, netPrecision, axes, acrossChannels, normalizeVariance, eps) = basicParamsSet;

        std::ostringstream result;
        if (!inputShapes.first.empty()) {
            result << "IS=" << CommonTestUtils::partialShape2str(inputShapes.first) << "_";
        }
        result << "TS=";
        for (const auto& shape : inputShapes.second) {
            result << "(" << CommonTestUtils::vec2str(shape) << ")_";
        }
        result << "Precision=" << netPrecision.name() << "_";
        if (!axes.empty()) {
            result << "ReductionAxes=" << CommonTestUtils::vec2str(axes.to_vector()) << "_";
        } else {
            result << "AcrossChannels=" << (acrossChannels ? "TRUE" : "FALSE") << "_";
        }
        result << "NormalizeVariance=" << (normalizeVariance ? "TRUE" : "FALSE") << "_";
        result << "Epsilon=" << eps;
        result << "_" << "CNNInpPrc=" << inputPrecision.name();
        result << "_" << "CNNOutPrc=" << outputPrecision.name();
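getTestCaseName is consumed through GoogleTest's standard parameterized-test wiring. The instantiations further down the file (outside this diff's hunks) follow the usual pattern, roughly:

// Standard GoogleTest wiring; the suite name here is illustrative:
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_Mvn4D, MvnLayerCPUTest, Mvn4D,
                         MvnLayerCPUTest::getTestCaseName);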
@ -45,7 +72,9 @@ public:
    }
protected:
    void SetUp() override {
        LayerTestsDefinitions::mvn1Params basicParamsSet;
        targetDevice = CommonTestUtils::DEVICE_CPU;

        basicCpuMvnParams basicParamsSet;
        CPUSpecificParams cpuParams;
        fusingSpecificParams fusingParams;
        std::tie(basicParamsSet, cpuParams, fusingParams, inPrc, outPrc) = this->GetParam();
@ -53,14 +82,20 @@ protected:
        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
        std::tie(postOpMgrPtr, fusedOps) = fusingParams;

        InferenceEngine::SizeVector inputShapes;
        std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> inputShapes;
        InferenceEngine::Precision netPrecision;
        ngraph::AxisSet axes;
        bool acrossChannels, normalizeVariance;
        double eps;
        std::tie(inputShapes, netPrecision, axes, acrossChannels, normalizeVariance, eps, targetDevice) = basicParamsSet;
        std::tie(inputShapes, netPrecision, axes, acrossChannels, normalizeVariance, eps) = basicParamsSet;

        for (size_t i = 0; i < inputShapes.second.size(); i++) {
            targetStaticShapes.push_back({inputShapes.second[i]});
        }
        inputDynamicShapes = inputShapes.first;

        auto netPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        auto param = ngraph::builder::makeParams(netPrc, {inputShapes});
        auto param = ngraph::builder::makeParams(netPrc, {targetStaticShapes[0].front()});
        auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(param));
        auto mvn = ngraph::builder::makeMVN(paramOuts[0], acrossChannels, normalizeVariance, eps);
        if (!axes.empty()) {
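The dynamic-shape flow in SetUp: `inputDynamicShapes` keeps the partial shapes the network is built against, while each element of `targetStaticShapes` is one concrete shape group the test re-infers with; the ngraph function itself is created from the first target shape. A worked example of what the loop produces for one dynamic entry (values from `inputShapes_4D` below; local names exist only in this sketch):

std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>> entry{
        {ngraph::PartialShape{-1, -1, -1, -1}},
        {ngraph::Shape{2, 16, 10, 6}, ngraph::Shape{4, 16, 2, 2}, ngraph::Shape{1, 16, 8, 4}}};

std::vector<std::vector<ngraph::Shape>> targetStaticShapes;
for (size_t i = 0; i < entry.second.size(); i++)
    targetStaticShapes.push_back({entry.second[i]});
// targetStaticShapes now holds three single-shape groups:
// {{2,16,10,6}}, {{4,16,2,2}}, {{1,16,8,4}} - one inference run per group.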
@ -82,40 +117,141 @@ TEST_P(MvnLayerCPUTest, CompareWithRefs) {
}

namespace {
const std::vector<std::vector<size_t>> inputShapes_1D = {
        {5},
        {16},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_1D = {
        { {}, {{5}}},
        { {}, {{16}}},
        {
            // dynamic
            {{-1}},
            // target
            {
                {2},
                {16},
                {1}
            }
        },
        {
            // dynamic
            {{{1, 20}}},
            // target
            {
                {1},
                {16},
                {4}
            }
        }
};
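Two dynamic-dimension notations recur in these shape tables: `-1` marks a fully dynamic extent, and a brace pair such as `{1, 20}` bounds the extent to an interval. Spelled out with ngraph types (a standalone snippet, not part of the diff):

ngraph::PartialShape anyLength{-1};           // rank-1, extent unconstrained
ngraph::PartialShape boundedLength{{1, 20}};  // rank-1, extent restricted to [1, 20]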

const std::vector<std::vector<size_t>> inputShapes_2D = {
        {1, 32},
        {16, 64},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_2D = {
        { {}, {{1, 32}}},
        { {}, {{16, 64}}},
        {
            // dynamic
            {{-1, -1}},
            // target
            {
                {2, 16},
                {4, 16},
                {1, 16}
            }
        },
        {
            // dynamic
            {{{1, 5}, {1, 20}}},
            // target
            {
                {1, 1},
                {2, 16},
                {4, 16}
            }
        }
};

const std::vector<std::vector<size_t>> inputShapes_3D = {
        {1, 32, 17},
        {1, 37, 9},
        {1, 16, 4},
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_3D = {
        { {}, {{1, 32, 17}}},
        { {}, {{1, 37, 9}}},
        { {}, {{1, 16, 4}}},
        {
            // dynamic
            {{-1, -1, -1}},
            // target
            {
                {2, 16, 6},
                {4, 16, 2},
                {1, 16, 4}
            }
        },
        {
            // dynamic
            {{{1, 5}, {1, 20}, {1, 7}}},
            // target
            {
                {1, 1, 1},
                {2, 16, 6},
                {4, 16, 2}
            }
        }
};

const std::vector<std::vector<size_t>> inputShapes_4D = {
        {1, 16, 5, 8},
        {2, 19, 5, 10},
        {7, 32, 2, 8},
        {5, 8, 3, 5},
        {1, 2, 7, 5},
        {1, 4, 5, 5},
        {1, 7, 3, 5},
        {1, 15, 9, 5},
        {4, 41, 6, 9}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_4D = {
        { {}, {{1, 16, 5, 8}}},
        { {}, {{2, 19, 5, 10}}},
        { {}, {{7, 32, 2, 8}}},
        { {}, {{5, 8, 3, 5}}},
        { {}, {{1, 2, 7, 5}}},
        { {}, {{1, 4, 5, 5}}},
        { {}, {{1, 7, 3, 5}}},
        { {}, {{1, 15, 9, 5}}},
        { {}, {{4, 41, 6, 9}}},
        {
            // dynamic
            {{-1, -1, -1, -1}},
            // target
            {
                {2, 16, 10, 6},
                {4, 16, 2, 2},
                {1, 16, 8, 4}
            }
        },
        {
            // dynamic
            {{{1, 5}, {1, 20}, {1, 10}, {1, 7}}},
            // target
            {
                {1, 1, 1, 1},
                {2, 16, 10, 6},
                {4, 16, 2, 2}
            }
        }
};

const std::vector<std::vector<size_t>> inputShapes_5D = {
        {1, 32, 8, 1, 6},
        {1, 9, 1, 15, 9},
        {6, 64, 6, 1, 18},
        {2, 31, 2, 9, 1},
        {10, 16, 5, 10, 6}
const std::vector<std::pair<std::vector<ngraph::PartialShape>, std::vector<ngraph::Shape>>> inputShapes_5D = {
        { {}, {{1, 32, 8, 1, 6}}},
        { {}, {{1, 9, 1, 15, 9}}},
        { {}, {{6, 64, 6, 1, 18}}},
        { {}, {{2, 31, 2, 9, 1}}},
        { {}, {{10, 16, 5, 10, 6}}},
        {
            // dynamic
            {{-1, -1, -1, -1, -1}},
            // target
            {
                {2, 16, 5, 10, 6},
                {4, 16, 7, 2, 2},
                {1, 16, 11, 8, 4}
            }
        },
        {
            // dynamic
            {{{1, 5}, {1, 20}, {1, 7}, {1, 10}, {1, 7}}},
            // target
            {
                {1, 1, 1, 1, 1},
                {2, 16, 5, 10, 6},
                {4, 16, 7, 2, 2}
            }
        }
};

const std::vector<bool> acrossChannels = {
@ -162,6 +298,7 @@ std::vector<fusingSpecificParams> fusingParamsSet {
        fusingFakeQuantizePerTensorRelu,
        /* other patterns */
        fusingScaleShift,
        fusingAddPerTensor
};

const auto Mvn3D = ::testing::Combine(
@ -171,8 +308,7 @@ const auto Mvn3D = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::ValuesIn(acrossChannels),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::Values(emptyCPUSpec),
    ::testing::ValuesIn(fusingParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -187,8 +323,7 @@ const auto Mvn4D = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::ValuesIn(acrossChannels),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
    ::testing::ValuesIn(fusingParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -203,8 +338,7 @@ const auto Mvn5D = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::ValuesIn(acrossChannels),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
    ::testing::ValuesIn(fusingParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -228,8 +362,7 @@ const auto Mvn1D = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::ValuesIn(acrossChannels),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::Values(emptyCPUSpec),
    ::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -245,8 +378,7 @@ const auto Mvn2D = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::Values(false),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::Values(emptyCPUSpec),
    ::testing::ValuesIn(fusingParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -262,8 +394,7 @@ const auto Mvn2DTrans = ::testing::Combine(
        ::testing::ValuesIn(emptyReductionAxes),
        ::testing::Values(true),
        ::testing::ValuesIn(normalizeVariance),
        ::testing::ValuesIn(epsilon),
        ::testing::Values(CommonTestUtils::DEVICE_CPU)),
        ::testing::ValuesIn(epsilon)),
    ::testing::Values(emptyCPUSpec),
    ::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
    ::testing::ValuesIn(inpPrc),
@ -23,7 +23,9 @@ public:
        std::tie(shapes, broadcast) = obj.param;

        std::ostringstream result;
        result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
        if (!shapes.first.empty()) {
            result << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_";
        }
        result << "TS=";
        for (const auto& shape : shapes.second) {
            result << "(";