[CPU] Nspc layout enabling in the FP32/BF16 convolutions (#5292)

Maksim Kutakov 2021-05-25 11:41:23 +03:00 committed by GitHub
parent cc810297f4
commit 617636693a
19 changed files with 907 additions and 344 deletions
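
For context: nspc here denotes the channels-last memory formats (nhwc for 4D and ndhwc for 5D tensors), as opposed to the planar ncsp (nchw/ncdhw) and the blocked nCsp8c/nCsp16c formats that appear throughout the diff. A minimal illustrative sketch, not part of the commit, of how the two layouts index the same logical element:

// Illustrative only: linear offsets of element (n, c, h, w) in a dense tensor
// of shape [N, C, H, W] for the planar nchw vs the channels-last nhwc layout.
size_t offset_nchw(size_t n, size_t c, size_t h, size_t w, size_t C, size_t H, size_t W) {
    return ((n * C + c) * H + h) * W + w;  // channels vary slowest after batch
}
size_t offset_nhwc(size_t n, size_t c, size_t h, size_t w, size_t C, size_t H, size_t W) {
    return ((n * H + h) * W + w) * C + c;  // channels vary fastest (dense per pixel)
}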

View File

@ -278,7 +278,7 @@ class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILaye
public:
static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params());
static bool isFunctionQuantized(const std::shared_ptr<Function>& function);
static bool isFunctionQuantized(const std::shared_ptr<const Function>& function);
LowPrecisionTransformer();
LowPrecisionTransformer(const LowPrecisionTransformations& transformations);

View File

@ -259,7 +259,7 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const
return transformer;
}
bool LowPrecisionTransformer::isFunctionQuantized(const std::shared_ptr<Function>& function) {
bool LowPrecisionTransformer::isFunctionQuantized(const std::shared_ptr<const Function>& function) {
std::set<std::shared_ptr<Node>> handledNodes;
std::deque<std::shared_ptr<Node>> nodes;
for (auto result : function->get_results()) {
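The rest of the function body is truncated in this hunk; a hedged sketch of how such a check typically proceeds (the traversal details below are assumptions, the actual implementation may differ):

// Sketch only: walk the graph backwards from the results; the function counts
// as quantized as soon as a FakeQuantize operation is reached.
for (const auto& result : function->get_results()) {
    nodes.push_front(result);
}
while (!nodes.empty()) {
    const auto node = nodes.front();
    nodes.pop_front();
    if (handledNodes.count(node))
        continue;
    handledNodes.insert(node);
    if (ngraph::is_type<ngraph::opset1::FakeQuantize>(node))
        return true;
    for (const auto& input : node->inputs())
        nodes.push_front(input.get_source_output().get_node_shared_ptr());
}
return false;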

View File

@ -45,6 +45,7 @@
#include <ngraph/variant.hpp>
#include <ngraph/ops.hpp>
#include <transformations/utils/utils.hpp>
#include <low_precision/transformer.hpp>
/*****************************************************
* Debug capability
@ -89,6 +90,9 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ngraph::Function> &subgr
this->_name = "subgraph";
this->reuse_io_tensors = false;
isQuantizedFlag = (config.lpTransformsMode == Config::On) &&
ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(subgraph);
// Map data object onto producer node
std::map<std::shared_ptr<ngraph::Node>, std::pair<MKLDNNNodePtr, int>> op2node;
@ -109,6 +113,10 @@ void MKLDNNGraph::Replicate(const std::shared_ptr<const ngraph::Function> &subgr
for (const auto op : subgraph->get_ordered_ops()) {
const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)};
if (isQuantized()) {
node->setQuantizedGraphFlag(true);
}
graphNodes.push_back(node);
if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) {
@ -180,6 +188,9 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
}
isQuantizedFlag = (config.lpTransformsMode == Config::On) &&
ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(func);
auto orderedOps = func->get_ordered_ops();
// TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node
@ -202,6 +213,9 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
// Replicate All Nodes in topological order
for (const auto& op : orderedOps) {
const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache));
if (isQuantized()) {
node->setQuantizedGraphFlag(true);
}
graphNodes.push_back(node);
if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) {
@ -1162,6 +1176,10 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
afterNode->getParent()->childEdges.push_back(afterNode);
child->parentEdges.push_back(afterNode);
if (isQuantized()) {
node->setQuantizedGraphFlag(true);
}
if (initNode) {
node->getSupportedDescriptors();
node->initSupportedPrimitiveDescriptors();
@ -1178,15 +1196,9 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo
// Set the precision of all non-const data paths to BF16
void MKLDNNGraph::EnforceBF16() {
bool isQuantizedModel = false;
for (auto& node : graphNodes) {
if (node->getType() == FakeQuantize)
isQuantizedModel = true;
}
// Floating point parts of FP32 + INT8 or FP32 + BIN mixed-precision models will be executed in BF16 precision
// only if the enforceBF16 flag was set manually, because the current performance is not good enough to enable it by default
if (implication(isQuantizedModel, config.manualEnforceBF16)) {
if (implication(isQuantized(), config.manualEnforceBF16)) {
for (auto &node : graphNodes) {
if (node->getType() != Input && node->getType() != Output) {
for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) {
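
For reference, implication(a, b) above is plain logical implication (a -> b, i.e. !a || b); a minimal sketch assuming the usual definition, since the utility header itself is not part of this diff:

inline bool implication(bool cause, bool cond) {
    // !cause || cond: BF16 is enforced either when the graph is not quantized,
    // or when the user explicitly set the manual enforceBF16 flag.
    return !cause || cond;
}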

View File

@ -153,6 +153,10 @@ public:
void SortTopologically();
bool isQuantized() const {
return isQuantizedFlag;
}
protected:
void VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes);
@ -185,6 +189,8 @@ protected:
std::map<std::string, MeanImage> _meanImages;
std::string _name;
bool isQuantizedFlag = false;
static mkldnn::engine eng;
void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);

View File

@ -586,6 +586,10 @@ public:
return false;
}
void setQuantizedGraphFlag(bool flag) {
isInQuantizedGraph = flag;
}
protected:
bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const;
bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const;
@ -652,6 +656,8 @@ protected:
Algorithm algorithm = Algorithm::Undefined;
bool isInQuantizedGraph = false;
friend class MKLDNNEdge;
friend class MKLDNNGraph;
friend class MKLDNNGraphOptimizer;

View File

@ -9,6 +9,7 @@
#include "mkldnn_fake_quantize_node.h"
#include "mkldnn_pooling_node.h"
#include "mkldnn_concat_node.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include <string>
#include <vector>
#include <mkldnn_types.h>
@ -234,10 +235,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
: memory::format_tag::nhwc);
createDescriptor({in_candidate}, {out_candidate});
} else {
inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
: memory::data_type::f32;
outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
: memory::data_type::f32;
inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16
&& !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32;
outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16
&& !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
if (fusedWith[i]->getAlgorithm() == EltwiseAdd) {
@ -263,52 +264,40 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
eltwisePrecision = Precision::FP32;
}
if (ndims == 4) {
if (one_of(ndims, 4, 5)) {
memory::format_tag ncsp = ndims == 4 ? memory::format_tag::nchw : memory::format_tag::ncdhw;
memory::format_tag nspc = ndims == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc;
memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c;
memory::format_tag nCsp8c = ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c;
if (IC == 1 && groupOC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp);
createDescriptor({in_candidate}, {out_candidate});
} else if (IC == 3 || IC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw16c);
} else if (IC < 4) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c);
createDescriptor({in_candidate}, {out_candidate});
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw8c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c);
createDescriptor({in_candidate}, {out_candidate});
} else {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw16c);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp16c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp16c);
createDescriptor({in_candidate}, {out_candidate});
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw8c);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nCsp8c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nCsp8c);
createDescriptor({in_candidate}, {out_candidate});
}
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw);
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ncsp);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ncsp);
createDescriptor({in_candidate}, {out_candidate});
} else if (ndims == 5) {
if (IC == 1 && groupOC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw);
createDescriptor({in_candidate}, {out_candidate});
} else if (IC == 3 || IC == 1) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw16c);
createDescriptor({in_candidate}, {out_candidate});
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw8c);
createDescriptor({in_candidate}, {out_candidate});
} else {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw16c);
createDescriptor({in_candidate}, {out_candidate});
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw8c);
if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) {
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, nspc);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, nspc);
createDescriptor({in_candidate}, {out_candidate});
}
in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw);
out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw);
createDescriptor({in_candidate}, {out_candidate});
}
}
}
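For orientation, a hedged summary of the descriptor candidates the rewritten branch registers, as read from the diff (the exact order in the final file may differ slightly):

// IC == 1 && groupOC == 1 : planar (ncsp) in -> planar out only
// IC < 4                  : planar in -> blocked nCsp16c out, then planar in -> nCsp8c out
// otherwise               : blocked nCsp16c and nCsp8c for both in and out;
//                           plus nspc in/out when the input is not bf16 and
//                           isNspcAvailable() holds; plus a planar/planar fallback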
@ -747,4 +736,80 @@ InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const {
return MKLDNNExtensionUtils::getMaxPrecision(inputPrecisions);
}
bool MKLDNNConvolutionNode::isNspcAvailable() const {
using impl::cpu::x64::mayiuse;
// do not use in non-quantized networks until it is enforced externally
if (!isInQuantizedGraph) {
auto predicate = [](memory::format_tag tag) {
return one_of(tag, memory::format_tag::nwc, memory::format_tag::nhwc, memory::format_tag::ndhwc);
};
if (std::none_of(inputMemoryFormatsFilter.begin(), inputMemoryFormatsFilter.end(), predicate)) {
return false;
}
}
// A set of heuristics designed to filter out cases where the nspc convolution is not optimal
auto inpDims = getParentEdgeAt(0)->getDims().ToSizeVector();
auto outDims = getChildEdgeAt(0)->getDims().ToSizeVector();
auto ndims = inpDims.size();
if (isDepthWise()) {
// 1D-equivalent cases (the next-to-innermost spatial dim equals 1) are painfully slow
if (1 == inpDims[inpDims.size() - 2]) {
return false;
}
} else {
// it was empirically observed that nspc convolutions perform much slower than blocked ones once the number of channels exceeds a certain threshold
size_t spatialRank = ndims - 2; // two is the batch dim plus the channels dim
bool is1x1 = false;
if (!isGrouped) {
auto weightDimsReversItr = weightDims.crbegin();
auto inpDimsReversItr = inpDims.crbegin();
auto outDimsReversItr = outDims.crbegin();
auto paddingLreversItr = paddingL.crbegin();
auto paddingRreversItr = paddingR.crbegin();
is1x1 = true;
for (size_t i = 0; i < spatialRank; ++i) {
// accumulate over all spatial dims; a plain assignment would keep only the last iteration's result
is1x1 = is1x1
&& *(weightDimsReversItr++) == 1
&& *(inpDimsReversItr++) == *(outDimsReversItr++)
&& *(paddingLreversItr++) == 0
&& *(paddingRreversItr++) == 0;
}
}
// if the spatial size of the activations is 1x1, the avx512 1x1 nspc convolution pollutes the caches, so the layer after the convolution performs slowly
if (mayiuse(impl::cpu::x64::avx512_common) && is1x1) {
auto end = inpDims.rbegin();
std::advance(end, spatialRank);
if (std::all_of(inpDims.rbegin(), end, [](size_t x) { return 1 == x; })) {
return false;
}
}
unsigned thresholdNumChannels = 128u; // for avx and below
if (is1x1) {
thresholdNumChannels = 2048u;
} else if (mayiuse(impl::cpu::x64::avx512_common)) {
thresholdNumChannels = 512u;
}
size_t OC = outDims[1];
if (std::max(IC, OC) >= thresholdNumChannels) {
return false;
}
if (!mayiuse(impl::cpu::x64::avx)) {
// SSE41 nspc convolutions do not support IC and OC tails yet, and the blocked implementation will be much faster than gemm
if ((IC % 8) || (OC % 8)) {
return false;
}
}
}
return true;
}
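To make the cut-offs above concrete, a few hypothetical walk-throughs (illustrative numbers, not taken from the commit):

// avx512, non-1x1: threshold = 512;  IC = 256, OC = 512 -> max(256, 512) >= 512,
//                  so isNspcAvailable() returns false and blocked layouts are kept
// avx512, 1x1:     threshold = 2048; IC = 256, OC = 512 -> max(256, 512) < 2048,
//                  so nspc remains available
// sse41 (no avx):  IC = 20, OC = 64  -> IC % 8 != 0, nspc is rejected (no tail support yet)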
REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution);

View File

@ -62,6 +62,7 @@ private:
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights) const;
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isNspcAvailable() const;
bool withBiases;
bool withSum;

View File

@ -1210,6 +1210,11 @@ void MKLDNNEltwiseNode::createPrimitive() {
size_t startOff = outOrder.size() != config.outConfs[0].desc.getDims().size() &&
outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0;
// WA (workaround) to handle the nspc layout with 1D tensors
if (1 == inRank) {
if (outRank > 2 && 1 == outOrder.back()) startOff = 1;
}
for (int j = 0; j < inRank; j++) {
dims_in[i][dims_in[i].size() - 1 - j - startOff] = config.inConfs[i].desc.getBlockingDesc().getBlockDims()[inRank - 1 - j];
}

View File

@ -171,10 +171,10 @@ protected:
// threshold = 0.6f; // Max in fp32 network by output: 12.0983
// 3 channels, 4 x 4 size
threshold = 20.6f; // Max in fp32 network by output: 879.077
threshold = 30.6f; // Max in fp32 network by output: 879.077
// STAGE3:
// filling of expected precision of layer execution defined by precisoin of input tensor to the primitive and reflected in
// filling of expected precision of layer execution defined by precision of input tensor to the primitive and reflected in
// performance counters
expectedPrecisions["Convolution_1"] = "BF16";
expectedPrecisions["Convolution_2"] = "BF16";

View File

@ -50,9 +50,15 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*BinaryConvolutionLayerTest.*)",
R"(.*ClampLayerTest.*netPrc=(I64|I32).*)",
R"(.*ClampLayerTest.*netPrc=U64.*)",
// TODO: 42538. Unexpected application crush
// TODO: 42538. Unexpected application crash
R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetwork.t.*)",
R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetworkAccuracy.*AUTO.*)",
// TODO: 53618. BF16 gemm ncsp convolution crash
R"(.*_GroupConv.*_inPRC=BF16.*_inFmts=nc.*_primitive=jit_gemm.*)",
// TODO: 53578. fork DW bf16 convolution does not support 3d cases yet
R"(.*_DW_GroupConv.*_inPRC=BF16.*_inFmts=(ndhwc|nCdhw16c).*)",
// TODO: 56143. Enable nspc convolutions for bf16 precision
R"(.*ConvolutionLayerCPUTest.*BF16.*_inFmts=(ndhwc|nhwc).*)",
// incorrect reference implementation
R"(.*NormalizeL2LayerTest.*axes=\(\).*)",

View File

@ -3,6 +3,7 @@
//
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
@ -25,7 +26,7 @@ typedef std::tuple<
class ConvolutionLayerCPUTest : public testing::WithParamInterface<convLayerCPUTestParamsSet>,
virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing {
public:
static std::string getTestCaseName(testing::TestParamInfo<convLayerCPUTestParamsSet> obj) {
static std::string getTestCaseName(const testing::TestParamInfo<convLayerCPUTestParamsSet>& obj) {
convLayerTestParamsSet basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
@ -124,6 +125,16 @@ protected:
TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
// Skip tests for the sse41 convolution where IC or OC cannot be exactly divided by the block size,
// since tail processing for the sse41 nspc layout is not supported yet (see 52736).
if (!inFmts.empty() && (inFmts.front() == nhwc || inFmts.front() == ndhwc) && selectedType.find("jit_sse") != std::string::npos) {
auto inpChannels = function->get_parameters().front()->get_shape()[1];
auto outChannels = function->get_output_shape(0)[1];
if ((inpChannels % 8) || (outChannels % 8)) {
GTEST_SKIP() << "Disabled test: the sse41 convolution kernel does not support tails for the nspc layout." << std::endl;
}
}
Run();
if (isBias) {
@ -137,50 +148,41 @@ namespace {
/* COMMON PARAMS */
const std::vector<fusingSpecificParams> fusingParamsSet{
emptyFusingSpec,
// activations
// eltwise
fusingRelu,
fusingElu,
fusingSigmoid,
fusingClamp,
fusingPReluPerChannel,
fusingSwish,
fusingHSwish,
fusingMish,
fusingSoftPlus,
// other patterns
fusingReluAdd,
fusingPRelu1D,
// depthwise
fusingReluScaleShift,
// fake quantize
fusingFakeQuantizePerTensorRelu,
fusingFakeQuantizePerChannelRelu,
// sum
fusingSumEluFQ,
fusingSum,
fusingPRelu1D,
fusingAddPerChannel // bias
// bias
fusingAddPerChannel
};
const std::vector<fusingSpecificParams> fusingParamsSetBF16{
emptyFusingSpec,
// activations
// eltwise
fusingRelu,
fusingElu,
fusingSigmoid,
fusingClamp,
fusingPReluPerChannel,
fusingSwish,
// other patterns
fusingReluAdd,
// depthwise
fusingReluScaleShift,
fusingSum
// sum
fusingSum,
// bias
fusingAddPerChannel
};
const std::map<std::string, std::string> cpuEmptyPluginConfig;
const std::map<std::string, std::string> cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } };
/* ============= Convolution params (planar layout) ============= */
const SizeVector numOutChannels_Planar = { 6 };
/* ============= Convolution params (GEMM layout) ============= */
const SizeVector numOutChannels_Gemm = { 6 };
/* ============= Convolution params (blocked layout) ============= */
const SizeVector numOutChannels_Blocked = { 64 };
/* ============= Convolution params (blocked and nspc layout) ============= */
const SizeVector numOutChannels = { 64, 63 };
/* ============= Convolution params (2D) ============= */
const std::vector<SizeVector> kernels2d = { {3, 3}, {1, 1} };
@ -188,6 +190,8 @@ const std::vector<SizeVector> strides2d = { {1, 1}, {2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0}, {1, 1} };
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
const std::vector<SizeVector> dilations2d = { {1, 1}, {2, 2} };
const std::vector<SizeVector> inputShapes2d = { {1, 64, 7, 7}, {1, 67, 7, 7} };
const std::vector<SizeVector> inputShapesPlain2Blocked2d = { {1, 1, 7, 7}, {1, 2, 7, 7}, {1, 3, 7, 7} };
/* ============= Convolution params (3D) ============= */
const std::vector<SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
@ -195,28 +199,31 @@ const std::vector<SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins3d = { {0, 0, 0}, {1, 1, 1} };
const std::vector<std::vector<ptrdiff_t>> padEnds3d = { {0, 0, 0} };
const std::vector<SizeVector> dilations3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<SizeVector> inputShapes3d = { {1, 64, 7, 7, 7}, {1, 67, 7, 7, 7} };
const std::vector<SizeVector> inputShapesPlain2Blocked3d = { {1, 1, 7, 7, 7}, {1, 2, 7, 7, 7}, {1, 3, 7, 7, 7} };
/* ============= */
/* INSTANCES */
/* ============= Convolution (Planar 2D) ============= */
const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine(
/* ============= Convolution (Gemm 2D) ============= */
const auto convParams_ExplicitPadding_GEMM_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Planar_2D = {
conv_gemm_2D
const std::vector<CPUSpecificParams> CPUParams_GEMM_2D = {
conv_gemm_2D,
conv_gemm_2D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_GEMM_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_2D,
convParams_ExplicitPadding_GEMM_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
@ -224,15 +231,15 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_FP32, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_2D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_GEMM_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_2D,
convParams_ExplicitPadding_GEMM_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
@ -240,15 +247,15 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_BF16, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_2D)),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_I8, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_2D,
convParams_ExplicitPadding_GEMM_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
@ -256,30 +263,31 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Planar_I8, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_2D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Planar 3D) ============= */
const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine(
/* ============= Convolution (GEMM 3D) ============= */
const auto convParams_ExplicitPadding_GEMM_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Planar_3D = {
conv_gemm_3D
const std::vector<CPUSpecificParams> CPUParams_GEMM_3D = {
conv_gemm_3D,
conv_gemm_3D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_GEMM_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_3D,
convParams_ExplicitPadding_GEMM_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
@ -287,15 +295,15 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_FP32, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_3D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_GEMM_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_3D,
convParams_ExplicitPadding_GEMM_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
@ -303,15 +311,15 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_BF16, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_3D)),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_I8, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_GEMM_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_3D,
convParams_ExplicitPadding_GEMM_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
@ -319,141 +327,221 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Planar_I8, ConvolutionLayerCPUTest,
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_3D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Blocked 2D) ============= */
const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
/* ============= Convolution (2D) ============= */
const auto convParams_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Blocked_2D = {
const std::vector<CPUSpecificParams> CPUParams_2D = {
conv_sse42_2D,
conv_avx2_2D,
conv_avx512_2D
conv_avx512_2D,
conv_sse42_2D_nspc,
conv_avx2_2D_nspc,
conv_avx512_2D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Blocked_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Blocked_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D, conv_avx512_2D_nspc})),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_Blocked_I8, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Blocked 3D) ============= */
const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
const std::vector<CPUSpecificParams> CPUParams_2D_plain_to_blocked = {
conv_sse42_plain_to_blocked_2D,
conv_avx2_plain_to_blocked_2D,
conv_avx512_plain_to_blocked_2D,
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_PlainToBlocked_2D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapesPlain2Blocked2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D_plain_to_blocked)),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_PlainToBlocked_2D_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16, Precision::FP32),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapesPlain2Blocked2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_plain_to_blocked_2D})),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution (3D) ============= */
const auto convParams_ExplicitPadding_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Blocked_3D = {
const std::vector<CPUSpecificParams> CPUParams_3D = {
// conv_sse42_3D, // jit_sse42 does not support 3D
conv_avx2_3D,
conv_avx512_3D
conv_avx512_3D,
conv_avx2_3D_nspc,
conv_avx512_3D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Blocked_FP32, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7, 7 })),
::testing::ValuesIn(inputShapes3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D)),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Blocked_BF16, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7, 7 })),
::testing::ValuesIn(inputShapes3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D, conv_avx512_3D_nspc})),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_Blocked_I8, ConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7, 7 })),
::testing::ValuesIn(inputShapes3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
const std::vector<CPUSpecificParams> CPUParams_3D_plain_to_blocked = {
conv_avx2_plain_to_blocked_3D,
conv_avx512_plain_to_blocked_3D,
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_PlainToBlocked_3D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapesPlain2Blocked3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D_plain_to_blocked)),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Conv_PlainToBlocked_3D_BF16, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16, Precision::FP32),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapesPlain2Blocked3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_plain_to_blocked_3D})),
::testing::Values(emptyFusingSpec),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Kernel_1x1 (2D) ============= */
const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
@ -462,14 +550,17 @@ const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(SizeVector({1, 1})),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::Values(63),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_1x1_2D = {
conv_sse42_2D_1x1,
conv_avx2_2D_1x1,
conv_avx512_2D_1x1
conv_avx512_2D_1x1,
conv_sse42_2D_1x1_nspc,
conv_avx2_2D_1x1_nspc,
conv_avx512_2D_1x1_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_1x1_FP32, ConvolutionLayerCPUTest,
@ -481,7 +572,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_1x1_FP32, ConvolutionLayerCPUTest,
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_2D)),
::testing::ValuesIn(fusingParamsSet),
@ -497,9 +588,9 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_1x1_BF16, ConvolutionLayerCPUTest,
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1, conv_avx512_2D_1x1_nspc})),
::testing::ValuesIn(fusingParamsSetBF16),
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
@ -513,7 +604,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 64, 7, 7 })),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_2D)),
::testing::Values(fusingSum),
@ -534,7 +625,7 @@ const auto convParams_1D = ::testing::Combine(
::testing::ValuesIn(padBegins1d),
::testing::ValuesIn(padEnds1d),
::testing::ValuesIn(dilations1d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
@ -560,7 +651,75 @@ INSTANTIATE_TEST_CASE_P(smoke_Conv_1D, ConvolutionLayerCPUTest,
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ========= */
/* ============= Jit Planar ============= */
/* ============= Convolution planar params (2D) ============= */
const std::vector<CPUSpecificParams> CPUParams_Jit_Planar_2D = {
// sse42 is not supported
conv_avx2_planar_2D,
conv_avx512_planar_2D,
};
const auto convParams_Planar_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::Values(SizeVector{1, 1}),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::Values(1),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
INSTANTIATE_TEST_CASE_P(smoke_Conv_Jit_Planar_2D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Jit_Planar_2D)),
::testing::Values(emptyFusingSpec, fusingRelu),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= Convolution planar params (3D) ============= */
const std::vector<CPUSpecificParams> CPUParams_Jit_Planar_3D = {
// sse42 is not supported
conv_avx2_planar_3D,
conv_avx512_planar_3D,
};
const auto convParams_Planar_ExplicitPadding_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::Values(SizeVector{1, 1, 1}),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::Values(1),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
INSTANTIATE_TEST_CASE_P(smoke_Conv_Jit_Planar_3D_FP32, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_Planar_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapes3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Jit_Planar_3D)),
::testing::Values(emptyFusingSpec, fusingRelu),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);
/* ============= */
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -3,6 +3,7 @@
//
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"

View File

@ -333,5 +333,61 @@ const auto params_5D_Planar_Blocked = ::testing::Combine(
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_Planar_Blocked, EltwiseLayerCPUTest, params_5D_Planar_Blocked, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_4D_1D = {
{{2, 17, 5, 4}, {4}},
{{1, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_4D_1D = {
CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, x}, {nchw}, {}, {})
};
const auto params_4D_1D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_4D_1D),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netPrc),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_1D, EltwiseLayerCPUTest, params_4D_1D, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<std::vector<size_t>>> inShapes_5D_1D = {
{{2, 17, 5, 4, 10}, {10}},
{{1, 3, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_5D_1D = {
CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
};
const auto params_5D_1D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inShapes_5D_1D),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netPrc),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D)));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_1D, EltwiseLayerCPUTest, params_5D_1D, EltwiseLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions

View File

@ -4,6 +4,7 @@
#include <shared_test_classes/single_layer/group_convolution.hpp>
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "test_utils/fusing_test_utils.hpp"
using namespace InferenceEngine;
@ -83,18 +84,18 @@ protected:
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
std::tie(groupConvParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = basicParamsSet;
if (inPrc == Precision::UNSPECIFIED) {
selectedType += std::string("_") + Precision(Precision::FP32).name();
} else {
selectedType += std::string("_") + inPrc.name();
}
ngraph::op::PadType padType;
InferenceEngine::SizeVector kernel, stride, dilation;
std::vector<ptrdiff_t> padBegin, padEnd;
size_t convOutChannels, numGroups;
std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType) = groupConvParams;
if (inPrc == Precision::UNSPECIFIED) {
selectedType += std::string("_") + Precision(Precision::FP32).name();
} else {
selectedType += std::string("_") + inPrc.name();
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(
@ -145,31 +146,35 @@ std::vector<groupConvLayerCPUTestParamsSet> filterParamsSetForDevice(std::vector
/* ===================== */
/* COMMON PARAMS */
std::vector<fusingSpecificParams> fusingParamsSet {
const std::vector<fusingSpecificParams> fusingParamsSet {
emptyFusingSpec,
// activations
// eltwise
fusingRelu,
fusingElu,
fusingSigmoid,
fusingClamp,
fusingPReluPerChannel,
fusingSwish,
fusingHSwish,
fusingMish,
fusingSoftPlus,
// other patterns
fusingPRelu1D,
// depthwise
fusingReluScaleShift,
// fake quantize
fusingFakeQuantizePerTensorRelu,
fusingFakeQuantizePerChannelRelu,
// sum
fusingSumEluFQ,
fusingSum,
fusingPRelu1D
fusingSum
};
const std::vector<fusingSpecificParams> fusingParamsSetBF16{
emptyFusingSpec,
// eltwise
fusingRelu,
// depthwise
fusingReluScaleShift,
// sum
fusingSum
};
/* ============= GroupConvolution params (planar layout) ============= */
const SizeVector numOutChannels_Planar = {6};
const SizeVector numGroups_Planar = {2, 3};
const SizeVector numOutChannels_Gemm = {6};
const SizeVector numGroups_Gemm = {2, 3};
/* ============= GroupConvolution params (blocked layout) ============= */
const SizeVector numOutChannels_Blocked = {64};
@ -196,26 +201,27 @@ const std::vector<SizeVector> dilations3d = {{1, 1, 1}, {2, 2, 2}};
/* INSTANCES */
/* ============= GroupConvolution (Planar 2D) ============= */
const auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine(
/* ============= GroupConvolution (GEMM 2D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::ValuesIn(numGroups_Planar),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::ValuesIn(numGroups_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Planar_2D = {
conv_gemm_2D
const std::vector<CPUSpecificParams> CPUParams_Gemm_2D = {
conv_gemm_2D,
conv_gemm_2D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Planar_FP32, GroupConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Gemm_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Planar_2D,
groupConvParams_ExplicitPadding_Gemm_2D,
::testing::Values(Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@ -223,30 +229,46 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Planar_FP32, GroupConvolutionLayerCPU
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 12, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_2D)),
::testing::ValuesIn(fusingParamsSet)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Planar 3D) ============= */
const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine(
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Gemm_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Gemm_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 12, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_2D)),
::testing::ValuesIn(fusingParamsSetBF16)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Gemm 3D) ============= */
const auto groupConvParams_ExplicitPadding_Gemm_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::ValuesIn(numGroups_Planar),
::testing::ValuesIn(numOutChannels_Gemm),
::testing::ValuesIn(numGroups_Gemm),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Planar_3D = {
conv_gemm_3D
const std::vector<CPUSpecificParams> CPUParams_Gemm_3D = {
conv_gemm_3D,
conv_gemm_3D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Planar_FP32, GroupConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Gemm_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Planar_3D,
groupConvParams_ExplicitPadding_Gemm_3D,
::testing::Values(Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@ -254,12 +276,27 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Planar_FP32, GroupConvolutionLayerCPU
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 12, 7, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Planar_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_3D)),
::testing::ValuesIn(fusingParamsSet)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Blocked 2D) ============= */
const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Gemm_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Gemm_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 12, 7, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Gemm_3D)),
::testing::ValuesIn(fusingParamsSetBF16)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (2D) ============= */
const auto groupConvParams_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
@ -270,16 +307,19 @@ const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Blocked_2D = {
const std::vector<CPUSpecificParams> CPUParams_2D = {
conv_sse42_2D,
conv_avx2_2D,
conv_avx512_2D
conv_avx512_2D,
conv_sse42_2D_nspc,
conv_avx2_2D_nspc,
conv_avx512_2D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Blocked_FP32, GroupConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Blocked_2D,
groupConvParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@ -287,12 +327,27 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_Blocked_FP32, GroupConvolutionLayerCP
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 64, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_2D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D)),
::testing::ValuesIn(fusingParamsSet)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (Blocked 3D) ============= */
const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 64, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D, conv_avx512_2D_nspc})),
::testing::ValuesIn(fusingParamsSetBF16)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (3D) ============= */
const auto groupConvParams_ExplicitPadding_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
@ -303,16 +358,18 @@ const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const std::vector<CPUSpecificParams> CPUParams_Blocked_3D = {
const std::vector<CPUSpecificParams> CPUParams_3D = {
// conv_sse42_3D, // jit_sse42 does not support 3D
conv_avx2_3D,
conv_avx512_3D
conv_avx512_3D,
conv_avx2_3D_nspc,
conv_avx512_3D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Blocked_FP32, GroupConvolutionLayerCPUTest,
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_FP32, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_Blocked_3D,
groupConvParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
@ -320,10 +377,25 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_Blocked_FP32, GroupConvolutionLayerCP
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 64, 7, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_Blocked_3D)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D)),
::testing::ValuesIn(fusingParamsSet)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 64, 7, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D, conv_avx512_3D_nspc})),
::testing::ValuesIn(fusingParamsSetBF16)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (DW 2D) ============= */
const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@ -339,7 +411,10 @@ const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine(
const std::vector<CPUSpecificParams> CPUParams_DW_2D = {
conv_sse42_dw_2D,
conv_avx2_dw_2D,
conv_avx512_dw_2D
conv_avx512_dw_2D,
conv_sse42_dw_2D_nspc,
conv_avx2_dw_2D_nspc,
conv_avx512_dw_2D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_DW_FP32, GroupConvolutionLayerCPUTest,
@ -357,6 +432,22 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_DW_FP32, GroupConvolutionLayerCPUTest
::testing::ValuesIn(fusingParamsSet)),
GroupConvolutionLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_2D_DW_BF16, GroupConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
groupConvParams_ExplicitPadding_DW_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t >({2, 32, 7, 7})),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D, conv_avx512_dw_2D_nspc})),
::testing::ValuesIn(fusingParamsSetBF16)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= GroupConvolution (DW 3D) ============= */
const auto groupConvParams_ExplicitPadding_DW_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
@ -372,7 +463,10 @@ const auto groupConvParams_ExplicitPadding_DW_3D = ::testing::Combine(
const std::vector<CPUSpecificParams> CPUParams_DW_3D = {
conv_sse42_dw_3D,
conv_avx2_dw_3D,
conv_avx512_dw_3D
conv_avx512_dw_3D,
conv_sse42_dw_3D_nspc,
conv_avx2_dw_3D_nspc,
conv_avx512_dw_3D_nspc
};
INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_DW_FP32, GroupConvolutionLayerCPUTest,
@ -393,10 +487,16 @@ INSTANTIATE_TEST_CASE_P(smoke_GroupConv_3D_DW_FP32, GroupConvolutionLayerCPUTest
/* ============= SINGLE TEST CASES ============= */
groupConvLayerCPUTestParamsSet makeSingleGroupConvCPUTestCase(SizeVector kernels, SizeVector strides, SizeVector dilations,
std::vector<ptrdiff_t> padBegins, std::vector<ptrdiff_t> padEnds, ngraph::op::PadType padType,
int groups, int mb, SizeVector spDims, int inGroupSize, int outGroupSize,
CPUSpecificParams CPUParams) {
using VecFusingParams = std::vector<fusingSpecificParams>;
using PrcConnectedParams = std::tuple<Precision, Precision, VecFusingParams>; // inPrc, outPrc, FusingParamsSet
using VecPrcConnectedParams = std::vector<PrcConnectedParams>;
std::vector<groupConvLayerCPUTestParamsSet> makeSingleGroupConvCPUTestCases(SizeVector kernels, SizeVector strides, SizeVector dilations,
std::vector<ptrdiff_t> padBegins, std::vector<ptrdiff_t> padEnds,
ngraph::op::PadType padType, int groups, int mb, SizeVector spDims,
int inGroupSize, int outGroupSize,
const std::vector<CPUSpecificParams>& CPUParams,
const VecPrcConnectedParams& vecPrcConnectedParams) {
int inChannels = groups * inGroupSize;
int outChannels = groups * outGroupSize;
@ -406,42 +506,92 @@ groupConvLayerCPUTestParamsSet makeSingleGroupConvCPUTestCase(SizeVector kernels
inputShapes.insert(inputShapes.end(), spDims.begin(), spDims.end());
groupConvSpecificParams specificParams(kernels, strides, padBegins, padEnds, dilations, outChannels, groups, padType);
groupConvLayerTestParamsSet basicParamsSet(specificParams, Precision::FP32,
InferenceEngine::Precision::UNSPECIFIED,
InferenceEngine::Precision::UNSPECIFIED,
InferenceEngine::Layout::ANY,
InferenceEngine::Layout::ANY, inputShapes, CommonTestUtils::DEVICE_CPU);
return groupConvLayerCPUTestParamsSet(basicParamsSet, CPUParams, emptyFusingSpec);
std::vector<groupConvLayerCPUTestParamsSet> retVector;
for (auto& prcConnectedParams : vecPrcConnectedParams) {
Precision inPrc, outPrc;
VecFusingParams fusingParams;
std::tie(inPrc, outPrc, fusingParams) = prcConnectedParams;
groupConvLayerTestParamsSet basicParamsSet(specificParams, Precision::FP32, inPrc, outPrc,
InferenceEngine::Layout::ANY, InferenceEngine::Layout::ANY,
inputShapes, CommonTestUtils::DEVICE_CPU);
for (auto &item : CPUParams) {
for (auto &fusingParam : fusingParams) {
retVector.push_back(groupConvLayerCPUTestParamsSet(basicParamsSet, item, fusingParam));
}
}
}
return retVector;
}
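For orientation, a minimal usage sketch of the generator above (hypothetical values; fusingRelu is assumed to be available from fusing_test_utils.hpp): each (inPrc, outPrc, fusingParams) tuple is crossed with every CPUSpecificParams entry, so two CPU configs and two fusing variants yield four parameter sets.
// Hypothetical usage sketch: 2 CPU configs x 1 precision tuple x 2 fusing variants = 4 cases.
const std::vector<CPUSpecificParams> cpuConfigs = {conv_avx512_2D, conv_avx512_2D_nspc};
const VecPrcConnectedParams prcParams = {
        PrcConnectedParams{Precision::FP32, Precision::FP32, VecFusingParams{emptyFusingSpec, fusingRelu}}};
const auto cases = makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0},
                                                   ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 16,
                                                   cpuConfigs, prcParams);
// cases.size() == 4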
template<typename T>
void concatTestCases(std::vector<groupConvLayerCPUTestParamsSet>& resultVec, T testCase) {
    resultVec.insert(resultVec.begin(), std::make_move_iterator(testCase.begin()), std::make_move_iterator(testCase.end()));
}
template<typename T, typename... Args>
void concatTestCases(std::vector<groupConvLayerCPUTestParamsSet>& resultVec, T&& testCase, Args&&... args) {
    concatTestCases(resultVec, std::forward<T>(testCase));
concatTestCases(resultVec, std::forward<Args>(args)...);
}
template<typename... Args>
std::vector<groupConvLayerCPUTestParamsSet> generateSingleGroupConvCPUTestCases(Args&&... args) {
std::vector<groupConvLayerCPUTestParamsSet> retVec;
concatTestCases(retVec, std::forward<Args>(args)...);
return retVec;
}
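A note on the helpers above: the base overload inserts each incoming vector at resultVec.begin(), so the concatenated result lists the argument vectors in reverse order (each vector's internal order is preserved). This is harmless here because gtest instantiates the whole set regardless of ordering, but it is worth knowing when reading generated test names.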
/* COMMON PARAMS */
const VecPrcConnectedParams vecPrcConnectParamsFP32 = {PrcConnectedParams{Precision::FP32, Precision::FP32, fusingParamsSet}};
const VecPrcConnectedParams vecPrcConnectParams = {PrcConnectedParams{Precision::FP32, Precision::FP32, fusingParamsSet},
PrcConnectedParams{Precision::BF16, Precision::BF16, fusingParamsSetBF16},
PrcConnectedParams{Precision::BF16, Precision::FP32, fusingParamsSetBF16}};
const VecPrcConnectedParams vecPrcConnectParamsFP32Default = {PrcConnectedParams{Precision::FP32, Precision::FP32, VecFusingParams{emptyFusingSpec}}};
const VecPrcConnectedParams vecPrcConnectParamsDefault = {PrcConnectedParams{Precision::FP32, Precision::FP32, VecFusingParams{emptyFusingSpec}},
PrcConnectedParams{Precision::BF16, Precision::BF16, VecFusingParams{emptyFusingSpec}},
PrcConnectedParams{Precision::BF16, Precision::FP32, VecFusingParams{emptyFusingSpec}}};
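These tables bind input/output precisions to the fusing sets they are allowed to run with: vecPrcConnectParams exercises FP32 plus both the BF16-to-BF16 and BF16-to-FP32 paths with the full fusing sets, while the *Default variants keep only emptyFusingSpec so that the expensive "hard" shapes below do not multiply by every fusing combination.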
/* ============= GEMM GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> gemmGroupConvTestCases = {
const std::vector<groupConvLayerCPUTestParamsSet> gemmGroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. is_depthwise (true, false)
// 2. jcp.im2col_sz (=0,>0)
// 3. is_blocking_applicable (true, false)
// is_depthwise == false, im2col_sz > 0
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 2, 2, conv_gemm_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 2, 2, CPUParams_Gemm_2D, vecPrcConnectParams),
// is_depthwise == true
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 1, 1, conv_gemm_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 1, 1,
CPUParams_Gemm_2D, vecPrcConnectParams),
// im2col_sz == 0, is_blocking_applicable == true
makeSingleGroupConvCPUTestCase({1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 2, 2, conv_gemm_2D),
makeSingleGroupConvCPUTestCases({1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 2, 2, CPUParams_Gemm_2D, vecPrcConnectParams),
// is_blocking_applicable == false ((jcp.im2col_sz == 0) && (jcp.ic / jcp.oc >= 42))
makeSingleGroupConvCPUTestCase({1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 42, 1, conv_gemm_2D),
makeSingleGroupConvCPUTestCases({1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 42, 1, CPUParams_Gemm_2D, vecPrcConnectParams),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 3, 2, {129, 129}, 4, 2, conv_gemm_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 2, 1, {10, 10}, 3, 3, conv_gemm_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 4, 2, conv_gemm_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 3, 3, conv_gemm_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {129, 129}, 4, 2, CPUParams_Gemm_2D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10}, 3, 3, CPUParams_Gemm_2D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 4, 2, CPUParams_Gemm_3D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 3, 3, CPUParams_Gemm_3D, vecPrcConnectParams)
);
INSTANTIATE_TEST_CASE_P(smoke_GEMM_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(gemmGroupConvTestCases)));
INSTANTIATE_TEST_CASE_P(smoke_GEMM_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(gemmGroupConvTestCases)),
GroupConvolutionLayerCPUTest::getTestCaseName);
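Passing GroupConvolutionLayerCPUTest::getTestCaseName as the fourth INSTANTIATE_TEST_CASE_P argument makes gtest derive readable test names from the parameters instead of bare indices, which matters now that a single generated set mixes precisions, layouts, and fusing variants.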
/* ============= JIT SSE42 GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_SSE42_GroupConvTestCases = {
const std::vector<CPUSpecificParams> sse42_GroupConv = {conv_sse42_2D, conv_sse42_2D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_SSE42_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. jcp.ur_w (=3,<3)
// 2. jcp.ur_w_tail (=0,>0)
// 3. jcp.kw (>7,<=7)
@ -450,37 +600,50 @@ const std::vector<groupConvLayerCPUTestParamsSet> JIT_SSE42_GroupConvTestCases =
// 6. ocb_work
// jcp.ur_w == 3, jcp.ur_w_tail == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 10}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 10}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.ur_w < 3 (jcp.ur_w == jcp.ow)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 4}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 4}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.ur_w == 3, jcp.ur_w_tail == 0
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 11}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 11}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.kw > 7
makeSingleGroupConvCPUTestCase({3, 8}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 10}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 8}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 10}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.nb_oc == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 8, 16, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 8, 16, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.nb_ic == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 8, sse42_GroupConv, vecPrcConnectParamsFP32),
// ocb_work > 1 (ocb_work == 2)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 8, 40, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 8, 40, sse42_GroupConv, vecPrcConnectParamsFP32),
// jcp.nb_ic == 2, ocb_work == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 40, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 40, sse42_GroupConv, vecPrcConnectParamsFP32),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 3, 2, {129, 129}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 2, 1, {10, 10}, 8, 8, conv_sse42_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {129, 129}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10}, 8, 8, sse42_GroupConv, vecPrcConnectParamsFP32Default)
// jit_sse42 does not support 3D
// makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
// makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
// 3, 2, {33, 33, 33}, 8, 8, {conv_sse42_3D}, vecPrcConnectParamsFP32),
// makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
// makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
// 2, 1, {10, 10, 10}, 8, 8, {conv_sse42_3D}, vecPrcConnectParamsFP32),
};
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_SSE42_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_SSE42_GroupConvTestCases)));
INSTANTIATE_TEST_CASE_P(smoke_JIT_SSE42_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_SSE42_GroupConvTestCases)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT AVX2 GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX2_GroupConvTestCases = {
const std::vector<CPUSpecificParams> avx2_GroupConv_2D = {conv_avx2_2D, conv_avx2_2D_nspc};
const std::vector<CPUSpecificParams> avx2_GroupConv_3D = {conv_avx2_3D, conv_avx2_3D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX2_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. jcp.ur_w (=3,<3)
// 2. jcp.ur_w_tail (=0,>0)
// 3. jcp.kw (>7,<=7)
@ -489,144 +652,181 @@ const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX2_GroupConvTestCases =
// 6. ocb_work
// jcp.ur_w == 3, jcp.ur_w_tail == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 10}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 10}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.ur_w < 3 (jcp.ur_w == jcp.ow)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 4}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 4}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.ur_w == 3, jcp.ur_w_tail == 0
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 11}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 11}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.kw > 7
makeSingleGroupConvCPUTestCase({3, 8}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 10}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 8}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 10}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.nb_oc == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 8, 16, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 8, 16, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.nb_ic == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// ocb_work > 1 (ocb_work == 2)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 8, 40, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 8, 40, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// jcp.nb_ic == 2, ocb_work == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 40, conv_avx2_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 40, avx2_GroupConv_2D, vecPrcConnectParamsFP32),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 3, 2, {129, 129}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 2, 1, {10, 10}, 8, 8, conv_avx2_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 8, 8, conv_avx2_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 8, 8, conv_avx2_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {129, 129}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10}, 8, 8, avx2_GroupConv_2D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 8, 8, avx2_GroupConv_3D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 8, 8, avx2_GroupConv_3D, vecPrcConnectParamsFP32)
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX2_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_AVX2_GroupConvTestCases)));
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX2_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_AVX2_GroupConvTestCases)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT AVX512 GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX512_GroupConvTestCases = {
const std::vector<CPUSpecificParams> avx512_GroupConv_2D = {conv_avx512_2D, conv_avx512_2D_nspc};
const std::vector<CPUSpecificParams> avx512_GroupConv_3D = {conv_avx512_3D, conv_avx512_3D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX512_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. "blocked to blocked" or "planar to blocked"
// 2. jcp.nb_ic, jcp.nb_oc
// blocked to blocked
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 16, conv_avx512_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 16, avx512_GroupConv_2D, vecPrcConnectParams),
// jcp.nb_ic == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 32, 16, conv_avx512_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 32, 16, avx512_GroupConv_2D, vecPrcConnectParams),
// jcp.nb_oc == 2
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 2, 1, {5, 5}, 16, 32, conv_avx512_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
2, 1, {5, 5}, 16, 32, avx512_GroupConv_2D, vecPrcConnectParams),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 3, 2, {129, 129}, 16, 16,
conv_avx512_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 2, 1, {10, 10}, 16, 16, conv_avx512_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 16, 16, conv_avx512_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 16, 16, conv_avx512_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 3, 2, {129, 129}, 16, 16,
avx512_GroupConv_2D, vecPrcConnectParams),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10}, 16, 16, avx512_GroupConv_2D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
3, 2, {33, 33, 33}, 16, 16, avx512_GroupConv_3D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
2, 1, {10, 10, 10}, 16, 16, avx512_GroupConv_3D, vecPrcConnectParams)
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX512_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_AVX512_GroupConvTestCases)));
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX512_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice(JIT_AVX512_GroupConvTestCases)),
GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT SSE42 DW GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_SSE42_DW_GroupConvTestCases = {
const std::vector<CPUSpecificParams> sse42_DW_2D = {conv_sse42_dw_2D, conv_sse42_dw_2D_nspc};
const std::vector<CPUSpecificParams> sse42_DW_3D = {conv_sse42_dw_3D, conv_sse42_dw_3D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_SSE42_DW_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. jcp.ngroups % simd_w (=0,!=0)
// 2. jcp.nb_ch
// 3. jcp.nb_ch_blocking (=2,<2)
// 4. jcp.ur_w == 3
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 1, jcp.nb_ch_blocking == 1 (jcp.ngroups == 8)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 8, 1, {5, 5}, 1, 1, conv_sse42_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
8, 1, {5, 5}, 1, 1, sse42_DW_2D, vecPrcConnectParamsFP32),
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 2, jcp.nb_ch_blocking == 2 (jcp.ngroups == 16)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 16, 1, {5, 5}, 1, 1, conv_sse42_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
16, 1, {5, 5}, 1, 1, sse42_DW_2D, vecPrcConnectParamsFP32),
// jcp.ngroups % simd_w != 0, jcp.nb_ch == 3, jcp.nb_ch_blocking == 2 (jcp.ngroups == 17) TODO: pad channels not supported for SSE42
// makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 17, 1, {5, 5}, 1, 1, conv_sse42_dw_2D),
// makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
// 17, 1, {5, 5}, 1, 1, sse42_DW_2D, vecPrcConnectParamsFP32),
// jcp.ow > jcp.ur_w (jcp.ow == 7)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 8, 1, {5, 9}, 1, 1, conv_sse42_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
8, 1, {5, 9}, 1, 1, sse42_DW_2D, vecPrcConnectParamsFP32),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 8, 2, {129, 129}, 1, 1,
conv_sse42_dw_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 8, 1, {10, 10}, 1, 1, conv_sse42_dw_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
8, 2, {33, 33, 33}, 1, 1, conv_sse42_dw_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10, 10}, 1, 1, conv_sse42_dw_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 8, 2, {129, 129}, 1, 1,
sse42_DW_2D, vecPrcConnectParamsFP32),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10}, 1, 1, sse42_DW_2D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
8, 2, {33, 33, 33}, 1, 1, sse42_DW_3D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10, 10}, 1, 1, sse42_DW_3D, vecPrcConnectParamsFP32)
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_SSE42_DW_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice
(JIT_SSE42_DW_GroupConvTestCases)));
(JIT_SSE42_DW_GroupConvTestCases)), GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT AVX2 DW GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX2_DW_GroupConvTestCases = {
const std::vector<CPUSpecificParams> avx2_DW_2D = {conv_avx2_dw_2D, conv_avx2_dw_2D_nspc};
const std::vector<CPUSpecificParams> avx2_DW_3D = {conv_avx2_dw_3D, conv_avx2_dw_3D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX2_DW_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. jcp.ngroups % simd_w (=0,!=0)
// 2. jcp.nb_ch
// 3. jcp.nb_ch_blocking (=3,<3)
// 4. jcp.ur_w == 4
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 1, jcp.nb_ch_blocking == 1 (jcp.ngroups == 8)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 8, 1, {5, 5}, 1, 1, conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
8, 1, {5, 5}, 1, 1, avx2_DW_2D, vecPrcConnectParamsFP32),
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 3, jcp.nb_ch_blocking == 3 (jcp.ngroups == 24)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 24, 1, {5, 5}, 1, 1, conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
24, 1, {5, 5}, 1, 1, avx2_DW_2D, vecPrcConnectParamsFP32),
// jcp.ngroups % simd_w != 0, jcp.nb_ch == 4, jcp.nb_ch_blocking == 3 (jcp.ngroups == 25)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 25, 1, {5, 5}, 1, 1, conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
25, 1, {5, 5}, 1, 1, avx2_DW_2D, vecPrcConnectParamsFP32),
// jcp.ow > jcp.ur_w (jcp.ow == 7)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 8, 1, {5, 9}, 1, 1, conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
8, 1, {5, 9}, 1, 1, avx2_DW_2D, vecPrcConnectParamsFP32),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 8, 2, {129, 129}, 1, 1,
conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 8, 1, {10, 10}, 1, 1, conv_avx2_dw_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
8, 2, {33, 33, 33}, 1, 1, conv_avx2_dw_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10, 10}, 1, 1, conv_avx2_dw_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 8, 2, {129, 129}, 1, 1,
avx2_DW_2D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10}, 1, 1, avx2_DW_2D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
8, 2, {33, 33, 33}, 1, 1, avx2_DW_3D, vecPrcConnectParamsFP32Default),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
8, 1, {10, 10, 10}, 1, 1, avx2_DW_3D, vecPrcConnectParamsFP32)
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX2_DW_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice
(JIT_AVX2_DW_GroupConvTestCases)));
(JIT_AVX2_DW_GroupConvTestCases)), GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT AVX512 DW GroupConvolution ============= */
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX512_DW_GroupConvTestCases = {
const std::vector<CPUSpecificParams> avx512_DW_2D = {conv_avx512_dw_2D, conv_avx512_dw_2D_nspc};
const std::vector<CPUSpecificParams> avx512_DW_3D = {conv_avx512_dw_3D, conv_avx512_dw_3D_nspc};
const std::vector<groupConvLayerCPUTestParamsSet> JIT_AVX512_DW_GroupConvTestCases = generateSingleGroupConvCPUTestCases(
// 1. jcp.ngroups % simd_w (=0,!=0)
// 2. jcp.nb_ch
// 3. jcp.nb_ch_blocking (=4,<4)
// 4. jcp.ur_w == 6
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 1, jcp.nb_ch_blocking == 1 (jcp.ngroups == 16)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 16, 1, {5, 5}, 1, 1, conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
16, 1, {5, 5}, 1, 1, avx512_DW_2D, vecPrcConnectParams),
// jcp.ngroups % simd_w == 0, jcp.nb_ch == 4, jcp.nb_ch_blocking == 4 (jcp.ngroups == 64)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 64, 1, {5, 5}, 1, 1, conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
64, 1, {5, 5}, 1, 1, avx512_DW_2D, vecPrcConnectParams),
// jcp.ngroups % simd_w != 0, jcp.nb_ch == 5, jcp.nb_ch_blocking == 4 (jcp.ngroups == 65)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 65, 1, {5, 5}, 1, 1, conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
65, 1, {5, 5}, 1, 1, avx512_DW_2D, vecPrcConnectParams),
// jcp.ow > jcp.ur_w (jcp.ow == 7)
makeSingleGroupConvCPUTestCase({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID, 8, 1, {5, 9}, 1, 1, conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCases({3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}, ngraph::op::PadType::VALID,
8, 1, {5, 9}, 1, 1, avx512_DW_2D, vecPrcConnectParams),
// "hard" cases
makeSingleGroupConvCPUTestCase({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 16, 2, {129, 129}, 1, 1,
conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCase({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 16, 1, {10, 10}, 1, 1,
conv_avx512_dw_2D),
makeSingleGroupConvCPUTestCase({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
16, 2, {33, 33, 33}, 1, 1, conv_avx512_dw_3D),
makeSingleGroupConvCPUTestCase({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
16, 1, {10, 10, 10}, 1, 1, conv_avx512_dw_3D),
};
makeSingleGroupConvCPUTestCases({3, 3}, {2, 2}, {1, 1}, {1, 1}, {1, 1}, ngraph::op::PadType::EXPLICIT, 16, 2, {129, 129}, 1, 1,
avx512_DW_2D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({2, 4}, {1, 2}, {3, 2}, {2, 1}, {1, 0}, ngraph::op::PadType::EXPLICIT, 16, 1, {10, 10}, 1, 1,
avx512_DW_2D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({3, 3, 3}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, ngraph::op::PadType::EXPLICIT,
16, 2, {33, 33, 33}, 1, 1, avx512_DW_3D, vecPrcConnectParamsDefault),
makeSingleGroupConvCPUTestCases({2, 3, 4}, {1, 2, 2}, {3, 1, 2}, {2, 2, 1}, {1, 1, 0}, ngraph::op::PadType::EXPLICIT,
16, 1, {10, 10, 10}, 1, 1, avx512_DW_3D, vecPrcConnectParams)
);
INSTANTIATE_TEST_CASE_P(smoke_JIT_AVX512_DW_GroupConv, GroupConvolutionLayerCPUTest, ::testing::ValuesIn(filterParamsSetForDevice
(JIT_AVX512_DW_GroupConvTestCases)));
(JIT_AVX512_DW_GroupConvTestCases)), GroupConvolutionLayerCPUTest::getTestCaseName);
/* ============= JIT SSE42 1x1 Convolution (not supported with groups) ============= */
/* ============= JIT AVX2 1x1 Convolution (not supported with groups) ============= */

View File

@ -4,6 +4,7 @@
#include <shared_test_classes/single_layer/group_convolution_backprop_data.hpp>
#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/convolution_params.hpp"
#include "test_utils/fusing_test_utils.hpp"
using namespace InferenceEngine;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/convolution_params.hpp"
#include "subgraph_tests/include/conv_concat.hpp"
using namespace InferenceEngine;

View File

@ -0,0 +1,73 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "cpu_test_utils.hpp"
namespace CPUTestUtils {
const auto conv_sse42_1D = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
const auto conv_avx2_1D = CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx512_1D = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
const auto conv_ref_2D = CPUSpecificParams{{nchw}, {nchw}, {"ref_any"}, "ref_any"};
const auto conv_ref_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref_any"}, "ref_any"};
const auto conv_gemm_2D = CPUSpecificParams{{nchw}, {nchw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_gemm_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_gemm"}, "jit_gemm"};
const auto conv_sse42_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_avx2_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx512_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_planar_2D = CPUSpecificParams{{nchw}, {nchw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_planar_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_2D = CPUSpecificParams{{nchw}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_plain_to_blocked_3D = CPUSpecificParams{{ncdhw}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_2D_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D_nspc = CPUSpecificParams{{ndhwc}, {ndhwc}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_sse42_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
const auto conv_sse42_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1_nspc = CPUSpecificParams{{nhwc}, {nhwc}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
} // namespace CPUTestUtils
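For readers new to these tables: CPUSpecificParams (declared in cpu_test_utils.hpp) pairs the expected input memory formats, expected output memory formats, an implementation-priority filter, and the primitive type the test must end up selecting; the _nspc entries pin the channels-last (nhwc/ndhwc) layouts this commit enables. A minimal sketch of unpacking one entry, assuming that tuple layout:
// Minimal sketch, assuming CPUSpecificParams is the
// (inFmts, outFmts, priority, selectedType) tuple from cpu_test_utils.hpp.
std::vector<cpu_memory_format_t> inFmts, outFmts;
std::vector<std::string> priority;
std::string selectedType;
std::tie(inFmts, outFmts, priority, selectedType) = conv_avx512_2D_nspc;
// inFmts == {nhwc}, outFmts == {nhwc}, priority == {"jit_avx512"}, selectedType == "jit_avx512"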

View File

@ -148,35 +148,6 @@ protected:
const auto emptyCPUSpec = CPUSpecificParams{{}, {}, {}, {}};
const auto conv_sse42_1D = CPUSpecificParams{{}, {}, {"jit_sse42"}, "jit_sse42"};
const auto conv_avx2_1D = CPUSpecificParams{{}, {}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx512_1D = CPUSpecificParams{{}, {}, {"jit_avx512"}, "jit_avx512"};
const auto conv_ref_2D = CPUSpecificParams{{nchw}, {nchw}, {"ref_any"}, "ref_any"};
const auto conv_ref_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"ref_any"}, "ref_any"};
const auto conv_gemm_2D = CPUSpecificParams{{nchw}, {nchw}, {"gemm_any"}, "jit_gemm"};
const auto conv_gemm_3D = CPUSpecificParams{{ncdhw}, {ncdhw}, {"gemm_any"}, "jit_gemm"};
const auto conv_sse42_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42"}, "jit_sse42"};
const auto conv_sse42_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_sse42_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_sse42_dw"}, "jit_sse42_dw"};
const auto conv_avx2_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2"}, "jit_avx2"};
const auto conv_avx2_dw_2D = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx2_dw_3D = CPUSpecificParams{{nCdhw8c}, {nCdhw8c}, {"jit_avx2_dw"}, "jit_avx2_dw"};
const auto conv_avx512_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512"}, "jit_avx512"};
const auto conv_avx512_dw_2D = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_avx512_dw_3D = CPUSpecificParams{{nCdhw16c}, {nCdhw16c}, {"jit_avx512_dw"}, "jit_avx512_dw"};
const auto conv_sse42_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_sse42_1x1"}, "jit_sse42_1x1"};
const auto conv_avx2_2D_1x1 = CPUSpecificParams{{nChw8c}, {nChw8c}, {"jit_avx2_1x1"}, "jit_avx2_1x1"};
const auto conv_avx512_2D_1x1 = CPUSpecificParams{{nChw16c}, {nChw16c}, {"jit_avx512_1x1"}, "jit_avx512_1x1"};
// utility functions
std::vector<CPUSpecificParams> filterCPUSpecificParams(std::vector<CPUSpecificParams>& paramsVector);
std::vector<CPUSpecificParams> filterCPUInfoForDevice(std::vector<CPUSpecificParams> CPUParams);

@ -1 +1 @@
Subproject commit a81b4753105bb0a1622790256b02f19916cce77c
Subproject commit 2f19a90c0273415e832520264c23d365b2dc43ed