[IE CLDNN] Changed weights layout used in the plugin (#3858)

Before this patch, a constant with weights could go undetected if
it wasn't directly connected to a Conv/Deconv layer.
Now weights always use the common data format (bfzyx) in the plugin, which is
converted into a weights format later (goiyx, oiyx, etc.), so the weights sub-graph
can now contain anything.
This commit is contained in:
Vladimir Paramuzov 2021-01-19 13:06:20 +03:00 committed by GitHub
parent e88de8f822
commit 9cc58fc66a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 506 additions and 199 deletions

View File

@ -139,7 +139,6 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
}
}
inline cldnn::format DefaultFormatForDims(size_t dimensions) {
switch (dimensions) {
case 0:
@ -159,4 +158,29 @@ inline cldnn::format DefaultFormatForDims(size_t dimensions) {
return cldnn::format::bfyx; // Should not get here
}
// This helper function is needed to convert permute order from IE format (bfyx) into cldnn format (bfxy).
// @param ie_order  Permute order in IE dimension layout (b, f, spatial outer->inner).
// @param rank      Desired minimum rank of the resulting order; clamped to at least 4
//                  because cldnn formats always carry at least 4 dimensions.
// @return          Equivalent permute order expressed in cldnn dimension layout.
inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie_order, size_t rank = 0) {
    std::vector<uint16_t> ie_order_aligned = ie_order;
    // if order size is less than 4 - fill the rest with just copy (identity mapping)
    rank = std::max(rank, static_cast<size_t>(4));
    for (auto o = ie_order_aligned.size(); o < rank; o++)
        ie_order_aligned.push_back((uint16_t)o);

    std::vector<uint16_t> cldnn_order;
    // 1. Switch permute order values for spatial dims: IE lists spatial dims outer->inner,
    //    cldnn lists them inner->outer, so values >= 2 are mirrored.
    for (auto const& o : ie_order_aligned) {
        if (o >= 2)
            cldnn_order.push_back(1 + ie_order_aligned.size() - o);
        else
            cldnn_order.push_back(o);
    }

    // 2. Swap spatial positions so the order itself is laid out in cldnn dimension order.
    //    size_t index avoids a signed/unsigned comparison with the unsigned bound.
    for (size_t i = 0; i < (cldnn_order.size() - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[1 + cldnn_order.size() - (2 + i)]);
    }

    return cldnn_order;
}
} // namespace CLDNNPlugin

View File

@ -327,7 +327,7 @@ void Program::InitProfileInfo(const std::string& layerName,
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
std::list<std::shared_ptr<ngraph::Node>> nodes_to_process = { node };
while (!nodes_to_process.empty()) {
auto& current_node = nodes_to_process.front();
auto current_node = nodes_to_process.front();
nodes_to_process.pop_front();
for (size_t i = 0; i < current_node->get_input_size(); i++) {

View File

@ -33,25 +33,24 @@ static ConstProperties getConstProperties(const std::shared_ptr<ngraph::op::Cons
for (auto& t : outTensors) {
auto outOp = t.get_node();
if (dynamic_cast<ngraph::op::v1::Convolution*>(outOp)) {
return {true, false, false};
return {t.get_index() == 1, false, false};
} else if (dynamic_cast<ngraph::op::v1::BinaryConvolution*>(outOp)) {
return {true, false, false};
return {t.get_index() == 1, false, false};
} else if (auto castedOp = dynamic_cast<ngraph::op::v1::DeformableConvolution*>(outOp)) {
return {true, castedOp->get_group() > 1, false};
return {t.get_index() == 2, castedOp->get_group() > 1, false};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolution*>(outOp)) {
return {true, true, false};
return {t.get_index() == 1, true, false};
} else if (dynamic_cast<ngraph::op::v1::ConvolutionBackpropData*>(outOp)) {
return {true, false, true};
return {t.get_index() == 1, false, true};
} else if (dynamic_cast<ngraph::op::v1::GroupConvolutionBackpropData*>(outOp)) {
return {true, true, true};
return {t.get_index() == 1, true, true};
}
}
}
return {false, false, false};
}
void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
auto constDims = op->get_shape();
static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
cldnn::tensor constTensor;
switch (constDims.size()) {
case 6: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
@ -75,6 +74,12 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
break;
default: THROW_IE_EXCEPTION << "Invalid constant blob dimensions";
}
return constTensor;
}
void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
auto constDims = op->get_shape();
cldnn::tensor constTensor = getConstTensor(constDims);
// WA to inconsistency between input and const 1d tensors
// For Concat along batch we go with batch interpretation
@ -119,45 +124,37 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
auto constFormat = DefaultFormatForDims(op->get_output_shape(0).size());
auto prop = getConstProperties(op);
if (prop.isWeights) {
// Deconvolution has reversed channels order (io instead of oi)
if (prop.reversedChannelsOrder) {
if (prop.hasGroupDimension) {
switch (op->get_output_shape(0).size()) {
case 5: constFormat = cldnn::format::gioyx; break;
case 6: constFormat = cldnn::format::giozyx; break;
}
} else {
switch (op->get_output_shape(0).size()) {
case 4: constFormat = cldnn::format::ioyx; break;
case 5: constFormat = cldnn::format::iozyx; break;
}
}
} else {
if (prop.hasGroupDimension) {
switch (op->get_output_shape(0).size()) {
case 5: constFormat = cldnn::format::goiyx; break;
case 6: constFormat = cldnn::format::goizyx; break;
}
} else {
switch (op->get_output_shape(0).size()) {
case 4: constFormat = cldnn::format::oiyx; break;
case 5: constFormat = cldnn::format::oizyx; break;
}
}
}
std::vector<cldnn::tensor::value_type> dims(constDims.begin(), constDims.end());
for (size_t i = dims.size(); i < 4; i++) {
dims.push_back(1);
}
constTensor = cldnn::tensor(constFormat, dims);
}
// If constDims has a dimension = 0, then create tensor with single value
// TODO: check if dim=0 is a valid case
if (std::accumulate(constDims.begin(), constDims.end(), 1, std::multiplies<size_t>()) == 0)
constTensor = cldnn::tensor{1};
// Swap O and I dimensions to match expected deconvolution weights format
bool swap_oi = prop.isWeights && prop.reversedChannelsOrder;
size_t inputFeatureElements = 1;
size_t outputFeatureElements = 1;
size_t groups = 1;
if (swap_oi) {
size_t expected_min_rank = 2 + (prop.hasGroupDimension ? 1 : 0);
if (expected_min_rank > constDims.size())
THROW_IE_EXCEPTION << "Invalid constant properties or shape";
auto newDims = constDims;
if (prop.hasGroupDimension) {
std::swap(newDims[2], newDims[1]);
inputFeatureElements = newDims[2];
outputFeatureElements = newDims[1];
groups = newDims[0];
} else {
std::swap(newDims[1], newDims[0]);
inputFeatureElements = newDims[1];
outputFeatureElements = newDims[0];
groups = 1;
}
constTensor = getConstTensor(newDims);
}
cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)),
constFormat,
constTensor);
@ -176,7 +173,30 @@ void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant
auto buf = tmpPointer.data();
auto bufSize = constLayout.bytes_count();
std::memcpy(&buf[0], &data[0], bufSize);
// Do actual weights reorder and change O and I channels order
if (swap_oi) {
auto elementSize = cldnn::data_type_traits::size_of(constLayout.data_type);
size_t spatial_dim_off = prop.hasGroupDimension ? 3 : 2;
size_t featureSize = elementSize;
for (size_t i = spatial_dim_off; i < constDims.size(); i++) {
featureSize *= constDims[i];
}
for (size_t g = 0; g < groups; g++) {
for (size_t i = 0; i < inputFeatureElements; i++) {
for (size_t o = 0; o < outputFeatureElements; o++) {
size_t outputShift = ((g*outputFeatureElements + o)*inputFeatureElements + i)*featureSize;
size_t inputShift = ((g*inputFeatureElements + i)*outputFeatureElements + o)*featureSize;
for (size_t b = 0; b < featureSize; b++) {
buf[outputShift + b] = data[inputShift + b];
}
}
}
}
} else {
std::memcpy(&buf[0], &data[0], bufSize);
}
p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
p.blobMemCache[data] = initialconstPrimID;
constPrimID = initialconstPrimID;

View File

@ -16,7 +16,7 @@
#include "api/convolution.hpp"
#include "api/deconvolution.hpp"
#include "api/binary_convolution.hpp"
#include "api/reshape.hpp"
#include "api/permute.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
@ -71,40 +71,9 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
auto outDims = op->get_output_shape(0);
auto outPrecision = op->get_output_element_type(0);
auto weightsName = inputs[1];
std::vector<cldnn::primitive_id> weights = {inputs[1]};
const bool weights_have_group_dim = true;
// WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
// Dimensions order is GOIYZ, but
// the selected format is OIZYX by default.
if (std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) == nullptr) {
std::string reshapeName = layerName + "_cldnn_weights_reshape";
std::string reorderName = layerName + "_cldnn_weights_reorder";
auto weights_shape = op->get_input_shape(1);
std::vector<size_t> new_weights_shape;
new_weights_shape.push_back(weights_shape[0] * weights_shape[1]); // Merged G and O dims
for (size_t i = 2; i < weights_shape.size(); i++) {
new_weights_shape.push_back(weights_shape[i]);
}
auto reshapePrim = cldnn::reshape(reshapeName,
weightsName,
CldnnTensorFromIEDims(new_weights_shape));
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
auto reorderPrim = cldnn::reorder(reorderName,
reshapeName,
DefaultFormatForDims(new_weights_shape.size()),
DataTypeFromPrecision(op->get_input_element_type(1)));
p.AddPrimitive(reorderPrim);
p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
weightsName = reorderName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
auto convPrim = cldnn::convolution(layerName,
inputs[0],
weights,
@ -114,7 +83,8 @@ void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
DataTypeFromPrecision(outPrecision));
DataTypeFromPrecision(outPrecision),
weights_have_group_dim);
p.AddPrimitive(convPrim);
p.AddPrimitiveToProfiler(op);
@ -130,6 +100,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
auto outPrecision = op->get_output_element_type(0);
std::vector<cldnn::primitive_id> weights = {inputs[1]};
const bool weights_have_group_dim = false;
auto convPrim = cldnn::convolution(layerName,
inputs[0],
weights,
@ -139,7 +111,8 @@ void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convo
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
DataTypeFromPrecision(outPrecision));
DataTypeFromPrecision(outPrecision),
weights_have_group_dim);
p.AddPrimitive(convPrim);
p.AddPrimitiveToProfiler(op);
@ -159,25 +132,30 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
}
auto weightsName = inputs[1];
// WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
auto weights_node = op->get_input_node_shared_ptr(1);
// WA: For the cases like Const(weights)->Sub(zp)->Deconv.
// Dimensions order of weights blob is IOYX, but
// the selected format is OIYX by default. So we need to swap I and O dimensions to match the format
if (IsNodeOnConstPath(op->get_input_node_shared_ptr(1))) {
std::string reshapeName = layerName + "_cldnn_weights_reshape";
auto weights_shape = op->get_input_shape(1);
std::swap(weights_shape[0], weights_shape[1]);
auto reshapePrim = cldnn::reshape(reshapeName,
// the selected format is OIYX by default. So we need to swap (and transpose) I and O dimensions to match the format
// For Constant node on input transpose is not needed, because the data is transposed on const node creation
if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
std::string permuteName = layerName + "_cldnn_weights_permute";
auto weights_rank = op->get_input_shape(1).size();
std::vector<uint16_t> permute_order(weights_rank);
std::iota(std::begin(permute_order), std::end(permute_order), 0);
// Should be 1, 0, 2, 3 {, 4} to swap O and I
std::swap(permute_order[1], permute_order[0]);
auto permutePrim = cldnn::permute(permuteName,
weightsName,
CldnnTensorFromIEDims(weights_shape));
ConvertPermuteOrder(permute_order, weights_rank));
p.AddPrimitive(reshapePrim);
p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
weightsName = reshapeName;
weightsName = permuteName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
const bool weights_have_group_dim = false;
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
auto deconvPrim = cldnn::deconvolution(layerName,
@ -187,9 +165,10 @@ void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::o
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
p.AddPrimitive(deconvPrim);
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim);
p.AddPrimitive(deconvPrim);
p.AddPrimitiveToProfiler(op);
}
@ -201,13 +180,38 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
auto dilations = op->get_dilations();
for (auto d : dilations) {
if (d != 1) {
THROW_IE_EXCEPTION << "Unsupported dilation in ConvolutionBackpropData " << op->get_friendly_name();
THROW_IE_EXCEPTION << "Unsupported dilation in GroupConvolutionBackpropData " << op->get_friendly_name();
}
}
uint32_t groups = op->get_input_shape(1)[0];
auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), groups);
std::vector<cldnn::primitive_id> weights = {inputs[1]};
auto weightsName = inputs[1];
auto weights_node = op->get_input_node_shared_ptr(1);
// WA: For the cases like Const(weights)->Sub(zp)->Deconv.
// Dimensions order of weights blob is IOYX, but
// the selected format is OIYX by default. So we need to swap I and O dimensions to match the format.
// For Constant node on input transpose is not needed, because the data is transposed on const node creation
if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::v0::Constant>(weights_node) == nullptr) {
std::string permuteName = layerName + "_cldnn_weights_permute";
auto weights_rank = op->get_input_shape(1).size();
std::vector<uint16_t> permute_order(weights_rank);
std::iota(std::begin(permute_order), std::end(permute_order), 0);
// Should be 0, 2, 1, 3, 4 {, 5} to swap O and I
std::swap(permute_order[2], permute_order[1]);
auto permutePrim = cldnn::permute(permuteName,
weightsName,
ConvertPermuteOrder(permute_order, weights_rank));
p.AddPrimitive(permutePrim);
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
weightsName = permuteName;
}
std::vector<cldnn::primitive_id> weights = {weightsName};
const bool weights_have_group_dim = true;
auto deconvPrim = cldnn::deconvolution(layerName,
inputs[0],
@ -216,9 +220,10 @@ void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngra
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));
p.AddPrimitive(deconvPrim);
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim);
p.AddPrimitive(deconvPrim);
p.AddPrimitiveToProfiler(op);
}

View File

@ -3,6 +3,7 @@
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"
@ -11,23 +12,6 @@
namespace CLDNNPlugin {
// Re-aligns a permute order to cldnn's dimension layout:
// pads the order up to rank 4 with value_to_align, then mirrors the spatial positions.
// @param ie_order        Permute order in IE dimension layout.
// @param value_to_align  Filler value used to pad the order up to rank 4.
// @return                The order padded to at least rank 4 with spatial dims swapped.
template<class Type>
std::vector<Type> GetPermuteOrder(const std::vector<Type>& ie_order, Type value_to_align = 0) {
    static_assert(std::is_integral<Type>::value, "Integral required.");
    std::vector<Type> cldnn_order = ie_order;

    // 1. Align to min. 4 sizes.
    //    'while' (not 'if') so inputs shorter than 4 are fully padded; otherwise a size-2
    //    input would stay at size 3 and an empty input would make the unsigned bound
    //    (size() - 2) / 2 below underflow.
    while (cldnn_order.size() < 4)
        cldnn_order.push_back(value_to_align);

    // 2. Swap spatial positions (cldnn spatial dims are listed in reverse order vs IE).
    for (size_t i = 0; i < (cldnn_order.size() - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[1 + cldnn_order.size() - (2 + i)]);
    }

    return cldnn_order;
}
void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
p.ValidateInputs(op, {1, 2});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -49,23 +33,7 @@ void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpo
ie_order.push_back((uint16_t)o);
}
// if order size is less than 4 - fill the rest with just copy
for (auto o = ie_order.size(); o < rank; o++)
ie_order.push_back((uint16_t)o);
/*
Because of the cldnn ordering: bfxy, and IE ordering: bfyx
we need to adjust the permute order.
*/
std::vector<uint16_t> cldnn_permute_order;
// 1. Switch permute order values for spatial dims
for (auto const& o : ie_order) {
if (o >= 2)
cldnn_permute_order.push_back(1 + ie_order.size() - o);
else
cldnn_permute_order.push_back(o);
}
cldnn_permute_order = GetPermuteOrder(cldnn_permute_order);
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(ie_order, rank);
auto permutePrim = cldnn::permute(layerName,
inputPrimitives[0],

View File

@ -34,6 +34,8 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*)",
R"(.*EltwiseLayerTest.*IS=\(2\).*OpType=Mod.*opType=VECTOR.*)",
R"(.*EltwiseLayerTest.*OpType=FloorMod.*netPRC=I64.*)",
// TODO: Issue: 46841
R"(.*(QuantGroupConvBackpropData3D).*)",
// These tests might fail due to accuracy loss a bit bigger than threshold
R"(.*(GRUCellTest).*)",

View File

@ -0,0 +1,80 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/quantized_convolution_backprop_data.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace SubgraphTestsDefinitions;
using namespace ngraph::helpers;
namespace {
// Network precisions to run the quantized ConvolutionBackpropData subgraph with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Output channel counts of the deconvolution under test.
const std::vector<size_t> numOutChannels = {16, 32};
// Number of quantization levels (256 == 8-bit quantization).
const std::vector<size_t > levels = {256};
// Both per-tensor and per-channel quantization granularities are covered.
const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
/* ============= 2D ConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
// Cartesian product of all 2D deconvolution + quantization parameters.
const auto quantConvBackpropData2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantConvBackpropData2D, QuantConvBackpropDataLayerTest,
::testing::Combine(
quantConvBackpropData2DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantConvBackpropDataLayerTest::getTestCaseName);
/* ============= 3D ConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
// Cartesian product of all 3D deconvolution + quantization parameters.
const auto quantConvBackpropData3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::ValuesIn(padBegins3D),
::testing::ValuesIn(padEnds3D),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantConvBackpropData3D, QuantConvBackpropDataLayerTest,
::testing::Combine(
quantConvBackpropData3DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantConvBackpropDataLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,83 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace SubgraphTestsDefinitions;
using namespace ngraph::helpers;
namespace {
// Network precisions to run the quantized GroupConvolutionBackpropData subgraph with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Output channel counts of the grouped deconvolution under test.
const std::vector<size_t> numOutChannels = {16, 32};
// Group counts; must divide both input and output channel counts.
const std::vector<size_t> numGroups = {2, 8, 16};
// Number of quantization levels (256 == 8-bit quantization).
const std::vector<size_t > levels = {256};
// Both per-tensor and per-channel quantization granularities are covered.
const std::vector<QuantizationGranularity > granularity = {Pertensor, Perchannel};
/* ============= 2D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}};
const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
// Cartesian product of all 2D grouped deconvolution + quantization parameters.
const auto quantGroupConvBackpropData2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantGroupConvBackpropData2D, QuantGroupConvBackpropDataLayerTest,
::testing::Combine(
quantGroupConvBackpropData2DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantGroupConvBackpropDataLayerTest::getTestCaseName);
/* ============= 3D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}};
const std::vector<std::vector<size_t >> kernels3D = {{3, 3, 3}};
const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
// Cartesian product of all 3D grouped deconvolution + quantization parameters.
const auto quantGroupConvBackpropData3DParams = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::ValuesIn(padBegins3D),
::testing::ValuesIn(padEnds3D),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::AUTO),
::testing::ValuesIn(levels),
::testing::ValuesIn(granularity)
);
INSTANTIATE_TEST_CASE_P(smoke_QuantGroupConvBackpropData3D, QuantGroupConvBackpropDataLayerTest,
::testing::Combine(
quantGroupConvBackpropData3DParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
QuantGroupConvBackpropDataLayerTest::getTestCaseName);
} // namespace

View File

@ -48,6 +48,12 @@ struct convolution : public primitive_base<convolution> {
/// @param with_activation Enable Relu activation.
/// @param activation_slp Relu activation slope.
/// @param output_size User-defined output data size of the primitive (w/o padding).
/// @param output_type User-defined output data type of the primitive.
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
/// This parameter affects how bfzyx and bfwzyx format on weights is converted:
/// bfzyx -> oizyx (grouped_weights_shape=false) or goiyx (grouped_weights_shape=true)
/// bfwzyx -> error (grouped_weights_shape=false) or goizyx (grouped_weights_shape=true)
/// If weights already have (g)oi(z)yx format, then this flag has no effect
convolution(const primitive_id& id,
const primitive_id& input,
const std::vector<primitive_id>& weights,
@ -58,6 +64,7 @@ struct convolution : public primitive_base<convolution> {
tensor dilation,
tensor output_size,
data_types output_type,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_type}),
input_offset(input_offset),
@ -70,6 +77,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -107,6 +115,7 @@ struct convolution : public primitive_base<convolution> {
tensor input_offset,
tensor dilation,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
input_offset(input_offset),
@ -119,6 +128,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(w_zero_point),
@ -163,6 +173,7 @@ struct convolution : public primitive_base<convolution> {
tensor input_offset,
tensor dilation,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding, optional_data_type{output_data_type}),
input_offset(input_offset),
@ -175,6 +186,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(w_zero_point),
@ -220,6 +232,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -264,6 +277,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -310,6 +324,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -343,6 +358,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -354,6 +370,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -385,6 +402,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -396,6 +414,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -437,6 +456,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -480,6 +500,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(padding_above),
padding_below(padding_below),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -508,6 +529,7 @@ struct convolution : public primitive_base<convolution> {
tensor stride = {1, 1, 1, 1},
tensor input_offset = tensor(0),
tensor dilation = {1, 1, 1, 1},
bool grouped_weights_shape = false,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -519,6 +541,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -561,6 +584,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -604,6 +628,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(false),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)),
weights_zero_points(std::vector<primitive_id>(0)),
@ -651,6 +676,7 @@ struct convolution : public primitive_base<convolution> {
padding_above(tensor(0)),
padding_below(tensor(0)),
deformable_mode(true),
grouped_weights_shape(false),
weights(weights),
bias(bias),
weights_zero_points(std::vector<primitive_id>(0)),
@ -756,6 +782,8 @@ struct convolution : public primitive_base<convolution> {
tensor padding_below;
/// @param deformable_mode.
bool deformable_mode;
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
bool grouped_weights_shape;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.

View File

@ -55,6 +55,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
/// @brief Constructs deconvolution primitive.
@ -81,6 +82,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(groups),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
@ -104,6 +106,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -129,6 +132,7 @@ struct deconvolution : public primitive_base<deconvolution> {
stride(stride),
with_output_size(false),
groups(groups),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -157,6 +161,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(bias) {}
@ -180,6 +185,7 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor stride,
tensor input_offset,
tensor output_size,
bool grouped_weights_shape,
const padding& output_padding = padding())
: primitive_base(id, {input}, output_padding),
input_offset(input_offset),
@ -187,6 +193,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(groups),
grouped_weights_shape(grouped_weights_shape),
weights(weights),
bias(bias) {}
@ -213,6 +220,7 @@ struct deconvolution : public primitive_base<deconvolution> {
with_output_size(true),
output_size(output_size),
groups(1),
grouped_weights_shape(false),
weights(weights),
bias(std::vector<primitive_id>(0)) {}
@ -283,6 +291,8 @@ struct deconvolution : public primitive_base<deconvolution> {
tensor output_size;
/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
uint32_t groups;
/// @param grouped_weights_shape Defines if weights tensor has explicit group dimension.
bool grouped_weights_shape;
/// @brief List of primitive ids containing weights data.
const primitive_id_arr weights;
/// @brief List of primitive ids containing bias data.

View File

@ -615,13 +615,13 @@ public:
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width, value_type height)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
if (batch_num == 0 && feature_num == 0 && width == 0 && height == 0)
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
if (batch_num == 0 && feature_num == 0 && x == 0 && y == 0)
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = 0;
}
@ -629,7 +629,7 @@ public:
/// @details Example:
/*! @code
*
tensor my_tensor( 2, 3, 4, 5, 6 ); // b=2, f=3, x=4, y=5, z =6
tensor my_tensor( 2, 3, 4, 5, 6 ); // b=2, f=3, x=4, y=5, z=6
cout << my_tensor.batch[0] << endl; // 2
cout << my_tensor.feature[0] << endl; // 3
cout << "x=" << my_tensor.spatial[0] << endl; // x=4
@ -638,38 +638,37 @@ public:
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width, value_type height, value_type depth)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y, value_type z)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = depth;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = z;
}
/// @brief Constructs @p tensor.
/// @details Example:
/*! @code
*
tensor my_tensor( 2, 3, 4, 5, 6, 7 ); // b=2, f=3, x=4, y=5, lx= 6, ly =7
tensor my_tensor( 2, 3, 4, 5, 6, 7 ); // b=2, f=3, x=4, y=5, z=6, w=7
cout << my_tensor.batch[0] << endl; // 2
cout << my_tensor.feature[0] << endl; // 3
cout << "x=" << my_tensor.spatial[0] << endl; // x=4
cout << "y=" << my_tensor.spatial[1] << endl; // y=5
cout << "local x=" << my_tensor.local[0] << endl; // local x=6
cout << "loxal y=" << my_tensor.local[1] << endl; // local y=7
cout << "z=" << my_tensor.spatial[2] << endl; // z=6
cout << "w=" << my_tensor.spatial[3] << endl; // w=7
*
* @endcode
*/
tensor(value_type batch_num, value_type feature_num, value_type width,
value_type height, value_type local_x, value_type local_y)
tensor(value_type batch_num, value_type feature_num, value_type x, value_type y, value_type z, value_type w)
: tensor(1) {
_sizes[0] = batch_num;
_sizes[tensor_batch_dim_max] = feature_num;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = width;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = height;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max] = local_x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + tensor_spatial_dim_max + 1] = local_y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max] = x;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 1] = y;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 2] = z;
_sizes[tensor_batch_dim_max + tensor_feature_dim_max + 3] = w;
}
/// @brief Constructs @p tensor using vector of sizes.

View File

@ -810,6 +810,11 @@ WeightsTensor WeightsTensor::TransformIgnorePadding(WeightsLayout l, WeightsType
vec[Channelndex(l, WeightsChannelName::Z)] = 1;
vec[Channelndex(l, WeightsChannelName::IFM)] = IFM().v;
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
} else if (g > 1 && src_channels == 5 && dst_channels == 4) {
vec[Channelndex(l, WeightsChannelName::X)] = X().v;
vec[Channelndex(l, WeightsChannelName::Y)] = Y().v;
vec[Channelndex(l, WeightsChannelName::IFM)] = Z().v;
vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v * IFM().v;
} else {
assert(0);
}

View File

@ -54,7 +54,7 @@ CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params,
} else {
dispatchData.gws[0] = output.Batch().v;
dispatchData.gws[1] = params.packed_binary_output ? CeilDiv(output.Feature().v, 32) : output.Feature().v;
dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v, 16);
dispatchData.gws[2] = Align(output.X().v * output.Y().v * output.Z().v * output.W().v, 16);
dispatchData.lws[0] = 1;
dispatchData.lws[1] = 1;

View File

@ -35,6 +35,7 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const {
k.EnableInputLayout(DataLayout::yxfb);
k.EnableInputLayout(DataLayout::byxf);
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableInputLayout(DataLayout::bfwzyx);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
@ -45,6 +46,7 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const {
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::yxfb);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfwzyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);

View File

@ -39,6 +39,12 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
const int x = zyx % OUTPUT_SIZE_X;
const int y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#elif OUTPUT_DIMS == 6
const int wzyx = get_global_id(2);
const int x = wzyx % OUTPUT_SIZE_X;
const int y = (wzyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
const int w = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z;
#endif
#if PACKED_BINARY_OUTPUT
@ -77,37 +83,49 @@ KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
#else
#if INPUT0_DIMS == 5
#if INPUT0_DIMS == 6
const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x);
#elif INPUT0_DIMS == 5
const int input_offset = INPUT0_GET_INDEX(b, of, z, y, x);
#elif INPUT0_DIMS <= 4
const int input_offset = INPUT0_GET_INDEX(b, of, y, x);
#endif
#if OUTPUT_DIMS == 5
#if OUTPUT_DIMS == 6
const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x);
#elif OUTPUT_DIMS == 5
const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x);
#elif OUTPUT_DIMS <= 4
const int output_offset = OUTPUT_GET_INDEX(b, of, y, x);
#endif
#if INPUT1_DIMS == 5
#if INPUT1_DIMS == 6
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT1_DIMS == 5
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT1_DIMS <= 4
const int input_low_offset = INPUT1_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT2_DIMS == 5
#if INPUT2_DIMS == 6
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT2_DIMS == 5
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT2_DIMS <= 4
const int input_high_offset = INPUT2_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT3_DIMS == 5
#if INPUT3_DIMS == 6
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT3_DIMS == 5
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT3_DIMS <= 4
const int output_low_offset = INPUT3_GET_INDEX_SAFE(b, of, y, x);
#endif
#if INPUT4_DIMS == 5
#if INPUT4_DIMS == 6
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, w, z, y, x);
#elif INPUT4_DIMS == 5
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT4_DIMS <= 4
const int output_high_offset = INPUT4_GET_INDEX_SAFE(b, of, y, x);

View File

@ -43,14 +43,25 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int x = zyx % OUTPUT_SIZE_X;
const int y = (zyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = (zyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y;
#elif OUTPUT_DIMS == 6
const int wzyx = get_global_id(GWS_YX);
const int x = wzyx % OUTPUT_SIZE_X;
const int y = (wzyx / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y;
const int z = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
const int w = ((wzyx / OUTPUT_SIZE_X) / OUTPUT_SIZE_Y) / OUTPUT_SIZE_Z;
#endif
#if INPUT0_DIMS == 5
#if INPUT0_DIMS == 6
const int input_offset = INPUT0_GET_INDEX(b, of, w, z, y, x);
#elif INPUT0_DIMS == 5
const int input_offset = INPUT0_GET_INDEX(b, of, z, y, x);
#elif INPUT0_DIMS <= 4
const int input_offset = INPUT0_GET_INDEX(b, of, y, x);
#endif
#if OUTPUT_DIMS == 5
#if OUTPUT_DIMS == 6
const int output_offset = OUTPUT_GET_INDEX(b, of, w, z, y, x);
#elif OUTPUT_DIMS == 5
const int output_offset = OUTPUT_GET_INDEX(b, of, z, y, x);
#elif OUTPUT_DIMS <= 4
const int output_offset = OUTPUT_GET_INDEX(b, of, y, x);
@ -61,6 +72,8 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, y, x);
#elif INPUT1_DIMS == 5
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT1_DIMS == 6
const int in_range_offset = INPUT1_GET_INDEX_SAFE(b, of, w, z, y, x);
#endif
#endif
@ -68,6 +81,8 @@ KERNEL(quantize_gpu_scale_shift_opt)(const __global INPUT0_TYPE* input,
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, y, x);
#elif INPUT7_DIMS == 5
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, z, y, x);
#elif INPUT7_DIMS == 6
const int scales_offset = INPUT7_GET_INDEX_SAFE(b, of, w, z, y, x);
#endif
#if PER_TENSOR_INPUT_SCALE

View File

@ -217,7 +217,17 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
// get output feature map from weights. It should be the same as number of biases. Will be verifed in
// convolution::create()
auto number_of_features = weights_layout.size.batch[0] * weights_layout.size.group[0];
auto group = desc->groups;
int32_t number_of_features = 0;
if (desc->grouped_weights_shape && !format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.feature[0] * static_cast<int32_t>(group);
} else {
if (format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
} else {
number_of_features = weights_layout.size.batch[0];
}
}
if (desc->with_output_size) {
CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
@ -336,6 +346,11 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
auto split = node.get_split();
for (decltype(split) j = 0; j < split; j++) {
auto filter_inst = node.weights(j).get_output_layout(); // convolution filter
auto weights_ifm = filter_inst.size.feature[0];
if (argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
weights_ifm = filter_inst.size.spatial[filter_inst.format.spatial_num() - 1] * argument.groups;
}
if (bias_term()) {
auto bias_inst = node.bias(j).get_output_layout();
CLDNN_ERROR_NOT_EQUAL(node.id(),
@ -406,7 +421,7 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
"Weights feature maps number",
(input_inst.size.feature[0] - input_offset.feature[0]) / split,
"input feature maps number",
filter_inst.size.feature[0],
weights_ifm,
"Weights/ifm mismatch");
}
}

View File

@ -49,7 +49,16 @@ layout deconvolution_inst::calc_output_layout(deconvolution_node const& node) {
auto strd = desc->stride;
auto group = desc->groups;
auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
int32_t number_of_features = 0;
if (desc->grouped_weights_shape && !format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.feature[0] * static_cast<int32_t>(group);
} else {
if (format::is_grouped(weights_layout.format)) {
number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(group);
} else {
number_of_features = weights_layout.size.batch[0];
}
}
if (desc->with_output_size) {
CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(),
@ -173,6 +182,10 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
for (decltype(split) j = 0; j < split; j++) {
auto filter_inst = node.weights(j).get_output_layout(); // deconvolution filter
auto input_offset = argument.input_offset;
auto weights_ifm = filter_inst.size.feature[0];
if (argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
weights_ifm = filter_inst.size.spatial[filter_inst.format.spatial_num() - 1] * argument.groups;
}
if (argument.bias.size() != 0) {
auto bias_inst = node.bias(j).get_output_layout();
@ -241,7 +254,7 @@ deconvolution_inst::typed_primitive_inst(network_impl& network, deconvolution_no
"Weights feature maps number",
(input_inst.size.feature[0] - input_offset.feature[0]) / split,
"input feature maps number",
filter_inst.size.feature[0],
weights_ifm,
"Weights/ifm mimsmatch");
}
}

View File

@ -88,10 +88,8 @@ public:
const auto& deformable_groups = primitive->deformable_groups;
const auto transposed = arg.get_transposed();
assert(arg.get_output_layout().size.feature[0] == weights_layout.size.batch[0] * weights_layout.size.group[0]);
auto conv_params = get_weight_bias_zero_point_default_params<kernel_selector::convolution_params>(
arg, split, 1);
arg, split, 1, primitive->grouped_weights_shape);
auto conv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::convolution_optional_params>(arg.get_program());

View File

@ -81,7 +81,8 @@ public:
auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
arg,
(groups > 1) ? 1 : actual_split,
1);
1,
primitive->grouped_weights_shape);
auto deconv_optional_params =
get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());

View File

@ -164,6 +164,12 @@ attach_quantize_gpu::attach_quantize_gpu() {
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);

View File

@ -90,6 +90,7 @@ void pre_replace_deconv::run(program_impl& p) {
bias_vec.push_back(bias_id);
auto input_offset = deconv_prim->input_offset;
auto output_padding = deconv_prim->output_padding;
auto grouped_weights_shape = deconv_prim->grouped_weights_shape;
// remove deconvolution node and its connections to weights and biases, rename it and move to the optimized
// list
@ -136,6 +137,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);
} else {
@ -146,6 +148,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);
}
@ -231,6 +234,7 @@ void pre_replace_deconv::run(program_impl& p) {
tensor stride = { 1, 1, 1, 1 };
tensor input_offset = { 0, 0, -scale_factor, -scale_factor };
auto output_padding = deconv_prim->output_padding;
auto grouped_weights_shape = deconv_prim->grouped_weights_shape;
// remove deconvolution node and its connections to weights and biases,
// rename it and move to the optimized list
@ -309,6 +313,7 @@ void pre_replace_deconv::run(program_impl& p) {
stride,
input_offset,
tensor{ 1, 1, 1, 1 },
grouped_weights_shape,
output_padding);
p.get_or_create(conv_prim);

View File

@ -312,7 +312,8 @@ void prepare_primitive_fusing::fuse_bias(program_impl &p) {
desc->input_offset,
desc->dilation,
conv.get_output_layout().size,
conv.get_output_layout().data_type);
conv.get_output_layout().data_type,
desc->grouped_weights_shape);
conv_with_bias_prim->activations_zero_points = desc->activations_zero_points;
conv_with_bias_prim->weights_zero_points = desc->weights_zero_points;
@ -334,7 +335,8 @@ void prepare_primitive_fusing::fuse_bias(program_impl &p) {
desc->groups,
desc->stride,
desc->input_offset,
deconv.get_output_layout().size);
deconv.get_output_layout().size,
desc->grouped_weights_shape);
auto& new_deconv_node = p.get_or_create(deconv_with_bias_prim);
fuse_bias_f(deconv, new_deconv_node, bias_node, eltw_node);

View File

@ -658,6 +658,7 @@ void prepare_quantization::prepare_asymmetric_quantization(program_impl &p) {
old_conv_prim->input_offset,
old_conv_prim->dilation,
output_size,
old_conv_prim->grouped_weights_shape,
old_conv_prim->output_padding);
auto& new_conv_node = p.get_or_create(new_conv_prim);

View File

@ -106,11 +106,11 @@ kernel_selector::weights_type to_weights_type(data_types dt);
data_types from_weights_type(kernel_selector::weights_type dt);
kernel_selector::data_layout to_data_layout(format f);
cldnn::format from_data_layout(kernel_selector::data_layout l);
kernel_selector::weights_layout to_weights_layout(format f);
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped);
cldnn::format::type from_weights_layout(kernel_selector::weights_layout l);
kernel_selector::tuning_mode to_tuning_mode(cldnn::tuning_mode mode);
kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split = 1, const tensor view_offset = tensor {});
kernel_selector::weights_tensor convert_weights_tensor(const layout& l);
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped = false);
layout from_weights_tensor(const kernel_selector::weights_tensor& t);
kernel_selector::activation_function get_kernel_selector_activation_param(activation_func activation_func);
@ -192,10 +192,10 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1) {
}
template <typename params_t, typename arg_t>
inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1) {
inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1, bool has_group_dimension = false) {
params_t params = get_default_params<params_t>(arg, split);
const auto& weights_layout = arg.weights().get_output_layout();
params.weights = convert_weights_tensor(weights_layout);
params.weights = convert_weights_tensor(weights_layout, has_group_dimension);
if (arg.bias_term()) {
auto bias_layout = arg.bias().get_output_layout();
@ -210,8 +210,8 @@ inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split
}
template <typename params_t, typename arg_t>
params_t get_weight_bias_zero_point_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1) {
params_t params = get_weights_bias_default_params<params_t>(arg, split, groups);
params_t get_weight_bias_zero_point_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1, bool has_group_dimension = false) {
params_t params = get_weights_bias_default_params<params_t>(arg, split, groups, has_group_dimension);
if (arg.weights_zero_points_term()) {
params.weights_zero_points.push_back(

View File

@ -200,7 +200,7 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) {
}
}
kernel_selector::weights_layout to_weights_layout(format f) {
kernel_selector::weights_layout to_weights_layout(format f, bool is_grouped) {
switch (f) {
case format::bfyx:
case format::oiyx:
@ -271,6 +271,12 @@ kernel_selector::weights_layout to_weights_layout(format f) {
case format::os_is_y_x8_osv8_isv4_swizzled_by_4:
return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4_swizzled_by_4;
case format::bfzyx:
return is_grouped ? kernel_selector::weights_layout::goiyx : kernel_selector::weights_layout::oizyx;
case format::bfwzyx: {
if (!is_grouped)
throw std::runtime_error("Invalid conversion of data format to weights format. bfwzyx can't be non-grouped as 4D spatials are not supported");
return kernel_selector::weights_layout::goizyx;
}
case format::oizyx:
return kernel_selector::weights_layout::oizyx;
case format::iozyx:
@ -593,10 +599,10 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split
return kernel_selector::data_tensor(vec, to_data_type(l.data_type), ks_layout);
}
kernel_selector::weights_tensor convert_weights_tensor(const layout& l) {
kernel_selector::weights_tensor convert_weights_tensor(const layout& l, bool is_grouped) {
const auto& t = l.size.sizes(l.format);
const auto ks_type = to_weights_type(l.data_type);
const auto ks_layout = to_weights_layout(l.format);
const auto ks_layout = to_weights_layout(l.format, is_grouped);
std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(ks_layout));
for (size_t i = 0; i < vec.size(); i++) {

View File

@ -1132,7 +1132,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) {
input_layout("input", input.get_layout()),
data("weights_1", weights_1),
data("biases_1", biases_1),
convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32));
convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32, true));
network network(engine, topology);
network.set_input_data("input", input);
@ -4295,7 +4295,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an
data("a_zp", a_zp),
data("w_zp", w_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { "w_zp" }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4366,7 +4366,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
data("biases", biases),
data("a_zp", a_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4451,7 +4451,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
data("biases", biases),
data("a_zp", a_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { }, { "a_zp" }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4552,7 +4552,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio
activation("activation", "input", activation_func::relu), // needed just to add padding
eltwise("in", {"activation", "a_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv", "in", { "weights" }, { "biases" }, 1,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, data_types::f32),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, data_types::f32, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -4623,7 +4623,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p
data("biases", biases),
data("w_zp", w_zp),
convolution("conv", "input", { "weights" }, { "biases" }, { "w_zp" }, { }, 1, data_types::f32,
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}),
tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}, false),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options opts;
@ -7018,7 +7018,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa
reorder("input_reordered", "input", reordered_input_layout),
data("weights", weights),
data("bias", bias),
convolution("conv", "input_reordered", { "weights" }, { "bias" }, num_groups, stride, input_offset, dilation, output_size, data_types::f32),
convolution("conv", "input_reordered", { "weights" }, { "bias" }, num_groups, stride, input_offset, dilation, output_size, data_types::f32, true),
reorder("out", "conv", format::bfyx, data_types::f32));
build_options options;
@ -7440,7 +7440,8 @@ TEST_P(convolution_grouped_gpu, base) {
stride_tensor,
tensor(batch(0), feature(0), spatial(-input_offset_x, -input_offset_y, -input_offset_z, 0)),
tensor(batch(1), feature(1), spatial(1, 1, 1, 1)),
ref_conv_out_size),
ref_conv_out_size,
true),
reorder("out", "conv", {data_types::f32, format::bfzyx, ref_conv_out_size}));
if (has_input_zp)

View File

@ -512,9 +512,9 @@ public:
#define CASE_CONV_FP32_8 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_9 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_10 {32, 16, 4, 5, 4}, {32, 32, 4, 5, 4}, {1, 1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bs_fs_zyx_bsv16_fsv16, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_13 {1, 16, 18, 5, 4}, {1, 16, 16, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_FP32_14 {1, 3, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_CONV_FP16_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx, data_types::f16, format::bfyx
@ -527,8 +527,8 @@ public:
#define CASE_CONV_FP16_8 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_9 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_10 {32, 16, 4, 5, 4}, {32, 32, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::bs_fs_zyx_bsv16_fsv16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_11 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_12 {1, 16, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f16, format::b_fs_zyx_fsv16, data_types::f16, format::g_os_is_zyx_isv16_osv16, data_types::f16, format::bfzyx
#define CASE_CONV_FP16_13 {16, 32, 4, 5}, {16, 64, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f16, format::fs_b_yx_fsv32, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_CONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 2, 3}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
@ -571,10 +571,10 @@ public:
#define CASE_CONV_ELTW_FP32_2 {1, 16, 4, 5}, {1, 32, 2, 3}, {1, 1, 1, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 32, 1, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::gs_oiyx_gsv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_5 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 32, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_6 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 16, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_5 {1, 32, 4, 5, 4}, {1, 32, 2, 3, 2}, {1, 32, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_6 {1, 32, 4, 5, 4}, {1, 16, 2, 3, 2}, {1, 16, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_7 {1, 16, 3, 5}, {1, 32, 1, 3}, {1, 32, 3, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::os_is_yx_isv16_osv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_FP32_8 {1, 32, 3, 5, 4}, {1, 16, 1, 3, 2}, {1, 1, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_FP32_8 {1, 32, 3, 5, 4}, {1, 16, 1, 3, 2}, {1, 1, 2, 1, 1}, {1, 1, 3, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
#define CASE_CONV_ELTW_i8_1 {1, 16, 3, 5}, {1, 32, 1, 3}, {1, 32, 3, 1}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::os_is_yx_osv16_isv16, data_types::f32, format::bfyx
#define CASE_CONV_ELTW_i8_2 {1, 16, 3, 5, 3}, {1, 32, 2, 4, 2}, {1, 1, 2, 4, 2}, {1, 1, 2, 2, 2}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfzyx, data_types::i8, format::oiyx, data_types::f32, format::bfzyx
@ -2214,7 +2214,7 @@ TEST_P(conv_int8_asymmetric_weights, basic) {
data("bias", get_mem(get_bias_layout(p))),
data("w_zp", get_mem(get_weights_zp_layout(p), 1, 127)),
eltwise("w_sub", {"weights", "w_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "input", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "input", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -2281,7 +2281,7 @@ TEST_P(conv_int8_asymmetric_data, basic) {
data("bias", get_mem(get_bias_layout(p))),
data("a_zp", get_mem(get_activations_zp_layout(p), 1, 127)),
eltwise("a_sub", {"input", "a_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "a_sub", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "a_sub", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -2352,7 +2352,7 @@ TEST_P(conv_int8_asymmetric_data_and_weights, basic) {
data("w_zp", get_mem(get_weights_zp_layout(p), 1, 127)),
eltwise("a_sub", {"input", "a_zp"}, eltwise_mode::sub, data_types::f32),
eltwise("w_sub", {"weights", "w_zp"}, eltwise_mode::sub, data_types::f32),
convolution("conv_prim", "a_sub", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32),
convolution("conv_prim", "a_sub", {"w_sub"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation, p.out_shape, data_types::f32, false),
reorder("reorder_bfyx", "conv_prim", p.default_format, data_types::f32)
);
tolerance = 1.f;
@ -3980,7 +3980,7 @@ using deconv_test_params = bc_test_params;
#define CASE_DECONV_S8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1}, tensor{1}, 32, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_S8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::i8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
@ -3989,7 +3989,7 @@ using deconv_test_params = bc_test_params;
#define CASE_DECONV_U8S8_1 {1, 15, 4, 5}, {1, 30, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_2 {1, 16, 4, 5}, {1, 32, 6, 7}, {1, 1, 3, 3}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_3 {1, 16, 4, 5}, {1, 32, 4, 5}, {1, 1, 1, 1}, tensor{1}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1, 0, 0}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_4 {1, 32, 4, 5}, {1, 32, 4, 5}, {1, 1, 3, 3}, tensor{1}, tensor{0, 0, -1, -1}, tensor{1}, 32, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::goiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_5 {1, 15, 4, 5}, {1, 30, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_6 {1, 16, 4, 5}, {1, 32, 9, 11}, {1, 1, 3, 3}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_DECONV_U8S8_7 {1, 16, 4, 5}, {1, 32, 7, 9}, {1, 1, 1, 1}, tensor{1, 1, 2, 2}, tensor{0}, tensor{1}, 1, data_types::u8, format::b_fs_yx_fsv16, data_types::i8, format::oiyx, data_types::f32, format::bfyx