[GPU] Align permute axis format with IE (#11379)
This commit is contained in:
committed by
GitHub
parent
d879e34363
commit
afdaa7cf89
@@ -169,31 +169,6 @@ inline cldnn::format DefaultFormatForDims(size_t dimensions) {
|
||||
return cldnn::format::bfyx; // Should not get here
|
||||
}
|
||||
|
||||
// Converts a permute order from IE axis notation (bfyx) into cldnn axis notation (bfxy),
// where the spatial axes (every axis with index >= 2) are enumerated in reverse.
//
// @param ie_order Permute order in IE notation. If it has fewer entries than the target
//                 rank, the missing trailing entries are filled with identity (axis i -> i).
// @param rank     Requested rank of the order; values below 4 are treated as 4.
// @return Permute order in cldnn notation with max(ie_order.size(), rank, 4) entries.
inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie_order, size_t rank = 0) {
    // The order is always handled as at least 4-dimensional.
    rank = std::max<size_t>(rank, 4);

    // Pad a short order with identity entries up to the target rank.
    std::vector<uint16_t> ie_order_aligned = ie_order;
    ie_order_aligned.reserve(std::max(rank, ie_order.size()));
    for (size_t axis = ie_order_aligned.size(); axis < rank; ++axis)
        ie_order_aligned.push_back(static_cast<uint16_t>(axis));

    const size_t dims = ie_order_aligned.size();

    std::vector<uint16_t> cldnn_order;
    cldnn_order.reserve(dims);

    // 1. Switch permute order values for spatial dims: spatial axis `a` becomes `1 + dims - a`,
    //    batch (0) and feature (1) values are kept as is.
    for (const uint16_t axis : ie_order_aligned) {
        if (axis >= 2)
            cldnn_order.push_back(static_cast<uint16_t>(1 + dims - axis));
        else
            cldnn_order.push_back(axis);
    }

    // 2. Swap spatial positions: reverse the entries stored at indices [2, dims).
    for (size_t i = 0; i < (dims - 2) / 2; ++i) {
        std::swap(cldnn_order[2 + i], cldnn_order[dims - 1 - i]);
    }

    return cldnn_order;
}
|
||||
|
||||
inline InferenceEngine::Layout InferenceEngineLayoutFromOVLayout(ov::Layout l) {
|
||||
if (l == ov::Layout("C")) return InferenceEngine::Layout::C;
|
||||
if (l == ov::Layout("CN")) return InferenceEngine::Layout::CN;
|
||||
|
||||
@@ -35,7 +35,7 @@ struct permute : public primitive_base<permute> {
|
||||
const std::vector<uint16_t>& permute_order = {},
|
||||
const primitive_id& ext_prim_id = "",
|
||||
const padding& output_padding = padding())
|
||||
: primitive_base(id, {input}, ext_prim_id, output_padding), permute_order(permute_order) {}
|
||||
: primitive_base(id, {input}, ext_prim_id, output_padding), permute_order(permute_order) { }
|
||||
|
||||
/// @brief Array of permuted output order in bfyx format.
|
||||
std::vector<uint16_t> permute_order;
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "lstm_inst.h"
|
||||
#include "reshape_inst.h"
|
||||
#include "resample_inst.h"
|
||||
#include "permute_inst.h"
|
||||
#include "depth_to_space_inst.h"
|
||||
#include "lstm_dynamic_inst.h"
|
||||
#include "lstm_dynamic_input_inst.h"
|
||||
|
||||
@@ -15,6 +15,33 @@ using namespace cldnn;
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
// Translates a permute order expressed in IE axis notation (bfyx) into the cldnn axis
// notation (bfxy), in which spatial axes (index >= 2) are counted from the opposite end.
//
// @param ie_order Permute order in IE notation; may be shorter than the target rank, in
//                 which case the missing trailing entries are identity (axis i -> i).
// @param rank     Requested rank of the order; anything below 4 is raised to 4.
// @return Order in cldnn notation holding max(ie_order.size(), rank, 4) entries.
inline std::vector<uint16_t> convert_permute_order(const std::vector<uint16_t>& ie_order, size_t rank = 0) {
    // The order is never treated as having fewer than 4 dimensions.
    rank = std::max<size_t>(rank, 4);

    // Extend a short order with identity entries so that it covers the whole rank.
    std::vector<uint16_t> aligned_order = ie_order;
    aligned_order.reserve(std::max(rank, ie_order.size()));
    for (size_t idx = aligned_order.size(); idx < rank; ++idx)
        aligned_order.push_back(static_cast<uint16_t>(idx));

    const size_t order_size = aligned_order.size();

    // 1. Switch permute order values for spatial dims: value `v >= 2` is mirrored to
    //    `1 + order_size - v`; batch (0) and feature (1) values stay untouched.
    std::vector<uint16_t> cldnn_order;
    cldnn_order.reserve(order_size);
    for (const uint16_t value : aligned_order) {
        const bool is_spatial = value >= 2;
        cldnn_order.push_back(is_spatial ? static_cast<uint16_t>(1 + order_size - value) : value);
    }

    // 2. Swap spatial positions: entries at indices [2, order_size) are reversed in place.
    const size_t swap_count = (order_size - 2) / 2;
    for (size_t i = 0; i < swap_count; ++i) {
        std::swap(cldnn_order[2 + i], cldnn_order[order_size - 1 - i]);
    }

    return cldnn_order;
}
|
||||
} // namespace
|
||||
|
||||
struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
using parent = typed_primitive_impl_ocl<permute>;
|
||||
using parent::parent;
|
||||
@@ -28,7 +55,8 @@ struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
auto permute_optional_params =
|
||||
get_default_optional_params<kernel_selector::permute_optional_params>(arg.get_program());
|
||||
|
||||
const auto& permute_order = arg.get_primitive()->permute_order;
|
||||
auto in_rank = arg.get_dependency(0).get_output_layout().get_rank();
|
||||
auto permute_order = convert_permute_order(arg.get_primitive()->permute_order, in_rank);
|
||||
permute_params.order = permute_order;
|
||||
auto& kernel_selector = kernel_selector::permute_kernel_selector::Instance();
|
||||
auto best_kernels = kernel_selector.GetBestKernels(permute_params, permute_optional_params);
|
||||
|
||||
@@ -376,7 +376,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
auto& permute_order = next.as<permute>().get_primitive()->permute_order;
|
||||
if ((fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::b_fs_yx_fsv32 || fmt_prev == format::b_fs_zyx_fsv32 ||
|
||||
fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_zyx_fsv16 || fmt_prev == format::bs_fs_yx_bsv16_fsv16)
|
||||
&& permute_order[1] == 2
|
||||
&& permute_order.back() != 1
|
||||
&& (!next.as<permute>().is_rotating_except_batch())) {
|
||||
return false;
|
||||
}
|
||||
@@ -428,7 +428,7 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node
|
||||
auto& permute_order = prev.as<permute>().get_primitive()->permute_order;
|
||||
if ((fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::b_fs_yx_fsv32 || fmt_prev == format::b_fs_zyx_fsv32 ||
|
||||
fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_zyx_fsv16 || fmt_prev == format::bs_fs_yx_bsv16_fsv16)
|
||||
&& permute_order[1] == 2
|
||||
&& permute_order.back() != 1
|
||||
&& (!prev.as<permute>().is_rotating_except_batch())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -24,20 +24,26 @@ layout permute_inst::calc_output_layout(permute_node const& node) {
|
||||
"Output data type forcing is not supported for permute_node!");
|
||||
auto input_layout = node.input().get_output_layout();
|
||||
auto permute_order = node.get_primitive()->permute_order;
|
||||
std::vector<tensor::value_type> output_sizes;
|
||||
std::vector<tensor::value_type> output_shape;
|
||||
|
||||
auto input_shape = input_layout.get_dims();
|
||||
|
||||
for (size_t x = 0; x < permute_order.size(); x++) {
|
||||
output_sizes.push_back(input_layout.size.raw[permute_order[x]]);
|
||||
output_shape.push_back(input_shape[permute_order[x]]);
|
||||
}
|
||||
|
||||
auto input_size = tensor(output_sizes);
|
||||
for (size_t i = output_shape.size(); i < 4; i++) {
|
||||
output_shape.push_back(1);
|
||||
}
|
||||
|
||||
auto output_size = tensor(format::get_default_format(input_layout.get_rank()), output_shape);
|
||||
auto op = node.get_primitive()->output_padding;
|
||||
|
||||
if (node.has_fused_primitives()) {
|
||||
input_layout.data_type = node.get_fused_output_layout().data_type;
|
||||
}
|
||||
|
||||
return layout(input_layout.data_type, input_layout.format, input_size, op);
|
||||
return layout(input_layout.data_type, input_layout.format, output_size, op);
|
||||
}
|
||||
|
||||
std::string permute_inst::to_string(permute_node const& node) {
|
||||
@@ -67,13 +73,6 @@ std::string permute_inst::to_string(permute_node const& node) {
|
||||
permute_inst::typed_primitive_inst(network& network, permute_node const& node) : parent(network, node) {
|
||||
auto permute_order = argument.permute_order;
|
||||
|
||||
CLDNN_ERROR_LESS_THAN(node.id(),
|
||||
"Permute order size",
|
||||
permute_order.size(),
|
||||
"minimum order size",
|
||||
4,
|
||||
"Permute order size needs to be at least 4.");
|
||||
|
||||
auto required_order_values_size = static_cast<uint32_t>(permute_order.size());
|
||||
|
||||
for (decltype(required_order_values_size) i = 0; i < required_order_values_size; i++) {
|
||||
|
||||
@@ -127,7 +127,7 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ng
|
||||
std::swap(permute_order[1], permute_order[0]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
permute_order,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
@@ -191,7 +191,7 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_p
|
||||
std::swap(permute_order[2], permute_order[1]);
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
ConvertPermuteOrder(permute_order, weights_rank),
|
||||
permute_order,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
|
||||
@@ -87,11 +87,10 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
|
||||
for (auto o = transpose_order.size(); o < 4; o++)
|
||||
transpose_order.push_back((uint16_t)o);
|
||||
|
||||
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(transpose_order);
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_b";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
weightsName,
|
||||
cldnn_permute_order,
|
||||
transpose_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
@@ -107,11 +106,10 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
|
||||
for (auto o = transpose_order.size(); o < 4; o++)
|
||||
transpose_order.push_back((uint16_t)o);
|
||||
|
||||
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(transpose_order);
|
||||
auto permuteName = op->get_friendly_name() + "/transpose_a";
|
||||
auto permutePrim = cldnn::permute(permuteName,
|
||||
inputName,
|
||||
cldnn_permute_order,
|
||||
transpose_order,
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(permutePrim);
|
||||
p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
|
||||
|
||||
@@ -20,13 +20,13 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::
|
||||
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
|
||||
std::string layerName = layer_type_name_ID(op);
|
||||
|
||||
std::vector<uint16_t> ie_order;
|
||||
std::vector<uint16_t> order;
|
||||
if (op->get_input_size() == 2) {
|
||||
auto order_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
|
||||
if (!order_constant) {
|
||||
IE_THROW() << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
|
||||
}
|
||||
ie_order = order_constant->cast_vector<uint16_t>();
|
||||
order = order_constant->cast_vector<uint16_t>();
|
||||
}
|
||||
|
||||
auto is_convert_color_type = [](const std::shared_ptr<ov::Node> &node) {
|
||||
@@ -40,7 +40,7 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::
|
||||
// In case of ConvertColor operation we have NHWC (byxf) input format which should be converted to
|
||||
// NCHW (bfyx) by this Permute, so we replace Permute with Reorder (to bfyx) primitive
|
||||
auto input = op->input(0).get_source_output().get_node_shared_ptr();
|
||||
if (is_convert_color_type(input) && ie_order == std::vector<uint16_t>{0, 3, 1, 2}) {
|
||||
if (is_convert_color_type(input) && order == std::vector<uint16_t>{0, 3, 1, 2}) {
|
||||
auto precision = input->get_element_type();
|
||||
p.AddPrimitive(cldnn::reorder(layerName,
|
||||
inputPrimitives[0],
|
||||
@@ -54,17 +54,15 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::
|
||||
}
|
||||
|
||||
int rank = std::max(4, static_cast<int>(op->get_input_shape(0).size()));
|
||||
if (ie_order.empty()) {
|
||||
if (order.empty()) {
|
||||
// No explicit order was provided: default to reversing all axes (rank is already padded to at least 4)
|
||||
for (int o = rank - 1; o >= 0; o--)
|
||||
ie_order.push_back((uint16_t)o);
|
||||
order.push_back((uint16_t)o);
|
||||
}
|
||||
|
||||
std::vector<uint16_t> cldnn_permute_order = ConvertPermuteOrder(ie_order, rank);
|
||||
|
||||
auto permutePrim = cldnn::permute(layerName,
|
||||
inputPrimitives[0],
|
||||
cldnn_permute_order,
|
||||
order,
|
||||
op->get_friendly_name());
|
||||
|
||||
p.AddPrimitive(permutePrim);
|
||||
|
||||
@@ -90,8 +90,8 @@ TEST_P(permute_eltwise_loop, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
#define CASE_LOOP_F32_1 3, { 1, 8, 3, 2 }, { 1, 2, 8, 3 }, { 1, 2, 8, 1 }, { 0, 3, 1, 2 }, data_types::f32, format::bfyx, data_types::f32
|
||||
#define CASE_LOOP_F16_0 4, { 1, 12, 4, 2 }, { 1, 2, 12, 4 }, { 1, 2, 12, 1 }, { 0, 3, 1, 2 }, data_types::f16, format::bfyx, data_types::f16
|
||||
#define CASE_LOOP_F32_1 3, { 1, 8, 3, 2 }, { 1, 2, 8, 3 }, { 1, 2, 8, 1 }, { 0, 2, 3, 1 }, data_types::f32, format::bfyx, data_types::f32
|
||||
#define CASE_LOOP_F16_0 4, { 1, 12, 4, 2 }, { 1, 2, 12, 4 }, { 1, 2, 12, 1 }, { 0, 2, 3, 1 }, data_types::f16, format::bfyx, data_types::f16
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_eltwise_loop, ::testing::ValuesIn(std::vector<loop_params>{
|
||||
loop_params{ CASE_LOOP_F32_1, 3, 5 },
|
||||
|
||||
@@ -87,75 +87,75 @@ public:
|
||||
/* ------------------------------------------------------------------------------------------------------------ */
|
||||
#define CASE_PERMUTE_F32_0 { 1, 16, 2, 2 }, { 1, 16, 2, 2 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_1 { 1, 15, 16, 16 }, { 1, 15, 16, 16 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_2 { 1, 8, 16, 16 }, { 16, 16, 8, 1 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_3 { 1, 1, 3, 4 }, { 1, 3, 4, 1 }, { 1, 2, 3, 0 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_2 { 1, 8, 16, 16 }, { 16, 16, 8, 1 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_3 { 1, 1, 3, 4 }, { 1, 3, 4, 1 }, { 1, 3, 0, 2 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_4 { 2, 16, 16, 16 }, { 2, 16, 16, 16 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_5 { 1, 32, 4, 5 }, { 32, 4, 5, 1 }, { 1, 2, 3, 0 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_6 { 1, 16, 4, 5 }, { 5, 16, 4, 1 }, { 3, 1, 2, 0 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_7 { 1, 16, 1, 1 }, { 1, 1, 1, 16 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_5 { 1, 32, 4, 5 }, { 32, 4, 5, 1 }, { 1, 3, 0, 2 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_6 { 1, 16, 4, 5 }, { 5, 16, 4, 1 }, { 2, 1, 0, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F32_7 { 1, 16, 1, 1 }, { 1, 1, 1, 16 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_PERMUTE_F16_0 { 1, 16, 4, 5 }, { 1, 16, 4, 5 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_1 { 2, 16, 4, 5 }, { 16, 4, 5, 2 }, { 1, 2, 3, 0 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_2 { 1, 32, 2, 3 }, { 2, 3, 32, 1 }, { 2, 3, 1, 0 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_3 { 3, 16, 1, 1 }, { 1, 1, 16, 3 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_4 { 2, 15, 4, 5 }, { 4, 2, 5, 15 }, { 2, 0, 3, 1 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_5 { 1, 15, 1, 2 }, { 15, 2, 1, 1 }, { 1, 3, 2, 0 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_6 { 1, 15, 4, 4 }, { 4, 4, 1, 15 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_1 { 2, 16, 4, 5 }, { 16, 4, 5, 2 }, { 1, 3, 0, 2 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_2 { 1, 32, 2, 3 }, { 2, 3, 32, 1 }, { 3, 2, 0, 1 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_3 { 3, 16, 1, 1 }, { 1, 1, 16, 3 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_4 { 2, 15, 4, 5 }, { 4, 2, 5, 15 }, { 3, 0, 1, 2 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_5 { 1, 15, 1, 2 }, { 15, 2, 1, 1 }, { 1, 2, 0, 3 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_F16_6 { 1, 15, 4, 4 }, { 4, 4, 1, 15 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_PERMUTE_S8_0 { 1, 15, 4, 5 }, { 1, 15, 4, 5 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::i8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_1 { 1, 15, 4, 5 }, { 5, 4, 15, 1 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::i8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_2 { 1, 16, 1, 2 }, { 1, 1, 16, 2 }, { 2, 0, 1, 3 }, tensor{ 0 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_3 { 1, 16, 2, 2 }, { 2, 2, 16, 1 }, { 2, 3, 1, 0 }, tensor{ 0 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_0 { 1, 15, 4, 5 }, { 15, 5, 1, 4 }, { 1, 3, 0, 2 }, tensor{ 0 }, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_1 { 1, 15, 16, 16 }, { 15, 16, 1, 16 }, { 1, 2, 0, 3 }, tensor{ 0 }, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_1 { 1, 15, 4, 5 }, { 5, 4, 15, 1 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::i8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_2 { 1, 16, 1, 2 }, { 1, 1, 16, 2 }, { 3, 0, 2, 1 }, tensor{ 0 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_S8_3 { 1, 16, 2, 2 }, { 2, 2, 16, 1 }, { 3, 2, 0, 1 }, tensor{ 0 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_0 { 1, 15, 4, 5 }, { 15, 5, 1, 4 }, { 1, 2, 3, 0 }, tensor{ 0 }, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_1 { 1, 15, 16, 16 }, { 15, 16, 1, 16 }, { 1, 3, 2, 0 }, tensor{ 0 }, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_2 { 1, 32, 5, 4 }, { 1, 32, 5, 4 }, { 0, 1, 2, 3 }, tensor{ 0 }, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_3 { 1, 16, 4, 5 }, { 5, 4, 16, 1 }, { 3, 2, 1, 0 }, tensor{ 0 }, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_U8_3 { 1, 16, 4, 5 }, { 5, 4, 16, 1 }, { 2, 3, 0, 1 }, tensor{ 0 }, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
// 3d
|
||||
#define CASE_PERMUTE_F32_3D_0 { 1, 15, 4, 4, 5 }, { 1, 15, 4, 4, 5 }, { 0, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_1 { 2, 15, 2, 3, 4 }, { 15, 2, 3, 4, 2 }, { 1, 2, 3, 4, 0 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_2 { 2, 16, 4, 4, 5 }, { 4, 2, 4, 5, 16 }, { 3, 0, 2, 4, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_3 { 1, 32, 4, 2, 2 }, { 2, 2, 32, 1, 4 }, { 4, 3, 1, 0, 2 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_4 { 1, 16, 1, 1, 1 }, { 1, 1, 1, 16, 1 }, { 2, 4, 0, 1, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_1 { 2, 15, 2, 3, 4 }, { 15, 2, 3, 4, 2 }, { 1, 4, 0, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_2 { 2, 16, 4, 4, 5 }, { 4, 2, 4, 5, 16 }, { 3, 0, 1, 2, 4 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_3 { 1, 32, 4, 2, 2 }, { 2, 2, 32, 1, 4 }, { 2, 3, 4, 0, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F32_3D_4 { 1, 16, 1, 1, 1 }, { 1, 1, 1, 16, 1 }, { 4, 2, 3, 1, 0 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
|
||||
#define CASE_PERMUTE_F16_3D_0 { 1, 15, 4, 4, 5 }, { 1, 15, 4, 4, 5 }, { 0, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_1 { 2, 15, 4, 3, 4 }, { 4, 4, 2, 15, 3 }, { 2, 4, 0, 1, 3 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_1 { 2, 15, 4, 3, 4 }, { 4, 4, 2, 15, 3 }, { 4, 2, 3, 1, 0 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_2 { 2, 16, 4, 4, 3 }, { 2, 4, 3, 16, 4 }, { 0, 3, 4, 1, 2 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_3 { 1, 32, 4, 2, 1 }, { 2, 32, 4, 1, 1 }, { 3, 1, 2, 4, 0 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_4 { 16, 16, 1, 1, 1 },{ 1, 16, 1, 1, 16 },{ 4, 0, 3, 2, 1 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_3 { 1, 32, 4, 2, 1 }, { 2, 32, 4, 1, 1 }, { 3, 1, 0, 2, 4 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_F16_3D_4 { 16, 16, 1, 1, 1 },{ 1, 16, 1, 1, 16 },{ 2, 0, 1, 4, 3 }, tensor{ 0 }, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||
|
||||
#define CASE_PERMUTE_S8_3D_0 { 1, 15, 4, 4, 5 }, { 1, 15, 4, 4, 5 }, { 0, 1, 2, 3, 4 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_S8_3D_1 { 2, 15, 4, 3, 4 }, { 4, 4, 15, 2, 3 }, { 4, 2, 1, 0, 3 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_S8_3D_1 { 2, 15, 4, 3, 4 }, { 4, 4, 15, 2, 3 }, { 2, 4, 3, 0, 1 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_S8_3D_2 { 2, 16, 4, 4, 3 }, { 2, 4, 3, 16, 4 }, { 0, 3, 4, 1, 2 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_S8_3D_3 { 1, 32, 4, 2, 1 }, { 2, 32, 4, 1, 1 }, { 3, 1, 2, 4, 0 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_0 { 16, 16, 1, 1, 1 }, { 1, 1, 16, 16, 1 }, { 2, 4, 0, 1, 3 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_1 { 16, 16, 1, 1, 1 }, { 1, 1, 1, 16, 16 }, { 4, 3, 2, 1, 0 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_2 { 2, 16, 4, 4, 3 }, { 4, 2, 4, 3, 16 }, { 3, 0, 2, 4, 1 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_3 { 1, 32, 4, 2, 1 }, { 1, 2, 32, 1, 4 }, { 4, 3, 1, 0, 2 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_S8_3D_3 { 1, 32, 4, 2, 1 }, { 2, 32, 4, 1, 1 }, { 3, 1, 0, 2, 4 }, tensor{ 0 }, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_0 { 16, 16, 1, 1, 1 }, { 1, 1, 16, 16, 1 }, { 4, 2, 3, 1, 0 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_1 { 16, 16, 1, 1, 1 }, { 1, 1, 1, 16, 16 }, { 2, 3, 0, 1, 4 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_2 { 2, 16, 4, 4, 3 }, { 4, 2, 4, 3, 16 }, { 3, 0, 1, 2, 4 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_U8_3D_3 { 1, 32, 4, 2, 1 }, { 1, 2, 32, 1, 4 }, { 2, 3, 4, 0, 1 }, tensor{ 0 }, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx
|
||||
|
||||
// permute_tile_8x8_4x4
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_0 { 1, 8, 8, 2 }, { 1, 2, 8, 8 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_1 { 1, 5, 8, 2 }, { 1, 2, 5, 8 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_2 { 1, 8, 5, 2 }, { 1, 2, 8, 5 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_3 { 1, 5, 5, 2 }, { 1, 2, 5, 5 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_0 { 1, 8, 8, 2, 2 }, { 1, 2, 8, 8, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_1 { 1, 5, 8, 2, 2 }, { 1, 2, 5, 8, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_2 { 1, 8, 5, 2, 2 }, { 1, 2, 8, 5, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_3 { 1, 5, 5, 2, 2 }, { 1, 2, 5, 5, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_0 { 1, 8, 8, 2, 2, 2 }, { 1, 2, 8, 8, 2, 2 }, { 0, 5, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_1 { 1, 5, 8, 2, 2, 2 }, { 1, 2, 5, 8, 2, 2 }, { 0, 5, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_2 { 1, 8, 5, 2, 2, 2 }, { 1, 2, 8, 5, 2, 2 }, { 0, 5, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_3 { 1, 5, 5, 2, 2, 2 }, { 1, 2, 5, 5, 2, 2 }, { 0, 5, 1, 2, 3, 4 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_0 { 1, 8, 8, 2 }, { 1, 2, 8, 8 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_1 { 1, 5, 8, 2 }, { 1, 2, 5, 8 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_2 { 1, 8, 5, 2 }, { 1, 2, 8, 5 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_4D_3 { 1, 5, 5, 2 }, { 1, 2, 5, 5 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_0 { 1, 8, 8, 2, 2 }, { 1, 2, 8, 8, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_1 { 1, 5, 8, 2, 2 }, { 1, 2, 5, 8, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_2 { 1, 8, 5, 2, 2 }, { 1, 2, 8, 5, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_5D_3 { 1, 5, 5, 2, 2 }, { 1, 2, 5, 5, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_0 { 1, 8, 8, 2, 2, 2 }, { 1, 2, 8, 8, 2, 2 }, { 0, 2, 3, 4, 5, 1 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_1 { 1, 5, 8, 2, 2, 2 }, { 1, 2, 5, 8, 2, 2 }, { 0, 2, 3, 4, 5, 1 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_2 { 1, 8, 5, 2, 2, 2 }, { 1, 2, 8, 5, 2, 2 }, { 0, 2, 3, 4, 5, 1 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_6D_3 { 1, 5, 5, 2, 2, 2 }, { 1, 2, 5, 5, 2, 2 }, { 0, 2, 3, 4, 5, 1 }, tensor{ 0 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
|
||||
// permute_tile_8x8_4x4_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_0 { 1, 16, 16, 2 }, { 1, 2, 16, 16 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_1 { 1, 15, 16, 2 }, { 1, 2, 15, 16 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_2 { 1, 16, 3, 2 }, { 1, 2, 16, 3 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_3 { 1, 5, 7, 2 }, { 1, 2, 5, 7 }, { 0, 3, 1, 2 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_0 { 1, 16, 16, 2, 2 }, { 1, 2, 16, 16, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_1 { 1, 15, 16, 2, 2 }, { 1, 2, 15, 16, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_2 { 1, 16, 3, 2, 2 }, { 1, 2, 16, 3, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_3 { 1, 5, 7, 2, 2 }, { 1, 2, 5, 7, 2 }, { 0, 4, 1, 2, 3 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_0 { 1, 16, 16, 2 }, { 1, 2, 16, 16 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_1 { 1, 15, 16, 2 }, { 1, 2, 15, 16 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_2 { 1, 16, 3, 2 }, { 1, 2, 16, 3 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_4D_3 { 1, 5, 7, 2 }, { 1, 2, 5, 7 }, { 0, 2, 3, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_0 { 1, 16, 16, 2, 2 }, { 1, 2, 16, 16, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_1 { 1, 15, 16, 2, 2 }, { 1, 2, 15, 16, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_2 { 1, 16, 3, 2, 2 }, { 1, 2, 16, 3, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
#define CASE_PERMUTE_TILE_8x8_4x4_FSV16_5D_3 { 1, 5, 7, 2, 2 }, { 1, 2, 5, 7, 2 }, { 0, 2, 3, 4, 1 }, tensor{ 0 }, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::b_fs_zyx_fsv16
|
||||
|
||||
// Empty fixture derived from PermuteFusingTest; it adds no members and only
// provides the suite name used by TEST_P(permute_activation_scale_eltwise, ...).
class permute_activation_scale_eltwise: public PermuteFusingTest {};
|
||||
TEST_P(permute_activation_scale_eltwise, basic) {
|
||||
@@ -455,49 +455,49 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_scale_eltwise_actv_scale_actv, ::t
|
||||
/* ---------------------------- PERMUTE FUSE REDUNDANT REORDER cases ------------------------------------------ */
|
||||
/* ------------------------------------------------------------------------------------------------------------ */
|
||||
|
||||
#define CASE_PERMUTE_REORDER_F32_0 { 1, 16, 32, 2 }, { 0, 3, 2, 1 }, { 0, 3, 2, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F32_1 { 2, 7, 9, 27 }, { 0, 3, 2, 1 }, { 0, 3, 2, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv4, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F32_2 { 1, 16, 4, 5, 16 }, { 0, 3, 2, 1, 4 }, { 0, 3, 2, 1, 4 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F16_0 { 1, 16, 2, 4 }, { 0, 3, 2, 1 }, { 0, 3, 2, 1 }, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F16_1 { 1, 16, 4, 5, 16 }, { 0, 4, 2, 3, 1 }, { 0, 1, 2, 3, 4 }, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F32_0 { 1, 16, 32, 2 }, { 0, 2, 1, 3 }, { 0, 2, 1, 3 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F32_1 { 2, 7, 9, 27 }, { 0, 2, 1, 3 }, { 0, 2, 1, 3 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv4, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F32_2 { 1, 16, 4, 5, 16 }, { 0, 2, 3, 4, 1 }, { 0, 2, 3, 4, 1 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F16_0 { 1, 16, 2, 4 }, { 0, 2, 1, 3 }, { 0, 2, 1, 3 }, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_F16_1 { 1, 16, 4, 5, 16 }, { 0, 2, 1, 3, 4 }, { 0, 1, 2, 3, 4 }, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F16_2 { 1, 5, 1, 2, 14 }, { 0, 3, 2, 1, 4 }, { 0, 3, 2, 1, 4 }, data_types::f16, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
|
||||
// type change
|
||||
#define CASE_PERMUTE_REORDER_S8_TO_F32_0 { 1, 15, 4, 5 }, { 0, 3, 2, 1 }, { 0, 3, 2, 1 }, data_types::i8, data_types::f32, format::b_fs_yx_fsv4, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_S8_TO_F32_0 { 1, 15, 4, 5 }, { 0, 2, 1, 3 }, { 0, 2, 1, 3 }, data_types::i8, data_types::f32, format::b_fs_yx_fsv4, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_S8_TO_F32_1 { 1, 2, 15, 4, 5 }, { 0, 3, 2, 1, 4 }, { 0, 3, 2, 1, 4 }, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F32_TO_F16_0 { 1, 5, 1, 2, 14 }, { 0, 4, 2, 3, 1 }, { 0, 1, 2, 3, 4 }, data_types::f32, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_U8_TO_F16_0 { 1, 17, 1, 2, 7 }, { 0, 4, 2, 3, 1 }, { 0, 1, 2, 3, 4 }, data_types::u8, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_F32_TO_F16_0 { 1, 5, 1, 2, 14 }, { 0, 2, 1, 3, 4 }, { 0, 1, 2, 3, 4 }, data_types::f32, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_U8_TO_F16_0 { 1, 17, 1, 2, 7 }, { 0, 2, 1, 3, 4 }, { 0, 1, 2, 3, 4 }, data_types::u8, data_types::f16, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
|
||||
// dim change
|
||||
#define CASE_PERMUTE_REORDER_4D_TO_5D_F32_0 { 1, 16, 8, 16 }, { 1, 2, 0, 3 }, { 0, 3, 1, 4, 2 }, data_types::f32, data_types::f32, format::bfyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_4D_TO_6D_F32_1 { 1, 16, 8, 16 }, { 0, 2, 3, 1 }, { 0, 3, 5, 4, 1, 2 }, data_types::f32, data_types::f32, format::bfyx, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_4D_F32_0 { 1, 16, 4, 5, 18 },{ 0, 4, 2, 3, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_4D_F32_1 { 1, 16, 4, 5, 16 },{ 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_6D_F32_2 { 1, 16, 8, 4, 16 }, { 0, 4, 2, 3, 1 }, { 0, 3, 5, 4, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_6D_TO_4D_F32_0 { 1, 16, 4, 5, 4, 16 }, { 0, 2, 5, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::bfwzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_6D_TO_5D_F32_1 { 1, 16, 4, 5, 4, 16 }, { 0, 2, 5, 3, 4, 1 }, { 0, 3, 4, 1, 2 }, data_types::f32, data_types::f32, format::bfwzyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_4D_TO_5D_F32_0 { 1, 16, 8, 16 }, { 1, 3, 2, 0 }, { 0, 3, 4, 2, 1 }, data_types::f32, data_types::f32, format::bfyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_4D_TO_6D_F32_1 { 1, 16, 8, 16 }, { 0, 3, 1, 2 }, { 0, 4, 5, 1, 3, 2 }, data_types::f32, data_types::f32, format::bfyx, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_4D_F32_0 { 1, 16, 4, 5, 18 },{ 0, 2, 1, 3, 4 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_4D_F32_1 { 1, 16, 4, 5, 16 },{ 0, 4, 1, 2, 3 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_5D_TO_6D_F32_2 { 1, 16, 8, 4, 16 }, { 0, 2, 1, 3, 4 }, { 0, 4, 5, 1, 3, 2 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_6D_TO_4D_F32_0 { 1, 16, 4, 5, 4, 16 }, { 0, 5, 1, 4, 3, 2 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::bfwzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_6D_TO_5D_F32_1 { 1, 16, 4, 5, 4, 16 }, { 0, 5, 1, 4, 3, 2 }, { 0, 3, 4, 1, 2 }, data_types::f32, data_types::f32, format::bfwzyx, format::bfzyx
|
||||
|
||||
// permute_opt for blocked format
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_0 { 1, 256, 2, 64 }, { 0, 3, 1, 2 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_1 { 1, 78, 2, 259 }, { 0, 3, 1, 2 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_2 { 1, 48, 1, 3, 259 }, { 0, 4, 1, 2, 3 }, { 0, 2, 3, 4, 1 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_0 { 1, 256, 2, 64 }, { 0, 2, 3, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_1 { 1, 78, 2, 259 }, { 0, 2, 3, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_2 { 1, 48, 1, 3, 259 }, { 0, 2, 3, 4, 1 }, { 0, 4, 1, 2, 3 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx
|
||||
|
||||
// permute_opt for blocked format => reorder to different dim
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_3 { 1, 45, 1, 3, 259 }, { 0, 4, 1, 2, 3 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_4 { 2, 273, 19, 19 }, { 0, 3, 1, 2 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_5 { 2, 546, 2, 2 }, { 0, 3, 1, 2 }, { 0, 2, 3, 1 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_3 { 1, 45, 1, 3, 259 }, { 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_4 { 2, 273, 19, 19 }, { 0, 2, 3, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F32_5 { 2, 546, 2, 2 }, { 0, 2, 3, 1 }, { 0, 3, 1, 2 }, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx
|
||||
|
||||
// permute opt for blocked format => reorder to different dim/type
|
||||
#define CASE_PERMUTE_REORDER_TILED_I8_4 { 1, 45, 1, 3, 259 }, { 0, 4, 1, 2, 3 }, { 0, 2, 3, 1 }, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_5 { 1, 48, 3, 256 }, { 0, 3, 1, 2 }, { 0, 2, 4, 3, 1 }, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_6 { 1, 48, 2, 3, 256 }, { 0, 4, 1, 2, 3 }, { 0, 2, 5, 4, 3, 1 }, data_types::f16, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_I8_4 { 1, 45, 1, 3, 259 }, { 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::i8, data_types::f32, format::b_fs_zyx_fsv16, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_5 { 1, 48, 3, 256 }, { 0, 2, 3, 1 }, { 0, 4, 1, 3, 2 }, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_6 { 1, 48, 2, 3, 256 }, { 0, 2, 3, 4, 1 }, { 0, 5, 1, 4, 3, 2 }, data_types::f16, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx
|
||||
|
||||
// permute opt for non_blocked format => reorder to different dim/type
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_7 { 1, 48, 2, 3, 256 }, { 0, 4, 1, 2, 3 }, { 0, 2, 3, 1 }, data_types::f16, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_8 { 1, 28, 2, 2, 3, 256 }, { 0, 5, 1, 2, 3, 4 }, { 0, 2, 3, 1 }, data_types::f16, data_types::f32, format::bfwzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_9 { 1, 24, 2, 3, 256 }, { 0, 4, 1, 2, 3 }, { 0, 2, 3, 1 }, data_types::f16, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_10 { 1, 35, 3, 253 }, { 0, 3, 1, 2 }, { 0, 2, 4, 3, 1 }, data_types::f16, data_types::f32, format::bfyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_11 { 1, 32, 3, 253 }, { 0, 3, 1, 2 }, { 0, 2, 4, 5, 3, 1 }, data_types::f16, data_types::f32, format::bfyx, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_7 { 1, 48, 2, 3, 256 }, { 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f16, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_8 { 1, 28, 2, 2, 3, 256 }, { 0, 2, 3, 4, 5, 1 }, { 0, 3, 1, 2 }, data_types::f16, data_types::f32, format::bfwzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_9 { 1, 24, 2, 3, 256 }, { 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f16, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_10 { 1, 35, 3, 253 }, { 0, 2, 3, 1 }, { 0, 4, 1, 3, 2 }, data_types::f16, data_types::f32, format::bfyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_11 { 1, 32, 3, 253 }, { 0, 2, 3, 1 }, { 0, 5, 1, 4, 2, 3 }, data_types::f16, data_types::f32, format::bfyx, format::bfwzyx
|
||||
|
||||
// Empty fixture derived from PermuteReorderFusingTest; it adds no members and only
// provides the suite name used by TEST_P / INSTANTIATE_TEST_SUITE_P(permute_redundant_reorder).
class permute_redundant_reorder : public PermuteReorderFusingTest {};
|
||||
TEST_P(permute_redundant_reorder, basic) {
|
||||
@@ -514,21 +514,21 @@ TEST_P(permute_redundant_reorder, basic) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_redundant_reorder, ::testing::ValuesIn(std::vector<permute_reorder_params>{
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_1, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_2, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_2, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_S8_TO_F32_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_S8_TO_F32_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_TO_F16_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_U8_TO_F16_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_1, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_2, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_S8_TO_F32_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_S8_TO_F32_1, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_TO_F16_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_U8_TO_F16_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_4D_TO_5D_F32_0, 3, 3 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_4D_TO_6D_F32_1, 3, 3 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_0, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_1, 3, 3 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_6D_F32_2, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_6D_F32_2, 4, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_6D_TO_4D_F32_0, 3, 3 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_6D_TO_5D_F32_1, 3, 3 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F32_0, 3, 4 },
|
||||
@@ -563,17 +563,17 @@ TEST_P(permute_act_reorder, basic) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_act_reorder, ::testing::ValuesIn(std::vector<permute_reorder_params>{
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_0, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_1, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_0, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_1, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F32_2, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_0, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_1, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_2, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_0, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_1, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_F16_2, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_4D_TO_5D_F32_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_4D_TO_6D_F32_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_0, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_0, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_4D_F32_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_6D_F32_2, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_5D_TO_6D_F32_2, 4, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_6D_TO_4D_F32_0, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_6D_TO_5D_F32_1, 3, 4 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F32_0, 3, 5 },
|
||||
|
||||
@@ -53,7 +53,7 @@ TEST(depth_to_space_fp16_gpu, d1411_bs2) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ TEST(depth_to_space_fp16_gpu, d1421_bs2) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,7 +158,7 @@ TEST(depth_to_space_fp16_gpu, d1933_bs3) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,7 +197,7 @@ TEST(depth_to_space_fp32_gpu, d1411_bs2) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], output_ptr[i]);
|
||||
ASSERT_EQ(expected_results[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,7 +231,7 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) {
|
||||
auto output = outputs.at("depth_to_space").get_memory();
|
||||
cldnn::mem_lock<FLOAT16> output_ptr (output, get_test_stream());
|
||||
|
||||
std::vector<uint16_t> perm = { 0,4,5,2,1,3 };
|
||||
std::vector<uint16_t> perm = { 0,3,4,1,5,2 };
|
||||
|
||||
topology topology_ref;
|
||||
topology_ref.add(input_layout("Input0", input1->get_layout()));
|
||||
@@ -259,7 +259,7 @@ TEST(depth_to_space_fp32_gpu, d112960540_bs2) {
|
||||
cldnn::mem_lock<FLOAT16> output_ptr_ref(output_ref, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < output->get_layout().count(); ++i) {
|
||||
EXPECT_EQ(output_ptr_ref[i], output_ptr[i]);
|
||||
ASSERT_EQ(output_ptr_ref[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,7 +314,7 @@ TEST(depth_to_space_fp32_gpu, d1933_bs3) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], output_ptr[i]);
|
||||
ASSERT_EQ(expected_results[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -364,7 +364,7 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_blocks_first) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], output_ptr[i]);
|
||||
ASSERT_EQ(expected_results[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -414,6 +414,6 @@ TEST(depth_to_space_fp32_gpu, d1822_bs2_depth_first) {
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_EQ(expected_results[i], output_ptr[i]);
|
||||
ASSERT_EQ(expected_results[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,37 +26,32 @@ TEST(permute_gpu_f32, output_ordering_test)
|
||||
{
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
std::vector<std::vector<int32_t>> input_tensors =
|
||||
{
|
||||
{ 10, 5, 15, 2 },{ 2, 4, 6, 8 },{ 2, 2, 3, 2 },{ 9, 8, 7, 4 }
|
||||
std::vector<std::vector<int32_t>> input_tensors = {
|
||||
{ 10, 5, 15, 2 },
|
||||
{ 2, 4, 6, 8 },
|
||||
{ 2, 2, 3, 2 },
|
||||
{ 9, 8, 7, 4 }
|
||||
};
|
||||
std::vector<std::vector<uint16_t>> permutations =
|
||||
{
|
||||
std::vector<std::vector<uint16_t>> permutations = {
|
||||
{ 0, 1, 2, 3 }, //do nothing
|
||||
{ 0, 1, 3, 2 }, //replace x with y
|
||||
{ 1, 0, 3, 2 }, //replace b with f
|
||||
{ 0, 2, 3, 1 } //big permutation
|
||||
{ 0, 1, 3, 2 }, //replace x with y
|
||||
{ 1, 0, 3, 2 }, //replace b with f
|
||||
{ 0, 2, 3, 1 } //big permutation
|
||||
};
|
||||
std::vector<format> input_formats = { format::bfyx, format::yxfb };
|
||||
|
||||
auto get_permutation = [&](const std::vector<int32_t>& inp1, const std::vector<uint16_t>& order)
|
||||
{
|
||||
auto get_permutation = [&](const std::vector<int32_t>& inp1, const std::vector<uint16_t>& order) {
|
||||
EXPECT_EQ(inp1.size(), order.size());
|
||||
std::vector<int32_t> output;
|
||||
for (auto const& o : order)
|
||||
{
|
||||
for (auto const& o : order) {
|
||||
output.push_back(inp1.at(o));
|
||||
}
|
||||
return output;
|
||||
};
|
||||
|
||||
for (auto const& fr : input_formats)
|
||||
{
|
||||
for (auto const& inp_t : input_tensors)
|
||||
{
|
||||
for (auto const& perm : permutations)
|
||||
{
|
||||
|
||||
for (auto const& fr : input_formats) {
|
||||
for (auto const& inp_t : input_tensors) {
|
||||
for (auto const& perm : permutations) {
|
||||
auto input = engine.allocate_memory({ data_types::f32, fr, tensor(inp_t) });
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
@@ -68,12 +63,12 @@ TEST(permute_gpu_f32, output_ordering_test)
|
||||
auto output = outputs.at("permute");
|
||||
auto output_mem = output.get_memory();
|
||||
EXPECT_EQ(outputs.size(), size_t(1));
|
||||
auto ref_tensor = get_permutation(inp_t, perm);
|
||||
auto out_tensor = output_mem->get_layout().size;
|
||||
EXPECT_EQ(out_tensor.batch[0], ref_tensor[0]);
|
||||
EXPECT_EQ(out_tensor.feature[0], ref_tensor[1]);
|
||||
EXPECT_EQ(out_tensor.spatial[0], ref_tensor[2]);
|
||||
EXPECT_EQ(out_tensor.spatial[1], ref_tensor[3]);
|
||||
auto ref_tensor = get_permutation(input->get_layout().get_dims(), perm);
|
||||
auto out_tensor = output_mem->get_layout().get_dims();
|
||||
EXPECT_EQ(out_tensor[0], ref_tensor[0]);
|
||||
EXPECT_EQ(out_tensor[1], ref_tensor[1]);
|
||||
EXPECT_EQ(out_tensor[2], ref_tensor[2]);
|
||||
EXPECT_EQ(out_tensor[3], ref_tensor[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -552,9 +547,9 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
|
||||
topology topology_unfused(
|
||||
input_layout("input", input->get_layout()),
|
||||
reorder("reorder1", "input", format::b_fs_yx_fsv4, data_types::f32),
|
||||
permute("permute", "reorder1", { 0, 3, 1, 2}),
|
||||
permute("permute", "reorder1", { 0, 2, 3, 1}),
|
||||
reorder("reorder2", "permute", format::bfyx, data_types::f32),
|
||||
permute("out", "reorder2", { 0, 2, 3, 1}));
|
||||
permute("out", "reorder2", { 0, 3, 1, 2}));
|
||||
|
||||
cldnn::build_options options_unfused;
|
||||
options_unfused.set_option(cldnn::build_option::optimize_data(false));
|
||||
@@ -567,9 +562,9 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
|
||||
topology topology_fused(
|
||||
input_layout("input", input->get_layout()),
|
||||
reorder("reorder1", "input", format::b_fs_yx_fsv4, data_types::f32),
|
||||
permute("permute", "reorder1", { 0, 3, 1, 2}),
|
||||
permute("permute", "reorder1", { 0, 2, 3, 1}),
|
||||
reorder("reorder2", "permute", format::bfyx, data_types::f32), // to be fused to previous permute
|
||||
permute("out", "reorder2", { 0, 2, 3, 1})); // return to original value
|
||||
permute("out", "reorder2", { 0, 3, 1, 2})); // return to original value
|
||||
|
||||
cldnn::build_options options_fused;
|
||||
options_fused.set_option(cldnn::build_option::optimize_data(true));
|
||||
@@ -794,7 +789,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape)
|
||||
const int w_reshape = 2;
|
||||
const int z_reshape = 2;
|
||||
|
||||
std::vector<uint16_t> permute_order = { 0, 1, 5, 4, 2, 3 };
|
||||
std::vector<uint16_t> permute_order = { 0, 1, 4, 5, 3, 2 };
|
||||
|
||||
auto input_size = cldnn::tensor(batch(b), feature(f), spatial(x, y));
|
||||
auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, input_size });
|
||||
@@ -839,10 +834,10 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape)
|
||||
EXPECT_EQ(expected_out[i], output_ptr[i]);
|
||||
}
|
||||
}
|
||||
TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1)
|
||||
TEST(permute_gpu_f32, basic_bfzyx_permute_0_4_1_2_3)
|
||||
{
|
||||
// Input : bfzyx:2x2x2x2x3
|
||||
// Permute order : { 0,2,3,4,1 }
|
||||
// Permute order : { 0,4,1,2,3 }
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
@@ -872,7 +867,7 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1)
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 2, 3, 4, 1 }));
|
||||
permute("permute", "input", { 0, 4, 1, 2, 3 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -918,9 +913,9 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_2_3_4_1)
|
||||
* Test cases for permute_tile_8x8_4x4 kernel
|
||||
*
|
||||
* These TCs are enabled only when the feature axis moves to the last position (batch stays first).
|
||||
* i.e permute order is 0,3,1,2 or 0,4,1,2,3 or 0,5,1,2,3,4
|
||||
* i.e. permute order is 0,2,3,1 or 0,2,3,4,1 or 0,2,3,4,5,1
|
||||
*/
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_2_3_1) {
|
||||
// Input : bfyx:2x8x2x8
|
||||
// Permute order : { 0,2,3,1 }
|
||||
|
||||
@@ -932,14 +927,14 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) {
|
||||
|
||||
std::vector<float> input_data;
|
||||
input_data.reserve(array_size);
|
||||
for (size_t i=0 ; i < array_size; ++i)
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
input_data.push_back(static_cast<float>(i));
|
||||
|
||||
set_values(input, input_data);
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 3, 1, 2 }));
|
||||
permute("permute", "input", { 0, 2, 3, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -976,7 +971,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_3_1_2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_2_3_1) {
|
||||
// Input : bfyx:2x5x2x8
|
||||
// Permute order : { 0,2,3,1 }
|
||||
|
||||
@@ -995,7 +990,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 3, 1, 2 }));
|
||||
permute("permute", "input", { 0, 2, 3, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1026,7 +1021,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_3_1_2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_2_3_1) {
|
||||
// Input : bfyx:2x8x2x5
|
||||
// Permute order : { 0,2,3,1 }
|
||||
|
||||
@@ -1051,7 +1046,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 3, 1, 2 }));
|
||||
permute("permute", "input", { 0, 2, 3, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1082,7 +1077,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_3_1_2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_2_3_1) {
|
||||
// Input : bfyx:2x5x2x5
|
||||
// Permute order : { 0,2,3,1 }
|
||||
|
||||
@@ -1101,7 +1096,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 3, 1, 2 }));
|
||||
permute("permute", "input", { 0, 2, 3, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1132,7 +1127,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_3_1_2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_2_3_4_1) {
|
||||
// Input : bfzyx:2x8x2x2x8
|
||||
// Permute order : { 0,2,3,4,1 }
|
||||
|
||||
@@ -1151,7 +1146,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 4, 1, 2, 3 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1194,7 +1189,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_2_3_4_1) {
|
||||
// Input : bfzyx:2x5x2x2x8
|
||||
// Permute order : { 0,2,3,4,1 }
|
||||
|
||||
@@ -1213,7 +1208,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 4, 1, 2, 3 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1248,7 +1243,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_2_3_4_1) {
|
||||
// Input : bfzyx:2x8x2x2x5
|
||||
// Permute order : { 0,2,3,4,1 }
|
||||
|
||||
@@ -1267,7 +1262,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 4, 1, 2, 3 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1302,7 +1297,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_2_3_4_1) {
|
||||
// Input : bfzyx:2x5x2x2x5
|
||||
// Permute order : { 0,2,3,4,1 }
|
||||
|
||||
@@ -1321,7 +1316,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 4, 1, 2, 3 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1356,9 +1351,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_2_3_4_5_1) {
|
||||
// Input : bfwzyx:2x8x2x2x2x8
|
||||
// Permute order : { 0,5,1,2,3,4 }
|
||||
// Permute order : { 0,2,3,4,5,1 }
|
||||
|
||||
constexpr size_t array_size = 1024;
|
||||
|
||||
@@ -1375,7 +1370,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 5, 1, 2, 3, 4 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 5, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1428,9 +1423,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_5_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_2_3_4_5_1) {
|
||||
// Input : bfwzyx:2x5x2x2x2x8
|
||||
// Permute order : { 0,5,1,2,3,4 }
|
||||
// Permute order : { 0,2,3,4,5,1 }
|
||||
|
||||
constexpr size_t array_size = 640;
|
||||
|
||||
@@ -1447,7 +1442,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 5, 1, 2, 3, 4 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 5, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1488,9 +1483,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_2_3_4_5_1) {
|
||||
// Input : bfwzyx:2x8x2x2x2x5
|
||||
// Permute order : { 0,5,1,2,3,4 }
|
||||
// Permute order : { 0,2,3,4,5,1 }
|
||||
|
||||
constexpr size_t array_size = 640;
|
||||
|
||||
@@ -1507,7 +1502,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 5, 1, 2, 3, 4 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 5, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1548,9 +1543,9 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_2_3_4_5_1) {
|
||||
// Input : bfwzyx:2x5x2x2x2x5
|
||||
// Permute order : { 0,5,1,2,3,4 }
|
||||
// Permute order : { 0,2,3,4,5,1 }
|
||||
|
||||
constexpr size_t array_size = 400;
|
||||
|
||||
@@ -1567,7 +1562,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_5_4_1_2_3) {
|
||||
|
||||
topology topology(
|
||||
input_layout("input", input->get_layout()),
|
||||
permute("permute", "input", { 0, 5, 1, 2, 3, 4 }));
|
||||
permute("permute", "input", { 0, 2, 3, 4, 5, 1 }));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
@@ -1670,12 +1665,13 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
|
||||
std::swap(internal_sizes.at(2), internal_sizes.back());
|
||||
cldnn::tensor tensor(internal_sizes);
|
||||
|
||||
cldnn::format format = sizes.size() == 4?cldnn::format::bfyx:cldnn::format::bfzyx;
|
||||
cldnn::format format = sizes.size() == 4 ? cldnn::format::bfyx : cldnn::format::bfzyx;
|
||||
|
||||
std::vector<uint16_t> order{0, static_cast<uint16_t>(sizes.size()-1)};
|
||||
for (uint16_t i = 1; i<(sizes.size()-1); ++i) {
|
||||
order.push_back(i);
|
||||
std::vector<uint16_t> order = {0};
|
||||
for (uint16_t i = 1; i < (sizes.size() - 1); ++i) {
|
||||
order.push_back(i+1);
|
||||
}
|
||||
order.push_back(1);
|
||||
|
||||
auto input = engine.allocate_memory({Data_Type, format, tensor});
|
||||
set_random_values<type>(input);
|
||||
|
||||
@@ -21,8 +21,8 @@ TEST(test_device_mem_usage_estimation, basic) {
|
||||
topology topology(
|
||||
input_layout("input1", input1->get_layout()),
|
||||
input_layout("input2", input2->get_layout()),
|
||||
permute("permute1", "input1", { 0, 2, 3, 1 }),
|
||||
permute("permute2", "input2", { 0, 3, 2, 1 }),
|
||||
permute("permute1", "input1", { 0, 3, 1, 2 }),
|
||||
permute("permute2", "input2", { 0, 2, 1, 3 }),
|
||||
eltwise("eltw", {"permute1", "permute2"}, eltwise_mode::sum, data_types::f16),
|
||||
reorder("output", "eltw", format::bfyx, data_types::f32)
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user