diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.h index 1058f0064df..7733055b8ce 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.h @@ -25,7 +25,12 @@ public: KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; std::vector GetSupportedFusedOps() const override { - return { FusedOpType::QUANTIZE }; + return { + FusedOpType::QUANTIZE, + FusedOpType::ACTIVATION, + FusedOpType::SCALE, + FusedOpType::ELTWISE + }; } protected: diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp index 5ad8d92505d..88f076bcea4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // y ou may not use this file except in compliance with the License. @@ -70,9 +70,13 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"}; } - FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1}; - FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET}; - jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord})); + if (!params.layoutBased && !params.int8_quantization && !params.broadcast && CheckInputsOutputNoPitchSameDims(params)) { + FusedOpsConfiguration conf = {"", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET}; + jit.Merge(MakeFusedOpsJitConstants(params, {conf})); + } else { + FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1}; + jit.Merge(MakeFusedOpsJitConstants(params, {conf})); + } } return jit; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h index 82aac856a61..8039afad3b4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -26,7 +26,12 @@ public: KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; std::vector GetSupportedFusedOps() const override { - return { FusedOpType::QUANTIZE }; + return { + FusedOpType::QUANTIZE, + FusedOpType::ACTIVATION, + FusedOpType::SCALE, + FusedOpType::ELTWISE + }; } JitConstants GetJitConstants(const eltwise_params& params) const; @@ -34,4 +39,4 @@ public: protected: bool Validate(const Params& p, const optional_params& o) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_simple_vload8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_simple_vload8.cl index 0894c7b4254..bf05fe1b24c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_simple_vload8.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_simple_vload8.cl @@ -1,4 +1,4 @@ -// Copyright (c) 2016-2017 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,18 +15,17 @@ #include "include/include_all.cl" -KERNEL(eltwise_gpu_vload8)( - INPUTS_DECLS - __global UNIT_TYPE* output) +KERNEL(eltwise_gpu_vload8)(INPUTS_DECLS + __global OUTPUT_TYPE* output) { const uint global_id = get_global_id(0); VLOAD_DECLS - MAKE_VECTOR_TYPE(UNIT_TYPE, 8) res; + MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8) res; DO_ELTWISE - + res = ACTIVATION(res, ACTIVATION_PARAMS); vstore8(res, global_id, output); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl index e8f7b1fa12d..f83a5851ca7 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl @@ -57,10 +57,10 @@ KERNEL(eltwise)( #else const uint d1 = get_global_id(0); const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1]; - const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZE[2]; - const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZE[2]; - const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[3]; - const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[3]; + const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZES[2]; + const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZES[2]; + const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[4]; + const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[4]; uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1); #endif @@ -117,13 +117,8 @@ KERNEL(eltwise)( DO_ELTWISE; #if HAS_FUSED_OPS - #if ELTWISE_NO_PITCH_SAME_DIMS - FUSED_OPS_LINEAR; - OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR; - #else - FUSED_OPS_TENSOR; - OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR; - #endif + FUSED_OPS; + OUTPUT_TYPE out = FUSED_OPS_RESULT; #else #define out res #endif diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index ae872043335..7df057de511 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -1395,6 +1395,7 @@ bool FusedOpsCodeGenerator::CanPreloadData(const FusedOpsConfiguration& conf) co switch (d) { case Tensor::DataChannelName::BATCH: can_preload &= idx.b == "0"; break; case Tensor::DataChannelName::FEATURE: can_preload &= idx.f == "0"; break; + case Tensor::DataChannelName::W: can_preload &= idx.w == "0"; break; case Tensor::DataChannelName::Z: can_preload &= idx.z == "0"; break; case Tensor::DataChannelName::Y: can_preload &= idx.y == "0"; break; case Tensor::DataChannelName::X: can_preload &= idx.x == "0"; break; @@ -1619,7 +1620,7 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati case KernelType::ACTIVATION: { auto p = desc.GetOpParams(); base_activation_params activation_p = p->param; - op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_var + ";"; + op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(in_var, vec_size) + ";"; if (activation_p.function != ActivationFunction::NONE) { auto suffix = "_FUSED_OP"+std::to_string(desc.op_id) + conf.suffix; std::string nl_m = std::to_string(activation_p.m); @@ -1678,6 +1679,8 @@ std::string FusedOpsCodeGenerator::GetIdx(size_t input_id, idx_desc idx, bool sh idx_order = idx.b + "," + idx.f + "," + idx.y + "," + idx.x; } else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 5) { idx_order = idx.b + "," + idx.f + "," + idx.z + "," + idx.y + "," + idx.x; + } else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 6) { + idx_order = idx.b + "," + idx.f + "," + idx.w + "," + idx.z + "," + idx.y + "," + idx.x; } if (should_be_safe) { diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h index 197204be396..ecf22d70b36 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h @@ -308,12 +308,13 @@ public: struct idx_desc { std::string b; std::string f; + std::string w; std::string z; std::string y; std::string x; size_t dims; explicit idx_desc(std::vector idx, DataTensor t) - : b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) { + : b("0"), f("0"), w("0"), z("0"), y("0"), x("0"), dims(0) { dims = idx.size(); switch (dims) { case 1: f = idx[0]; break; @@ -321,7 +322,8 @@ public: case 3: b = idx[0]; f = idx[1]; y = idx[2]; break; case 4: b = idx[0]; f = idx[1]; y = idx[2]; x = idx[3]; break; case 5: b = idx[0]; f = idx[1]; z = idx[2]; y = idx[3]; x = idx[4]; break; - default: throw std::runtime_error("More than 5 dimenstions is not supported in fused op generator"); + case 6: b = idx[0]; f = idx[1]; w = idx[2]; z = idx[3]; y = idx[4]; x = idx[5]; break; + default: throw std::runtime_error("More than 6 dimenstions is not supported in fused op generator"); } if (t.Batch().v == 1) { @@ -330,6 +332,9 @@ public: if (t.Feature().v == 1) { f = "0"; } + if (t.W().v == 1) { + w = "0"; + } if (t.Z().v == 1) { z = "0"; } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp index 42bd9693fd8..ebd84484f99 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp @@ -171,6 +171,7 @@ attach_eltwise_gpu::attach_eltwise_gpu() { { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create }, { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create }, { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create }, + { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create }, { std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create }, { std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create }, diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp index 7fab414f546..97b26c71b28 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp @@ -423,6 +423,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type() && reduce_supports_fusings(input_data.as()); + should_fuse |= input_data.is_type(); + + should_fuse |= input_data.is_type(); + if (!should_fuse) return; @@ -478,6 +482,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type() && reduce_supports_fusings(input_data.as()); + should_fuse |= input_data.is_type(); + + should_fuse |= input_data.is_type(); + if (!should_fuse) return; @@ -562,12 +570,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); - should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); - should_fuse |= input_data.is_type() && reduce_supports_fusings(input_data.as()) && quantize_node.get_scale_shift_opt(); + should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); + + should_fuse |= input_data.is_type() && quantize_node.get_scale_shift_opt(); + if (!should_fuse) return; @@ -589,31 +599,26 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) { std::vector parents = node.get_dependencies(); std::list users = node.get_users(); + std::vector can_fuse_parents = { false, false }; + + for (size_t i = 0; i < parents.size(); i++) { + can_fuse_parents[i] = (parents[i]->is_type() && conv_supports_fusings(parents[i]->as())) || + (parents[i]->is_type() && mvn_supports_fusings(parents[i]->as())) || + (parents[i]->is_type()) || + (parents[i]->is_type()) || + (parents[i]->is_type()) || + (parents[i]->is_type() && gemm_supports_fusings(parents[i]->as())) || + (parents[i]->is_type()) || + (parents[i]->is_type()) || + (parents[i]->is_type()) || + (parents[i]->is_type()) || + (parents[i]->is_type() && dts_supports_fusings(parents[i]->as())) || + (parents[i]->is_type() && reduce_supports_fusings(parents[i]->as())); + } + auto parent1 = parents[0]; auto parent2 = parents[1]; - bool can_fuse_parent1 = (parent1->is_type() && conv_supports_fusings(parent1->as())) || - (parent1->is_type() && mvn_supports_fusings(parent1->as())) || - (parent1->is_type()) || (parent1->is_type()) || - (parent1->is_type()) || - (parent1->is_type() && gemm_supports_fusings(parent1->as())) || - (parent1->is_type()) || (parent1->is_type()) || - (parent1->is_type() && dts_supports_fusings(parent1->as())) || - (parent1->is_type()) || (parent1->is_type()) || - (parent1->is_type() && reduce_supports_fusings(parent1->as())); - - bool can_fuse_parent2 = (parent2->is_type() && conv_supports_fusings(parent2->as())) || - (parent2->is_type() && mvn_supports_fusings(parent2->as())) || - (parent2->is_type()) || (parent2->is_type()) || - (parent2->is_type()) || - (parent2->is_type() && gemm_supports_fusings(parent2->as())) || - (parent2->is_type()) || (parent2->is_type()) || - (parent2->is_type() && dts_supports_fusings(parent2->as())) || - (parent2->is_type()) || (parent2->is_type()) || - (parent2->is_type() && reduce_supports_fusings(parent2->as())); - - std::vector can_fuse_parents = { can_fuse_parent1, can_fuse_parent2 }; - auto p1_raw_size = parent1->get_output_layout().size.sizes(); auto p2_raw_size = parent2->get_output_layout().size.sizes(); for (unsigned k = 0; k < p1_raw_size.size(); k++) { diff --git a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h index f89d405aebd..f2bbe601a31 100644 --- a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h +++ b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h @@ -1,5 +1,5 @@ /* -// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2016-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ public: program_node& scale_in() const { return get_dependency(1); } program_node& bias() const { return get_dependency(2); } - bool bias_term() const { return get_dependencies().size() > 2; } + bool bias_term() const { return get_primitive()->bias.length() != 0; } std::shared_ptr get_fuse_params() const override { return std::make_shared(); @@ -62,7 +62,7 @@ public: memory_impl& scale_memory() const { return dep_memory(1); } memory_impl& bias_memory() const { return dep_memory(2); } - bool bias_term() const { return _deps.size() > 2; } + bool bias_term() const { return _node.as().bias_term(); } }; using scale_inst = typed_primitive_inst; diff --git a/inference-engine/thirdparty/clDNN/src/scale.cpp b/inference-engine/thirdparty/clDNN/src/scale.cpp index 9c37754406f..e864ca7f971 100644 --- a/inference-engine/thirdparty/clDNN/src/scale.cpp +++ b/inference-engine/thirdparty/clDNN/src/scale.cpp @@ -49,6 +49,10 @@ layout scale_inst::calc_output_layout(scale_node const& node) { if (desc->output_data_type) result.data_type = *desc->output_data_type; + if (node.has_fused_primitives()) { + result.data_type = node.get_fused_output_layout().data_type; + } + if (scale_x_size != 1) { CLDNN_ERROR_NOT_EQUAL(node.id(), "Scale x size", scale_x_size, "input x size", input_x_size, ""); } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index ffac7b53808..358e49ccba2 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -4314,23 +4314,23 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) { INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, ::testing::ValuesIn(std::vector{ - deconv_test_params{ CASE_DECONV_FP32_1, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_5, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_6, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_7, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_8, 6, 9 }, + deconv_test_params{ CASE_DECONV_FP32_1, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_2, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_4, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_5, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_6, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_7, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_8, 4, 9 }, - deconv_test_params{ CASE_DECONV_FP16_1, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_2, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_4, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_5, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_6, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_7, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_8, 6, 9 }, + deconv_test_params{ CASE_DECONV_FP16_1, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_2, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_4, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_5, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_6, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_7, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_8, 4, 9 }, deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 }, deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 }, @@ -4350,24 +4350,24 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_ deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 }, deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_5, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_6, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_7, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_8, 6, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_1, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_2, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_3, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_4, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_5, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_6, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_7, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP32_3D_8, 4, 9 }, // deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_1, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_2, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_3, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_4, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_5, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_6, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_7, 6, 9 }, - deconv_test_params{ CASE_DECONV_FP16_3D_8, 6, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_1, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_2, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_3, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_4, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_5, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_6, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_7, 4, 9 }, + deconv_test_params{ CASE_DECONV_FP16_3D_8, 4, 9 }, // deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 }, deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 }, @@ -6224,36 +6224,37 @@ struct eltwise_test_params { size_t expected_not_fused_primitives; }; -#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4 -#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4 +#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP32_4 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx, eltwise_mode::sum +#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum +#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum class EltwiseFusingTest : public ::BaseFusingTest { @@ -6317,30 +6318,234 @@ TEST_P(eltwise_quantize, i8_per_channel) { INSTANTIATE_TEST_CASE_P(fusings_gpu, eltwise_quantize, ::testing::ValuesIn(std::vector{ - eltwise_test_params{CASE_ELTWISE_FP16_1}, - eltwise_test_params{CASE_ELTWISE_FP16_2}, - eltwise_test_params{CASE_ELTWISE_FP16_3}, - eltwise_test_params{CASE_ELTWISE_FP32_1}, - eltwise_test_params{CASE_ELTWISE_FP32_2}, - eltwise_test_params{CASE_ELTWISE_FP32_3}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_1}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_2}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_3}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_1}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_2}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_3}, - eltwise_test_params{CASE_ELTWISE_I8_FP32_1}, - eltwise_test_params{CASE_ELTWISE_I8_FP32_2}, - eltwise_test_params{CASE_ELTWISE_I8_FP32_3}, - eltwise_test_params{CASE_ELTWISE_U8_FP32_1}, - eltwise_test_params{CASE_ELTWISE_U8_FP32_2}, - eltwise_test_params{CASE_ELTWISE_U8_FP32_3}, - eltwise_test_params{CASE_ELTWISE_I8_FP16_1}, - eltwise_test_params{CASE_ELTWISE_I8_FP16_2}, - eltwise_test_params{CASE_ELTWISE_I8_FP16_3}, - eltwise_test_params{CASE_ELTWISE_U8_FP16_1}, - eltwise_test_params{CASE_ELTWISE_U8_FP16_2}, - eltwise_test_params{CASE_ELTWISE_U8_FP16_3}, + eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 4}, + }), ); + +class eltwise_fp32_fused_prims : public EltwiseFusingTest {}; +TEST_P(eltwise_fp32_fused_prims, scale_activation) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type), + scale("scale", "eltwise", "scale_data"), + activation("activation", "scale", activation_func::abs), + reorder("out", "activation", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(eltwise_fp32_fused_prims, eltwise_activation) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("eltwise_data", get_mem(get_input_layout2(p), -10, 10)), + eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type), + eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type), + activation("activation", "eltwise2", activation_func::abs), + reorder("out", "activation", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(eltwise_fp32_fused_prims, eltwise_activation_with_broadcast) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type), + eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type), + activation("activation", "eltwise2", activation_func::abs), + reorder("out", "activation", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, + eltwise_fp32_fused_prims, + ::testing::ValuesIn(std::vector{ + eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 5}, + eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 5}, + }), ); + +class eltwise_fp32_scale : public EltwiseFusingTest {}; +TEST_P(eltwise_fp32_scale, 6d) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type), + scale("scale", "eltwise", "scale_data"), + reorder("out", "scale", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, + eltwise_fp32_scale, + ::testing::ValuesIn(std::vector{ + eltwise_test_params{CASE_ELTWISE_FP32_4, 3, 4}, + }), ); + +/* ----------------------------------------------------------------------------------------------------- */ +/* ---------------------------------------- Scale cases ------------------------------------------------ */ +/* ----------------------------------------------------------------------------------------------------- */ +struct scale_test_params { + tensor input_size; + data_types input_type; + format input_format; + data_types default_type; + format default_format; + size_t expected_fused_primitives; + size_t expected_not_fused_primitives; +}; + +// Scale uses the same kernel as eltwise primitive, so the kernel is well covered by the eltwise tests above +// So here we can just check that fused scale kernel is constructed correctly (inputs are set correctly, fused precision is propagated, etc) +// and fusing conditions in the graph are correct +#define CASE_SCALE_FP32_1 {2, 16, 4, 4}, data_types::f32, format::bfyx, data_types::f32, format::bfyx +#define CASE_SCALE_FP32_2 {2, 16, 4, 4}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx +#define CASE_SCALE_FP32_3 {2, 16, 4, 4}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16 + +class ScaleFusingTest : public ::BaseFusingTest { +public: + void execute(scale_test_params& p) { + auto input_prim = get_mem(get_input_layout(p)); + + network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused); + network network_fused(this->engine, this->topology_fused, bo_fused); + + network_fused.set_input_data("input", input_prim); + network_not_fused.set_input_data("input", input_prim); + + compare(network_not_fused, network_fused, p); + } + + layout get_input_layout(scale_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; } + + layout get_per_channel_layout(scale_test_params& p) { + return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}}; + } +}; + +class scale_basic : public ScaleFusingTest {}; +TEST_P(scale_basic, no_bias_act_eltwise) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + scale("scale", "input", "scale_data"), + activation("activation", "scale", activation_func::negative), + data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type), + reorder("out", "eltwise", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(scale_basic, bias_act_eltwise) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)), + scale("scale", "input", "scale_data", "bias_data"), + activation("activation", "scale", activation_func::negative), + data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)), + eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type), + reorder("out", "eltwise", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(scale_basic, bias_act_scale) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)), + scale("scale", "input", "scale_data", "bias_data"), + activation("activation", "scale", activation_func::negative), + data("scale_data2", get_mem(get_per_channel_layout(p), -10, 10)), + scale("scale2", "activation", "scale_data2"), + reorder("out", "scale2", p.default_format, data_types::f32)); + + tolerance = 1e-5f; + execute(p); +} + +TEST_P(scale_basic, bias_act_quantize) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)), + data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)), + scale("scale", "input", "scale_data", "bias_data"), + activation("activation", "scale", activation_func::negative), + data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), -128)), + data("out_hi", get_mem(get_single_element_layout(p), 127)), + quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8), + reorder("out", "quantize", p.default_format, data_types::f32)); + + tolerance = 1.f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, + scale_basic, + ::testing::ValuesIn(std::vector{ + scale_test_params{CASE_SCALE_FP32_1, 2, 4}, + scale_test_params{CASE_SCALE_FP32_2, 2, 4}, + scale_test_params{CASE_SCALE_FP32_3, 2, 4}, }), ); class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {}; @@ -6363,12 +6568,12 @@ TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) { INSTANTIATE_TEST_CASE_P(fusings_gpu, eltwise_no_pitches_same_dims_quantize, ::testing::ValuesIn(std::vector{ - eltwise_test_params{CASE_ELTWISE_FP16_1}, - eltwise_test_params{CASE_ELTWISE_FP16_2}, - eltwise_test_params{CASE_ELTWISE_FP16_3}, - eltwise_test_params{CASE_ELTWISE_FP32_1}, - eltwise_test_params{CASE_ELTWISE_FP32_2}, - eltwise_test_params{CASE_ELTWISE_FP32_3}, + eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4}, }), ); class eltwise_activation : public EltwiseFusingTest {}; @@ -6399,18 +6604,18 @@ TEST_P(eltwise_activation, fp16_out) { INSTANTIATE_TEST_CASE_P(fusings_gpu, eltwise_activation, ::testing::ValuesIn(std::vector{ - eltwise_test_params{CASE_ELTWISE_FP16_1}, - eltwise_test_params{CASE_ELTWISE_FP16_2}, - eltwise_test_params{CASE_ELTWISE_FP16_3}, - eltwise_test_params{CASE_ELTWISE_FP32_1}, - eltwise_test_params{CASE_ELTWISE_FP32_2}, - eltwise_test_params{CASE_ELTWISE_FP32_3}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_1}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_2}, - eltwise_test_params{CASE_ELTWISE_FP32_FP16_3}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_1}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_2}, - eltwise_test_params{CASE_ELTWISE_FP16_FP32_3} + eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4}, + eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4} }), ); /* ----------------------------------------------------------------------------------------------------- */