[IE CLDNN] Activation with fused quantize bug fix (#613)

fixed a bug in fusing quantize into the activation kernels
added support for fusing scale and activation
added corresponding tests
Mikołaj Życzyński 2020-06-03 08:30:49 +02:00 committed by GitHub
parent cdd31da1c7
commit 3ea1657e4f
6 changed files with 304 additions and 128 deletions


@@ -24,6 +24,7 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::F16);
@@ -81,8 +82,7 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co
return false;
if (!params.fused_ops.empty() &&
((params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx) ||
((params.output.X().v * params.output.Y().v) % 4 != 0)))
(params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx))
return false;
return true;
@@ -94,15 +94,56 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
if (!params.fused_ops.empty()) {
bool can_use_vector = params.inputs[0].X().v % 4 == 0;
jit.AddConstant(MakeJitConstant("CAN_USE_VECTOR", can_use_vector));
std::vector<std::string> idx_order;
if (params.inputs[0].GetDims().size() <= 4) {
idx_order = {"fo_b", "fo_f", "fo_y", "fo_x"};
} else if (params.inputs[0].GetDims().size() == 5) {
idx_order = {"fo_b", "fo_f", "fo_z", "fo_y", "fo_x"};
if (can_use_vector) {
if (params.inputs[0].GetDims().size() <= 4) {
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM)",
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM",
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
"x % OUTPUT_SIZE_X"};
} else if (params.inputs[0].GetDims().size() == 5) {
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
"x % OUTPUT_SIZE_X"};
}
} else {
if (params.inputs[0].GetDims().size() <= 4) {
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM)",
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM",
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
"(x + i) % OUTPUT_SIZE_X"};
} else if (params.inputs[0].GetDims().size() == 5) {
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
"(x + i) % OUTPUT_SIZE_X"};
}
}
FusedOpsConfiguration conf =
{"", idx_order, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::TENSOR_COORD};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
FusedOpsConfiguration conf_vector = {"_VECTOR",
idx_order,
"v",
input_dt,
4,
LoadType::LT_UNALIGNED,
BoundaryCheck::DISABLED,
IndexType::TENSOR_COORD,
Tensor::DataChannelName::X};
FusedOpsConfiguration conf_scalar = {"_SCALAR",
idx_order,
"v[i]",
input_dt,
1,
LoadType::LT_UNALIGNED,
BoundaryCheck::DISABLED,
IndexType::TENSOR_COORD};
jit.Merge(MakeFusedOpsJitConstants(params, {conf_vector, conf_scalar}));
}
jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
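For orientation, the idx_order strings above decompose the kernel's flat element index into bfyx tensor coordinates with the usual div/mod chain. A minimal standalone sketch of the same arithmetic (plain C++ with hypothetical extents; not clDNN code):

#include <cstdio>

int main() {
    // Hypothetical bfyx extents standing in for OUTPUT_SIZE_X/Y and
    // OUTPUT_FEATURE_NUM from the JIT constants above.
    const unsigned SIZE_X = 8, SIZE_Y = 8, FEATURE_NUM = 16;

    const unsigned x = 300;  // flat element index, the "x" of the kernel
    const unsigned fo_x = x % SIZE_X;
    const unsigned fo_y = x / SIZE_X % SIZE_Y;
    const unsigned fo_f = x / (SIZE_X * SIZE_Y) % FEATURE_NUM;
    const unsigned fo_b = x / (SIZE_X * SIZE_Y * FEATURE_NUM);

    // 300 = ((0 * 16 + 4) * 8 + 5) * 8 + 4  ->  b=0 f=4 y=5 x=4
    printf("b=%u f=%u y=%u x=%u\n", fo_b, fo_f, fo_y, fo_x);
    return 0;
}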


@@ -35,7 +35,9 @@ protected:
bool Validate(const Params& p, const optional_params& o) const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE };
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
FusedOpType::ACTIVATION};
}
};
} // namespace kernel_selector


@@ -29,7 +29,9 @@ public:
ParamsKey GetSupportedKey() const override;
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
std::vector<FusedOpType> GetSupportedFusedOps() const override {
return { FusedOpType::QUANTIZE };
return {FusedOpType::QUANTIZE,
FusedOpType::SCALE,
FusedOpType::ACTIVATION};
}
};
} // namespace kernel_selector


@@ -18,7 +18,7 @@
#include "include/data_types.cl"
KERNEL(activation)(
__global INPUT0_TYPE* input,
__global OUTPUT_TYPE* output
#if HAS_FUSED_OPS_DECLS
, FUSED_OPS_DECLS
@@ -26,21 +26,8 @@ KERNEL(activation)(
)
{
const unsigned int x = (uint)get_global_id(0) * NUM_COLS_WI;
#if OUTPUT_DIMS == 5
const unsigned int fo_x = x % OUTPUT_SIZE_X;
const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
const unsigned int fo_z = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM;
const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_FEATURE_NUM);
#elif OUTPUT_DIMS == 4
const unsigned int fo_x = x % OUTPUT_SIZE_X;
const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM);
#endif
unsigned int input_offset = x + INPUT0_OFFSET;
unsigned int output_offset = x + OUTPUT_OFFSET;
typedef CAT(INPUT0_TYPE, 4) input_t;
typedef CAT(OUTPUT_TYPE, 4) output_t;
@@ -48,9 +35,19 @@ KERNEL(activation)(
input_t v = ((__global input_t*) (input + input_offset))[0];
v = ACTIVATION_KERNEL(v, ACTIVATION_PARAMS_KERNEL);
#if HAS_FUSED_OPS
FUSED_OPS;
*((__global output_t*)(output + output_offset)) = FUSED_OPS_RESULT;
output_t result;
#if !CAN_USE_VECTOR
for (int i = 0; i < 4; i++) {
FUSED_OPS_SCALAR;
result[i] = FUSED_OPS_RESULT_SCALAR;
}
#else
FUSED_OPS_VECTOR;
result = FUSED_OPS_RESULT_VECTOR;
#endif
*((__global output_t*)(output + output_offset)) = result;
#else
*((__global output_t*)(output + output_offset)) = v;
#endif
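The _VECTOR/_SCALAR split above is the heart of the fix: the vector configuration derives one tensor coordinate from the base index x for the whole 4-wide chunk, which is only safe when a chunk can never straddle a row (and hence a feature) boundary, i.e. when the input X extent is divisible by 4 (the CAN_USE_VECTOR condition). A minimal sketch of the failure case, assuming hypothetical extents X = Y = 6:

#include <cstdio>

int main() {
    // One feature map holds SIZE_X * SIZE_Y = 36 elements.
    const unsigned SIZE_X = 6, SIZE_Y = 6;
    const unsigned x = 34;  // first element of the 4-wide chunk 34..37
    for (unsigned i = 0; i < 4; ++i) {
        unsigned f = (x + i) / (SIZE_X * SIZE_Y);  // per-element feature index
        printf("element %u -> f=%u\n", x + i, f);  // 34,35 -> f=0; 36,37 -> f=1
    }
    return 0;
}

The vector path computes the coordinate from x alone (f = 0 for all four elements), so per-channel fused-op data such as quantize thresholds would be read from the wrong channel for the last two elements; the scalar path's (x + i) indexing avoids this at the cost of four separate fused-op evaluations.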


@@ -180,8 +180,10 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
// - primitives input cannot be output
// - no activation additional input
// - input was optimized
// - can't have fused primitives
if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant())
node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant() ||
node.has_fused_primitives())
return;
// - limit to primitives which implementations support activation fusing
@@ -353,6 +355,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
should_fuse |= input_data.is_type<deconvolution>();
should_fuse |= input_data.is_type<activation>();
if (!should_fuse)
return;
@@ -390,6 +394,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
should_fuse |= input_data.is_type<deconvolution>();
should_fuse |= input_data.is_type<activation>();
if (!should_fuse)
return;
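The two pass changes cooperate: fuse_activations now refuses to fold an activation into a node that already carries fused primitives (the activation would otherwise run before, not after, the previously fused ops), while fuse_simple_primitives accepts activation nodes as fusing targets, so the scale/activation/quantize chains exercised by the new tests land in the activation kernel itself. A schematic consolidation of the guard, with hypothetical stub predicates mirroring the ones used above (not a literal excerpt):

#include <vector>

// Schematic stand-in for cldnn's program_node, exposing only the predicates
// used by fuse_activations; the stub bodies here are hypothetical.
struct program_node {
    std::vector<program_node*> deps;
    bool has_padded_dependency() const { return false; }
    bool is_output() const { return false; }
    bool can_be_optimized() const { return false; }
    bool is_constant() const { return false; }
    bool has_fused_primitives() const { return false; }
    const std::vector<program_node*>& get_dependencies() const { return deps; }
};

// Consolidated guard mirroring the condition in the hunk above.
bool can_fuse_activation(const program_node& node, const program_node& input,
                         bool is_debug) {
    if (node.has_padded_dependency() || (input.is_output() && !is_debug) ||
        node.is_output() || node.get_dependencies().size() != 1 ||
        input.can_be_optimized() || node.is_constant() ||
        node.has_fused_primitives())  // the check added by this commit
        return false;
    return true;
}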


@@ -2932,7 +2932,6 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu,
lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"},
}), );
/* ----------------------------------------------------------------------------------------------------- */
/* -------------------------------- Activation cases --------------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------- */
@@ -2940,43 +2939,62 @@ struct activation_test_params {
tensor input_size;
data_types input_type;
format input_format;
activation_func activation_function;
activation_additional_params additional_params;
data_types default_type;
format default_format;
size_t expected_fused_primitives;
size_t expected_not_fused_primitives;
std::string kernel_name;
};
#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_2 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_3 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_4 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_5 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_6 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F32_1 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F32_2 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F32_3 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_2 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_3 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_4 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_5 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_6 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F16_1 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F16_2 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F16_3 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_U8_2 {1, 16, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_U8_1 {1, 16, 8, 8, 8}, data_types::u8, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_0 {7, 32, 3, 3}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_2 {7, 3, 7, 7}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_3 {1, 14, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_4 {1, 17, 31, 29}, data_types::f32, format::yxfb, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_5 {1, 17, 31, 29}, data_types::f32, format::byxf_af32, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_6 {1, 17, 31, 29}, data_types::f32, format::b_fs_yx_fsv32, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F32_7 {1, 17, 31, 29}, data_types::f32, format::fyxb, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F32_0 {3, 16, 13, 13, 13}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_1 {2, 16, 8, 8, 8}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_2 {1, 16, 7, 7, 7}, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_3 {1, 17, 7, 7, 7}, data_types::f32, format::b_fs_zyx_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_4 {1, 17, 7, 7, 7}, data_types::f32, format::bs_fs_yx_bsv16_fsv16, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_5 {1, 17, 7, 7, 7}, data_types::f32, format::fs_b_yx_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F32_6 {1, 17, 7, 7, 7}, data_types::f32, format::fs_bs_yx_bsv4_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_F16_0 {7, 32, 5, 5}, data_types::f16, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_2 {7, 16, 7, 7}, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_3 {1, 14, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_4 {1, 17, 31, 29}, data_types::f16, format::yxfb, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_5 {1, 17, 31, 29}, data_types::f16, format::byxf_af32, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_6 {1, 17, 31, 29}, data_types::f16, format::b_fs_yx_fsv32, data_types::f32, format::bfyx
#define CASE_ACTIVATION_F16_7 {1, 17, 31, 29}, data_types::f16, format::fyxb, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_F16_0 {3, 16, 13, 13, 13}, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_1 {2, 16, 8, 8, 8}, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_2 {1, 16, 7, 7, 7}, data_types::f16, format::b_fs_zyx_fsv16, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_3 {1, 17, 7, 7, 7}, data_types::f16, format::b_fs_zyx_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_4 {1, 17, 7, 7, 7}, data_types::f16, format::bs_fs_yx_bsv16_fsv16, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_5 {1, 17, 7, 7, 7}, data_types::f16, format::fs_b_yx_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_3D_F16_6 {1, 17, 7, 7, 7}, data_types::f16, format::fs_bs_yx_bsv4_fsv32, data_types::f32, format::bfzyx
#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_U8_2 {1, 12, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_I8_1 {1, 16, 8, 8}, data_types::i8, format::bfyx, data_types::f32, format::bfyx
#define CASE_ACTIVATION_I8_2 {1, 14, 8, 8}, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
#define CASE_ACTIVATION_3D_I8_1 {1, 17, 8, 8, 8}, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
class ActivationFusingTest : public ::BaseFusingTest<activation_test_params> {
public:
void execute(activation_test_params& p) {
auto input_prim = get_mem(get_input_layout(p));
build_options options;
implementation_desc activation_impl = {p.input_format, p.kernel_name};
options.set_option(build_option::optimize_data(true));
options.set_option(build_option::force_implementations({{"act", activation_impl}}));
network network_fused(this->engine, this->topology_fused, options);
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
network network_fused(this->engine, this->topology_fused, bo_fused);
network_fused.set_input_data("input", input_prim);
network_not_fused.set_input_data("input", input_prim);
@@ -2989,8 +3007,7 @@ public:
layout get_per_channel_layout(activation_test_params& p) {
return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
}
activation_func get_activation_function(activation_test_params& p) { return p.activation_function; }
activation_additional_params get_activation_additional_params(activation_test_params& p) { return p.additional_params; }
format get_input_format(activation_test_params &p) { return p.input_format; }
};
@@ -2998,68 +3015,97 @@ class activation_quantize_i8 : public ActivationFusingTest {};
TEST_P(activation_quantize_i8, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
activation("act", "input", activation_func::relu),
data("in_low", get_mem(get_single_element_layout(p), min_random, 0)),
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
tolerance = 1.0f;
execute(p);
}
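For reference, quantize here follows the standard FakeQuantize semantics: clamp to [in_low, in_high], normalize, snap to one of levels - 1 steps, rescale to [out_low, out_high]. A scalar sketch of that formula (standalone C++, not the clDNN kernel):

#include <algorithm>
#include <cmath>

// Scalar reference for quantize("quant", ...) with 255 levels.
float fake_quantize(float v, float in_lo, float in_hi,
                    float out_lo, float out_hi, int levels = 255) {
    v = std::min(std::max(v, in_lo), in_hi);                 // clamp to input range
    float norm = (v - in_lo) / (in_hi - in_lo);              // normalize to [0, 1]
    norm = std::round(norm * (levels - 1)) / (levels - 1);   // snap to a level
    return norm * (out_hi - out_lo) + out_lo;                // rescale to output range
}

With out_low drawn from [-127, 0] and out_high from [0, 127], adjacent levels sit roughly one unit apart, which is presumably why the test tolerance is 1.0f.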
INSTANTIATE_TEST_CASE_P(
fusings_gpu,
activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_4, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_5, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_6, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
}), );
INSTANTIATE_TEST_CASE_P(
DISABLED_fusings_gpu,
activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
// fp16 cases
activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_4, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_5, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_6, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
// u8 cases
activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
activation_test_params{CASE_ACTIVATION_U8_2, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
}), );
class activation_opt_quantize_i8 : public ActivationFusingTest {};
TEST_P(activation_opt_quantize_i8, basic) {
TEST_P(activation_quantize_i8, per_channel) {
auto p = GetParam();
implementation_desc activation_impl = {get_input_format(p), "activation_opt"};
this->bo_fused.set_option(build_option::force_implementations({{"act", activation_impl}}));
create_topologies(input_layout("input", get_input_layout(p)),
activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
activation("act", "input", activation_func::relu),
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
tolerance = 1.0f;
execute(p);
}
INSTANTIATE_TEST_CASE_P(
fusings_gpu,
activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
// InputDataType = FP32
activation_test_params{CASE_ACTIVATION_F32_0, 2, 3, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 3, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_0, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_4, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3, "activation_ref"},
}), );
INSTANTIATE_TEST_CASE_P(
DISABLED_fusings_gpu,
activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
activation_test_params{CASE_ACTIVATION_F32_5, 2, 3, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_F32_6, 2, 3, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_F32_7, 2, 3, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 3, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 3, "activation_ref"}, // FIXME - accuracy bug
}), );
class activation_scale_activation_quantize_i8 : public ActivationFusingTest {};
TEST_P(activation_scale_activation_quantize_i8, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
activation("act", "input", activation_func::relu),
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
data("in_low", get_mem(get_single_element_layout(p), min_random, 0)),
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
data("out_low", get_mem(get_single_element_layout(p), 0)),
data("out_high", get_mem(get_single_element_layout(p), 255)),
scale("scale", "act", "scale_data"),
activation("act2", "scale", activation_func::softsign),
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
tolerance = 1.f;
execute(p);
}
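Per element, the fused chain in this test is relu, a multiply by the scale constant, softsign (x / (1 + |x|)), then quantize. A reference sketch under the same assumptions, reusing the fake_quantize sketch above:

#include <algorithm>
#include <cmath>

// Per-element reference for the act -> scale -> act2 -> quant chain.
// scale_factor corresponds to scale_data (1.0f / 255); the result is then
// passed through fake_quantize(...) from the previous sketch.
float act_scale_softsign(float v, float scale_factor) {
    v = std::max(v, 0.0f);              // activation_func::relu
    v *= scale_factor;                  // scale("scale", "act", "scale_data")
    return v / (1.0f + std::fabs(v));   // activation_func::softsign
}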
TEST_P(activation_scale_activation_quantize_i8, per_channel) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
activation("act", "input", activation_func::relu),
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
data("out_low", get_mem(get_single_element_layout(p), 0)),
data("out_high", get_mem(get_single_element_layout(p), 255)),
scale("scale", "act", "scale_data"),
activation("act2", "scale", activation_func::softsign),
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
tolerance = 1.f;
execute(p);
@@ -3067,31 +3113,113 @@ TEST_P(activation_opt_quantize_i8, basic) {
INSTANTIATE_TEST_CASE_P(
fusings_gpu,
activation_opt_quantize_i8,
activation_scale_activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
// InputDataType = FP32
activation_test_params{CASE_ACTIVATION_F32_0, 2, 5, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 5, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 5, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 5, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_0, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_2, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_3, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_4, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_5, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_6, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_7, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 5, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 5, "activation_ref"},
}), );
INSTANTIATE_TEST_CASE_P(
DISABLED_fusings_gpu,
activation_opt_quantize_i8,
activation_scale_activation_quantize_i8,
::testing::ValuesIn(std::vector<activation_test_params>{
// fp16 cases
activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 5, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 5, "activation_ref"}, // FIXME - accuracy bug
}), );
// u8 cases
activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
class activation_scale_activation : public ActivationFusingTest {};
TEST_P(activation_scale_activation, basic) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
activation("act", "input", activation_func::relu),
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
scale("scale", "act", "scale_data"),
activation("act2", "scale", activation_func::exp),
reorder("reorder_bfyx", "act2", p.default_format, data_types::f32));
tolerance = 1e-05f;
execute(p);
}
INSTANTIATE_TEST_CASE_P(
fusings_gpu,
activation_scale_activation,
::testing::ValuesIn(std::vector<activation_test_params>{
// InputDataType = FP32
activation_test_params{CASE_ACTIVATION_F32_0, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F32_0, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_2, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_3, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_4, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_5, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_6, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F32_7, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 4, "activation_ref"},
// InputDataType = FP16
activation_test_params{CASE_ACTIVATION_F16_0, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F16_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F16_0, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_F16_0, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_2, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_3, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_4, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_5, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_6, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_F16_7, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_0, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_4, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_F16_5, 2, 4, "activation_ref"},
// InputDataType = UINT8
activation_test_params{CASE_ACTIVATION_U8_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_U8_2, 2, 4, "activation_ref"},
// InputDataType = INT8
activation_test_params{CASE_ACTIVATION_I8_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_opt"},
activation_test_params{CASE_ACTIVATION_I8_1, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_I8_2, 2, 4, "activation_ref"},
activation_test_params{CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_ref"}
}), );
INSTANTIATE_TEST_CASE_P(
DISABLED_fusings_gpu,
activation_scale_activation,
::testing::ValuesIn(std::vector<activation_test_params>{
activation_test_params{CASE_ACTIVATION_3D_F32_4, 2, 4, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 4, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 4, "activation_ref"}, // FIXME - accuracy bug
activation_test_params{CASE_ACTIVATION_3D_F16_6, 2, 4, "activation_ref"}, // FIXME - accuracy bug
}), );
/* ----------------------------------------------------------------------------------------------------- */
@@ -3392,7 +3520,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
::testing::ValuesIn(std::vector<deconv_test_params>{
// Some fusings disabled under deconvolution -> convolution optimization
// Quantize fusing disabled for fp16/fp32 for performance reasons
// deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 }, FIXME Failure due to activation + quantization fusing
deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 },
deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
@@ -3438,7 +3566,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
// FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
// add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
// deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
@@ -3448,7 +3576,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
// deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
@@ -3505,7 +3633,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
::testing::ValuesIn(std::vector<deconv_test_params>{
// Some fusings disabled under deconvolution -> convolution optimization
// Quantize fusing disabled for fp16/fp32 for performance reasons
// deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 }, FIXME Failure due to activation + quantization fusing
deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 },
deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
@@ -3541,7 +3669,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
// deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 }, FIXME Failure due to activation + quantization fusing
deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },