[IE CLDNN] Activation with fused quantize bug fix (#613)
Fixed a bug connected with fusing quantization into activation; added scale and activation fusing support; added corresponding tests.
This commit is contained in:
parent
cdd31da1c7
commit
3ea1657e4f
@ -24,6 +24,7 @@ ParamsKey ActivationKernelOpt::GetSupportedKey() const {
|
|||||||
k.EnableInputDataType(Datatype::INT32);
|
k.EnableInputDataType(Datatype::INT32);
|
||||||
k.EnableInputDataType(Datatype::F16);
|
k.EnableInputDataType(Datatype::F16);
|
||||||
k.EnableInputDataType(Datatype::F32);
|
k.EnableInputDataType(Datatype::F32);
|
||||||
|
k.EnableOutputDataType(Datatype::UINT8);
|
||||||
k.EnableOutputDataType(Datatype::INT8);
|
k.EnableOutputDataType(Datatype::INT8);
|
||||||
k.EnableOutputDataType(Datatype::INT32);
|
k.EnableOutputDataType(Datatype::INT32);
|
||||||
k.EnableOutputDataType(Datatype::F16);
|
k.EnableOutputDataType(Datatype::F16);
|
||||||
@ -81,8 +82,7 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!params.fused_ops.empty() &&
|
if (!params.fused_ops.empty() &&
|
||||||
((params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx) ||
|
(params.output.GetLayout() != DataLayout::bfyx && params.output.GetLayout() != DataLayout::bfzyx))
|
||||||
((params.output.X().v * params.output.Y().v) % 4 != 0)))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -94,15 +94,56 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
|
|||||||
|
|
||||||
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
|
jit.AddConstant(MakeJitConstant("NUM_COLS_WI", NUM_COLS_WI));
|
||||||
if (!params.fused_ops.empty()) {
|
if (!params.fused_ops.empty()) {
|
||||||
|
bool can_use_vector = params.inputs[0].X().v % 4 == 0;
|
||||||
|
jit.AddConstant(MakeJitConstant("CAN_USE_VECTOR", can_use_vector));
|
||||||
|
|
||||||
std::vector<std::string> idx_order;
|
std::vector<std::string> idx_order;
|
||||||
if (params.inputs[0].GetDims().size() <= 4) {
|
|
||||||
idx_order = {"fo_b", "fo_f", "fo_y", "fo_x"};
|
if (can_use_vector) {
|
||||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
if (params.inputs[0].GetDims().size() <= 4) {
|
||||||
idx_order = {"fo_b", "fo_f", "fo_z", "fo_y", "fo_x"};
|
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM)",
|
||||||
|
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM",
|
||||||
|
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||||
|
"x % OUTPUT_SIZE_X"};
|
||||||
|
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||||
|
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||||
|
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||||
|
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||||
|
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||||
|
"x % OUTPUT_SIZE_X"};
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (params.inputs[0].GetDims().size() <= 4) {
|
||||||
|
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM)",
|
||||||
|
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM",
|
||||||
|
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||||
|
"(x + i) % OUTPUT_SIZE_X"};
|
||||||
|
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||||
|
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||||
|
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||||
|
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||||
|
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||||
|
"(x + i) % OUTPUT_SIZE_X"};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
FusedOpsConfiguration conf =
|
FusedOpsConfiguration conf_vector = {"_VECTOR",
|
||||||
{"", idx_order, "v", input_dt, 4, LoadType::LT_UNALIGNED, BoundaryCheck::DISABLED, IndexType::TENSOR_COORD};
|
idx_order,
|
||||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
"v",
|
||||||
|
input_dt,
|
||||||
|
4,
|
||||||
|
LoadType::LT_UNALIGNED,
|
||||||
|
BoundaryCheck::DISABLED,
|
||||||
|
IndexType::TENSOR_COORD,
|
||||||
|
Tensor::DataChannelName::X};
|
||||||
|
FusedOpsConfiguration conf_scalar = {"_SCALAR",
|
||||||
|
idx_order,
|
||||||
|
"v[i]",
|
||||||
|
input_dt,
|
||||||
|
1,
|
||||||
|
LoadType::LT_UNALIGNED,
|
||||||
|
BoundaryCheck::DISABLED,
|
||||||
|
IndexType::TENSOR_COORD};
|
||||||
|
jit.Merge(MakeFusedOpsJitConstants(params, {conf_vector, conf_scalar}));
|
||||||
}
|
}
|
||||||
jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
|
jit.Merge(MakeActivationJitConstants(params.activations, input_dt, "_KERNEL"));
|
||||||
|
|
||||||
|
@ -35,7 +35,9 @@ protected:
|
|||||||
bool Validate(const Params& p, const optional_params& o) const override;
|
bool Validate(const Params& p, const optional_params& o) const override;
|
||||||
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
|
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
|
||||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||||
return { FusedOpType::QUANTIZE };
|
return {FusedOpType::QUANTIZE,
|
||||||
|
FusedOpType::SCALE,
|
||||||
|
FusedOpType::ACTIVATION};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace kernel_selector
|
} // namespace kernel_selector
|
||||||
|
@ -29,7 +29,9 @@ public:
|
|||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
|
JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
|
||||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||||
return { FusedOpType::QUANTIZE };
|
return {FusedOpType::QUANTIZE,
|
||||||
|
FusedOpType::SCALE,
|
||||||
|
FusedOpType::ACTIVATION};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace kernel_selector
|
} // namespace kernel_selector
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
#include "include/data_types.cl"
|
#include "include/data_types.cl"
|
||||||
|
|
||||||
KERNEL(activation)(
|
KERNEL(activation)(
|
||||||
__global INPUT0_TYPE* input,
|
__global INPUT0_TYPE* input,
|
||||||
__global OUTPUT_TYPE* output
|
__global OUTPUT_TYPE* output
|
||||||
#if HAS_FUSED_OPS_DECLS
|
#if HAS_FUSED_OPS_DECLS
|
||||||
, FUSED_OPS_DECLS
|
, FUSED_OPS_DECLS
|
||||||
@ -26,21 +26,8 @@ KERNEL(activation)(
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
const unsigned int x = (uint)get_global_id(0) * NUM_COLS_WI;
|
const unsigned int x = (uint)get_global_id(0) * NUM_COLS_WI;
|
||||||
#if OUTPUT_DIMS == 5
|
unsigned int input_offset = x + INPUT0_OFFSET;
|
||||||
const unsigned int fo_x = x % OUTPUT_SIZE_X;
|
unsigned int output_offset = x + OUTPUT_OFFSET;
|
||||||
const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
|
|
||||||
const unsigned int fo_z = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z;
|
|
||||||
const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM;
|
|
||||||
const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM);
|
|
||||||
#elif OUTPUT_DIMS == 4
|
|
||||||
const unsigned int fo_x = x % OUTPUT_SIZE_X;
|
|
||||||
const unsigned int fo_y = x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
|
|
||||||
const unsigned int fo_f = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_FEATURE_NUM;
|
|
||||||
const unsigned int fo_b = x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_FEATURE_NUM);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
unsigned int input_offset = x + INPUT0_OFFSET;
|
|
||||||
unsigned int output_offset = x + OUTPUT_OFFSET;
|
|
||||||
|
|
||||||
typedef CAT(INPUT0_TYPE, 4) input_t;
|
typedef CAT(INPUT0_TYPE, 4) input_t;
|
||||||
typedef CAT(OUTPUT_TYPE, 4) output_t;
|
typedef CAT(OUTPUT_TYPE, 4) output_t;
|
||||||
@ -48,9 +35,19 @@ KERNEL(activation)(
|
|||||||
input_t v = ((__global input_t*) (input + input_offset))[0];
|
input_t v = ((__global input_t*) (input + input_offset))[0];
|
||||||
|
|
||||||
v = ACTIVATION_KERNEL(v, ACTIVATION_PARAMS_KERNEL);
|
v = ACTIVATION_KERNEL(v, ACTIVATION_PARAMS_KERNEL);
|
||||||
|
|
||||||
#if HAS_FUSED_OPS
|
#if HAS_FUSED_OPS
|
||||||
FUSED_OPS;
|
output_t result;
|
||||||
*((__global output_t*)(output + output_offset)) = FUSED_OPS_RESULT;
|
#if !CAN_USE_VECTOR
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
FUSED_OPS_SCALAR;
|
||||||
|
result[i] = FUSED_OPS_RESULT_SCALAR;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
FUSED_OPS_VECTOR;
|
||||||
|
result = FUSED_OPS_RESULT_VECTOR;
|
||||||
|
#endif
|
||||||
|
*((__global output_t*)(output + output_offset)) = result;
|
||||||
#else
|
#else
|
||||||
*((__global output_t*)(output + output_offset)) = v;
|
*((__global output_t*)(output + output_offset)) = v;
|
||||||
#endif
|
#endif
|
||||||
|
@ -180,8 +180,10 @@ void prepare_primitive_fusing::fuse_activations(program_impl &p) {
|
|||||||
// - primitives input cannot be output
|
// - primitives input cannot be output
|
||||||
// - no activation additional input
|
// - no activation additional input
|
||||||
// - input was optimized
|
// - input was optimized
|
||||||
|
// - can't have fused primitives
|
||||||
if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
|
if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
|
||||||
node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant())
|
node.get_dependencies().size() != 1 || input.can_be_optimized() || node.is_constant() ||
|
||||||
|
node.has_fused_primitives())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// - limit to primitives which implementations support activation fusing
|
// - limit to primitives which implementations support activation fusing
|
||||||
@ -353,6 +355,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
|
|
||||||
should_fuse |= input_data.is_type<deconvolution>();
|
should_fuse |= input_data.is_type<deconvolution>();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<activation>();
|
||||||
|
|
||||||
if (!should_fuse)
|
if (!should_fuse)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -390,6 +394,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
|
|
||||||
should_fuse |= input_data.is_type<deconvolution>();
|
should_fuse |= input_data.is_type<deconvolution>();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<activation>();
|
||||||
|
|
||||||
if (!should_fuse)
|
if (!should_fuse)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -2932,7 +2932,6 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
|||||||
lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"},
|
lrn_test_params{CASE_LRN_FP16_5, 2, 4, lrn_norm_region_across_channel, "lrn_gpu_across_channel_multiple_features"},
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
/* -------------------------------- Activation cases --------------------------------------------------- */
|
/* -------------------------------- Activation cases --------------------------------------------------- */
|
||||||
/* ----------------------------------------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
@ -2940,43 +2939,62 @@ struct activation_test_params {
|
|||||||
tensor input_size;
|
tensor input_size;
|
||||||
data_types input_type;
|
data_types input_type;
|
||||||
format input_format;
|
format input_format;
|
||||||
activation_func activation_function;
|
|
||||||
activation_additional_params additional_params;
|
|
||||||
data_types default_type;
|
data_types default_type;
|
||||||
format default_format;
|
format default_format;
|
||||||
size_t expected_fused_primitives;
|
size_t expected_fused_primitives;
|
||||||
size_t expected_not_fused_primitives;
|
size_t expected_not_fused_primitives;
|
||||||
|
std::string kernel_name;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_0 {7, 32, 3, 3}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_F32_2 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_1 {1, 16, 8, 8}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_F32_3 {1, 16, 8, 8}, data_types::f32, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_2 {7, 3, 7, 7}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_F32_4 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_3 {1, 14, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_F32_5 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_4 {1, 17, 31, 29}, data_types::f32, format::yxfb, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_F32_6 {1, 16, 8, 8}, data_types::f32, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_5 {1, 17, 31, 29}, data_types::f32, format::byxf_af32, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_3D_F32_1 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_6 {1, 17, 31, 29}, data_types::f32, format::b_fs_yx_fsv32, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_3D_F32_2 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F32_7 {1, 17, 31, 29}, data_types::f32, format::fyxb, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_3D_F32_3 {1, 16, 8, 8, 8}, data_types::f32, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_0 {3, 16, 13, 13, 13}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_1 {2, 16, 8, 8, 8}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_2 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_2 {1, 16, 7, 7, 7}, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_3 {1, 16, 8, 8}, data_types::f16, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_3 {1, 17, 7, 7, 7}, data_types::f32, format::b_fs_zyx_fsv32, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_4 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_4 {1, 17, 7, 7, 7}, data_types::f32, format::bs_fs_yx_bsv16_fsv16, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_5 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::hard_sigmoid, {1.f, 1.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_5 {1, 17, 7, 7, 7}, data_types::f32, format::fs_b_yx_fsv32, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_F16_6 {1, 16, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_3D_F32_6 {1, 17, 7, 7, 7}, data_types::f32, format::fs_bs_yx_bsv4_fsv32, data_types::f32, format::bfzyx
|
||||||
#define CASE_ACTIVATION_3D_F16_1 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hyperbolic_tan, {0.f, 0.f}, data_types::f32, format::bfyx
|
|
||||||
#define CASE_ACTIVATION_3D_F16_2 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::hard_sigmoid, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F16_0 {7, 32, 5, 5}, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_3D_F16_3 {1, 16, 8, 8, 8}, data_types::f16, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F16_1 {1, 16, 8, 8}, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F16_2 {7, 16, 7, 7}, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_U8_2 {1, 16, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F16_3 {1, 14, 8, 8}, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
#define CASE_ACTIVATION_3D_U8_1 {1, 16, 8, 8, 8}, data_types::u8, format::bfzyx, activation_func::relu, {0.f, 0.f}, data_types::f32, format::bfyx
|
#define CASE_ACTIVATION_F16_4 {1, 17, 31, 29}, data_types::f16, format::yxfb, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_F16_5 {1, 17, 31, 29}, data_types::f16, format::byxf_af32, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_F16_6 {1, 17, 31, 29}, data_types::f16, format::b_fs_yx_fsv32, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_F16_7 {1, 17, 31, 29}, data_types::f16, format::fyxb, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_0 {3, 16, 13, 13, 13}, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_1 {2, 16, 8, 8, 8}, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_2 {1, 16, 7, 7, 7}, data_types::f16, format::b_fs_zyx_fsv16, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_3 {1, 17, 7, 7, 7}, data_types::f16, format::b_fs_zyx_fsv32, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_4 {1, 17, 7, 7, 7}, data_types::f16, format::bs_fs_yx_bsv16_fsv16, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_5 {1, 17, 7, 7, 7}, data_types::f16, format::fs_b_yx_fsv32, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_ACTIVATION_3D_F16_6 {1, 17, 7, 7, 7}, data_types::f16, format::fs_bs_yx_bsv4_fsv32, data_types::f32, format::bfzyx
|
||||||
|
|
||||||
|
#define CASE_ACTIVATION_U8_1 {1, 16, 8, 8}, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_U8_2 {1, 12, 8, 8}, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_I8_1 {1, 16, 8, 8}, data_types::i8, format::bfyx, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_I8_2 {1, 14, 8, 8}, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||||
|
#define CASE_ACTIVATION_3D_I8_1 {1, 17, 8, 8, 8}, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
|
|
||||||
class ActivationFusingTest : public ::BaseFusingTest<activation_test_params> {
|
class ActivationFusingTest : public ::BaseFusingTest<activation_test_params> {
|
||||||
public:
|
public:
|
||||||
void execute(activation_test_params& p) {
|
void execute(activation_test_params& p) {
|
||||||
auto input_prim = get_mem(get_input_layout(p));
|
auto input_prim = get_mem(get_input_layout(p));
|
||||||
|
|
||||||
|
build_options options;
|
||||||
|
implementation_desc activation_impl = {p.input_format, p.kernel_name};
|
||||||
|
options.set_option(build_option::optimize_data(true));
|
||||||
|
options.set_option(build_option::force_implementations({{"act", activation_impl}}));
|
||||||
|
network network_fused(this->engine, this->topology_fused, options);
|
||||||
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
|
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
|
||||||
network network_fused(this->engine, this->topology_fused, bo_fused);
|
|
||||||
|
|
||||||
network_fused.set_input_data("input", input_prim);
|
network_fused.set_input_data("input", input_prim);
|
||||||
network_not_fused.set_input_data("input", input_prim);
|
network_not_fused.set_input_data("input", input_prim);
|
||||||
@ -2989,8 +3007,7 @@ public:
|
|||||||
layout get_per_channel_layout(activation_test_params& p) {
|
layout get_per_channel_layout(activation_test_params& p) {
|
||||||
return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
|
return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
|
||||||
}
|
}
|
||||||
activation_func get_activation_function(activation_test_params& p) { return p.activation_function; }
|
|
||||||
activation_additional_params get_activation_additional_params(activation_test_params& p) { return p.additional_params; }
|
|
||||||
format get_input_format(activation_test_params &p) { return p.input_format; }
|
format get_input_format(activation_test_params &p) { return p.input_format; }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2998,68 +3015,97 @@ class activation_quantize_i8 : public ActivationFusingTest {};
|
|||||||
TEST_P(activation_quantize_i8, basic) {
|
TEST_P(activation_quantize_i8, basic) {
|
||||||
auto p = GetParam();
|
auto p = GetParam();
|
||||||
create_topologies(input_layout("input", get_input_layout(p)),
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
|
activation("act", "input", activation_func::relu),
|
||||||
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
|
data("in_low", get_mem(get_single_element_layout(p), min_random, 0)),
|
||||||
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
|
||||||
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
|
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
|
||||||
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
|
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
|
||||||
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
||||||
reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
|
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
|
||||||
|
|
||||||
tolerance = 1.f;
|
tolerance = 1.0f;
|
||||||
execute(p);
|
execute(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
TEST_P(activation_quantize_i8, per_channel) {
|
||||||
fusings_gpu,
|
|
||||||
activation_quantize_i8,
|
|
||||||
::testing::ValuesIn(std::vector<activation_test_params>{
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_4, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_5, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F32_6, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
|
|
||||||
}), );
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
|
||||||
DISABLED_fusings_gpu,
|
|
||||||
activation_quantize_i8,
|
|
||||||
::testing::ValuesIn(std::vector<activation_test_params>{
|
|
||||||
// fp16 cases
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_4, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_5, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_F16_6, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
|
|
||||||
|
|
||||||
// u8 cases
|
|
||||||
activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_U8_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
|
|
||||||
}), );
|
|
||||||
|
|
||||||
class activation_opt_quantize_i8 : public ActivationFusingTest {};
|
|
||||||
TEST_P(activation_opt_quantize_i8, basic) {
|
|
||||||
auto p = GetParam();
|
auto p = GetParam();
|
||||||
implementation_desc activation_impl = {get_input_format(p), "activation_opt"};
|
|
||||||
this->bo_fused.set_option(build_option::force_implementations({{"act", activation_impl}}));
|
|
||||||
|
|
||||||
create_topologies(input_layout("input", get_input_layout(p)),
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
activation("act", "input", get_activation_function(p), get_activation_additional_params(p)),
|
activation("act", "input", activation_func::relu),
|
||||||
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
|
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
|
||||||
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||||
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
|
data("out_low", get_mem(get_single_element_layout(p), -127, 0)),
|
||||||
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
|
data("out_high", get_mem(get_single_element_layout(p), 0, 127)),
|
||||||
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
quantize("quant", "act", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
||||||
reorder("reorder_bfyx", "quant", format::bfyx, data_types::f32));
|
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1.0f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
fusings_gpu,
|
||||||
|
activation_quantize_i8,
|
||||||
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
|
// InputDataType = FP32
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 3, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 3, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3, "activation_opt"},
|
||||||
|
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_4, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3, "activation_ref"},
|
||||||
|
}), );
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
DISABLED_fusings_gpu,
|
||||||
|
activation_quantize_i8,
|
||||||
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_5, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_6, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_7, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 3, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
}), );
|
||||||
|
|
||||||
|
class activation_scale_activation_quantize_i8 : public ActivationFusingTest {};
|
||||||
|
TEST_P(activation_scale_activation_quantize_i8, basic) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
activation("act", "input", activation_func::relu),
|
||||||
|
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||||
|
data("in_low", get_mem(get_single_element_layout(p), min_random, 0)),
|
||||||
|
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
|
||||||
|
data("out_low", get_mem(get_single_element_layout(p), 0)),
|
||||||
|
data("out_high", get_mem(get_single_element_layout(p), 255)),
|
||||||
|
scale("scale", "act", "scale_data"),
|
||||||
|
activation("act2", "scale", activation_func::softsign),
|
||||||
|
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
||||||
|
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1.f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(activation_scale_activation_quantize_i8, per_channel) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
activation("act", "input", activation_func::relu),
|
||||||
|
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||||
|
data("in_low", get_mem(get_per_channel_layout(p), min_random, 0)),
|
||||||
|
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||||
|
data("out_low", get_mem(get_single_element_layout(p), 0)),
|
||||||
|
data("out_high", get_mem(get_single_element_layout(p), 255)),
|
||||||
|
scale("scale", "act", "scale_data"),
|
||||||
|
activation("act2", "scale", activation_func::softsign),
|
||||||
|
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 255, data_types::i8),
|
||||||
|
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32));
|
||||||
|
|
||||||
tolerance = 1.f;
|
tolerance = 1.f;
|
||||||
execute(p);
|
execute(p);
|
||||||
@ -3067,31 +3113,113 @@ TEST_P(activation_opt_quantize_i8, basic) {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
fusings_gpu,
|
fusings_gpu,
|
||||||
activation_opt_quantize_i8,
|
activation_scale_activation_quantize_i8,
|
||||||
::testing::ValuesIn(std::vector<activation_test_params>{
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
activation_test_params{CASE_ACTIVATION_F32_1, 2, 3},
|
// InputDataType = FP32
|
||||||
activation_test_params{CASE_ACTIVATION_F32_2, 2, 3},
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 5, "activation_opt"},
|
||||||
activation_test_params{CASE_ACTIVATION_F32_3, 2, 3},
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 5, "activation_opt"},
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 3},
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 5, "activation_opt"},
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 3},
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 5, "activation_opt"},
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F32_3, 2, 3}
|
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_2, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_3, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_4, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_5, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_6, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_7, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 5, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 5, "activation_ref"},
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
DISABLED_fusings_gpu,
|
DISABLED_fusings_gpu,
|
||||||
activation_opt_quantize_i8,
|
activation_scale_activation_quantize_i8,
|
||||||
::testing::ValuesIn(std::vector<activation_test_params>{
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
// fp16 cases
|
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 5, "activation_ref"}, // FIXME - accuracy bug
|
||||||
activation_test_params{CASE_ACTIVATION_F16_1, 2, 3},
|
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 5, "activation_ref"}, // FIXME - accuracy bug
|
||||||
activation_test_params{CASE_ACTIVATION_F16_2, 2, 3},
|
}), );
|
||||||
activation_test_params{CASE_ACTIVATION_F16_3, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 3},
|
|
||||||
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 3},
|
|
||||||
|
|
||||||
// u8 cases
|
class activation_scale_activation : public ActivationFusingTest {};
|
||||||
activation_test_params{CASE_ACTIVATION_U8_1, 2, 3},
|
TEST_P(activation_scale_activation, basic) {
|
||||||
activation_test_params{CASE_ACTIVATION_3D_U8_1, 2, 3}
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
activation("act", "input", activation_func::relu),
|
||||||
|
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||||
|
scale("scale", "act", "scale_data"),
|
||||||
|
activation("act2", "scale", activation_func::exp),
|
||||||
|
reorder("reorder_bfyx", "act2", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-05f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
fusings_gpu,
|
||||||
|
activation_scale_activation,
|
||||||
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
|
// InputDataType = FP32
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 4, "activation_opt"},
|
||||||
|
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_0, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_2, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_3, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_4, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_5, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_6, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F32_7, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_0, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_2, 2, 4, "activation_ref"},
|
||||||
|
|
||||||
|
// InputDataType = FP16
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_0, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_1, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_0, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 4, "activation_opt"},
|
||||||
|
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_0, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_2, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_3, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_4, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_5, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_6, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_F16_7, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_0, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_2, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_3, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_4, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_5, 2, 4, "activation_ref"},
|
||||||
|
|
||||||
|
// InputDataType = UINT8
|
||||||
|
activation_test_params{CASE_ACTIVATION_U8_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_U8_2, 2, 4, "activation_ref"},
|
||||||
|
|
||||||
|
// InputDataType = INT8
|
||||||
|
activation_test_params{CASE_ACTIVATION_I8_1, 2, 4, "activation_opt"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_opt"},
|
||||||
|
|
||||||
|
activation_test_params{CASE_ACTIVATION_I8_1, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_I8_2, 2, 4, "activation_ref"},
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_ref"}
|
||||||
|
}), );
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
DISABLED_fusings_gpu,
|
||||||
|
activation_scale_activation,
|
||||||
|
::testing::ValuesIn(std::vector<activation_test_params>{
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_4, 2, 4, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_5, 2, 4, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F32_6, 2, 4, "activation_ref"}, // FIXME - accuracy bug
|
||||||
|
activation_test_params{CASE_ACTIVATION_3D_F16_6, 2, 4, "activation_ref"}, // FIXME - accuracy bug
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
@ -3392,7 +3520,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
|
|||||||
::testing::ValuesIn(std::vector<deconv_test_params>{
|
::testing::ValuesIn(std::vector<deconv_test_params>{
|
||||||
// Some fusings disabled under deconvolution -> convolution optimization
|
// Some fusings disabled under deconvolution -> convolution optimization
|
||||||
// Quantize fusing disabled for fp16/fp32 for performance reasons
|
// Quantize fusing disabled for fp16/fp32 for performance reasons
|
||||||
// deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 }, FIXME Failure due to activation + quantization fusing
|
deconv_test_params{ CASE_DECONV_FP32_1, 4, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP32_2, 3, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP32_3, 3, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP32_4, 3, 5 },
|
||||||
@ -3438,7 +3566,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
|
|||||||
deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_8, 3, 5 },
|
||||||
// FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
|
// FIXME no quantize implementation for bs_fs_yx_bsv16_fsv16 format AND add_required_reorders pass completely ruins data types
|
||||||
// add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
|
// add_required_reorders pass tries to reorder everything to output type if no format exists, this ruins fp32 -> int8 quantize
|
||||||
// deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
|
//deconv_test_params{ CASE_DECONV_FP32_3D_9, 3, 5 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_1, 3, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_2, 3, 5 },
|
||||||
@ -3448,7 +3576,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_i8,
|
|||||||
deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_6, 3, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_7, 3, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_8, 3, 5 },
|
||||||
// deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
|
//deconv_test_params{ CASE_DECONV_FP16_3D_9, 3, 5 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
|
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 5 },
|
||||||
deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
|
deconv_test_params{ CASE_DECONV_U8S8_3D_2, 2, 5 },
|
||||||
@ -3505,7 +3633,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
|
|||||||
::testing::ValuesIn(std::vector<deconv_test_params>{
|
::testing::ValuesIn(std::vector<deconv_test_params>{
|
||||||
// Some fusings disabled under deconvolution -> convolution optimization
|
// Some fusings disabled under deconvolution -> convolution optimization
|
||||||
// Quantize fusing disabled for fp16/fp32 for performance reasons
|
// Quantize fusing disabled for fp16/fp32 for performance reasons
|
||||||
// deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 }, FIXME Failure due to activation + quantization fusing
|
deconv_test_params{ CASE_DECONV_FP32_1, 7, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
|
||||||
@ -3541,7 +3669,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
|
|||||||
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
|
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
|
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
|
||||||
|
|
||||||
// deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 }, FIXME Failure due to activation + quantization fusing
|
deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
|
||||||
|
Loading…
Reference in New Issue
Block a user