[GPU] enable validate after adding reorder (#9594)

Kelvin Choi 2022-01-27 13:08:35 +09:00 committed by GitHub
parent c7eeda0247
commit 00b7f58152
4 changed files with 75 additions and 34 deletions


@@ -30,6 +30,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_
     auto new_reorder = std::make_shared<reorder>(node->id() + "_reorder_" + usr->id(), node->id(), reorder_layout);
     auto& new_reorder_node = p.get_or_create(new_reorder);
+    new_reorder_node.set_output_layout(reorder_layout, false);
     // ToDo: add a method to program class which adds an intermediate node given a node and its user
     auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
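The fix here is an ordering one: node validation reads the node's output layout, and a reorder node created through get_or_create() starts without one, so the layout has to be assigned before the node is spliced into the user's dependency list. A minimal, self-contained sketch of that constraint, with all types invented here for illustration (none of this is the clDNN API):

#include <cassert>
#include <optional>
#include <string>
#include <utility>

// Toy stand-ins for clDNN's layout / program_node; invented for illustration.
struct toy_layout {
    std::string format;
};

struct toy_node {
    std::string id;
    std::optional<toy_layout> output_layout;  // empty until explicitly set

    void set_output_layout(toy_layout l) { output_layout = std::move(l); }

    // Validation needs a concrete output layout; a freshly created node has none.
    bool validate() const { return output_layout.has_value(); }
};

int main() {
    toy_node new_reorder_node{"node_reorder_usr", std::nullopt};

    // Before set_output_layout, validating the just-created node fails,
    // which is why the pass assigns the layout right after get_or_create().
    assert(!new_reorder_node.validate());

    new_reorder_node.set_output_layout(toy_layout{"fs_b_yx_fsv32"});
    assert(new_reorder_node.validate());
    return 0;
}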


@@ -56,4 +56,13 @@ KernelsData ActivationKernelRef::GetKernelsData(const Params& params, const opti
 KernelsPriority ActivationKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
     return DONT_USE_IF_HAVE_SOMETHING_ELSE;
 }
+bool ActivationKernelRef::Validate(const Params& p, const optional_params& o) const {
+    if (!Parent::Validate(p, o)) return false;
+
+    const auto& params = static_cast<const activation_params&>(p);
+    if (params.inputs[0].GetDims().size() != params.output.GetDims().size())
+        return false;
+
+    return true;
+}
 } // namespace kernel_selector
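The new Validate() simply rejects parameter sets whose input and output tensors disagree in rank, so the reference activation kernel is skipped for them and selection falls through to another implementation. The same predicate over plain dimension vectors, as a self-contained toy (names invented):

#include <cassert>
#include <cstddef>
#include <vector>

// Toy mirror of the rank check added to ActivationKernelRef::Validate:
// a parameter set is rejected when input and output tensor ranks differ.
bool ranks_match(const std::vector<std::size_t>& input_dims,
                 const std::vector<std::size_t>& output_dims) {
    return input_dims.size() == output_dims.size();
}

int main() {
    assert(ranks_match({1, 32, 4, 5}, {1, 32, 2, 3}));      // both 4-D: accepted
    assert(!ranks_match({1, 32, 4, 5}, {1, 32, 2, 3, 1}));  // 4-D vs 5-D: rejected
    return 0;
}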


@@ -11,6 +11,9 @@
 namespace kernel_selector {
 class ActivationKernelRef : public ActivationKernelBase {
 public:
+    using Parent = ActivationKernelBase;
+    using Parent::Parent;
+
     ActivationKernelRef() : ActivationKernelBase("activation_ref") {}
     virtual ~ActivationKernelRef() {}
@@ -23,5 +26,7 @@ public:
                 FusedOpType::SCALE,
                 FusedOpType::ACTIVATION};
     }
+
+    bool Validate(const Params& p, const optional_params& o) const override;
 };
 } // namespace kernel_selector
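The Parent alias plus using Parent::Parent is the usual way these kernel classes forward to their base: the override can call Parent::Validate() without spelling out the base class name, and the base constructors are inherited. A self-contained illustration of the idiom with toy classes (not the real kernel_selector hierarchy):

#include <cassert>
#include <string>
#include <utility>

struct params {
    int input_rank = 4;
    int output_rank = 4;
};

class KernelBase {
public:
    explicit KernelBase(std::string name) : name_(std::move(name)) {}
    virtual ~KernelBase() = default;
    virtual bool Validate(const params&) const { return true; }  // base-class checks
private:
    std::string name_;
};

class KernelRef : public KernelBase {
public:
    using Parent = KernelBase;  // same aliasing pattern as in the header above
    using Parent::Parent;       // inherit KernelBase's constructor

    bool Validate(const params& p) const override {
        if (!Parent::Validate(p))  // run the base validation first
            return false;
        return p.input_rank == p.output_rank;  // then the derived-class check
    }
};

int main() {
    KernelRef kernel("activation_ref");
    assert(kernel.Validate(params{}));
    assert(!kernel.Validate(params{4, 5}));  // rank mismatch rejected
    return 0;
}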


@@ -113,6 +113,29 @@ public:
     }
 };
 
+class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
+public:
+    void execute(convolution_test_params& p) {
+        auto input_prim = get_mem(get_input_layout(p));
+        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
+        network network_fused(this->engine, this->topology_fused, bo_fused);
+        network_fused.set_input_data("input", input_prim);
+        network_not_fused.set_input_data("input", input_prim);
+
+        compare(network_not_fused, network_fused, p, true);
+    }
+
+    layout get_input_layout(convolution_test_params& p) {
+        auto pad = p.pad;
+        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
+        return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
+    }
+
+    layout get_per_channel_layout(convolution_test_params& p) {
+        return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
+    }
+};
+
 class ConvEltwTest : public ::BaseFusingTest<conv_eltw_test_params> {
 public:
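The new fixture builds the same topology twice, once with the fusing build options (bo_fused) and once without (bo_not_fused), feeds both networks identical input, and compares the results; the trailing true passed to compare() is presumably what makes the check also count reorder primitives, which is the point of these tests. A toy version of that A/B pattern (all names and the comparison rule invented here):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// "Non-fused" pipeline: convolution-like scaling and activation as two steps.
std::vector<float> run_not_fused(const std::vector<float>& input) {
    std::vector<float> conv_out;
    for (float v : input) conv_out.push_back(v * 2.0f);         // stand-in convolution
    std::vector<float> act_out;
    for (float v : conv_out) act_out.push_back(std::fabs(v));   // stand-in activation
    return act_out;
}

// "Fused" pipeline: both steps folded into a single pass.
std::vector<float> run_fused(const std::vector<float>& input) {
    std::vector<float> out;
    for (float v : input) out.push_back(std::fabs(v * 2.0f));
    return out;
}

int main() {
    const std::vector<float> input = {-1.5f, 0.0f, 2.25f};
    const auto reference = run_not_fused(input);
    const auto fused = run_fused(input);

    // Same input, elementwise-close outputs: the fixture's core contract.
    assert(reference.size() == fused.size());
    for (std::size_t i = 0; i < reference.size(); ++i)
        assert(std::fabs(reference[i] - fused[i]) < 1e-5f);
    return 0;
}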
@@ -2543,10 +2566,14 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::
     bc_force_kernel_params{ CASE_CONV_FP16_13, 2, 3, "convolution_gpu_fs_byx_fsv32" },
 }));
 
-// reorder(bfyx to fs_b_yx_fsv32) + conv
+/* ----------------------------------------------------------------------------------------------------- */
+/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */
+/* ----------------------------------------------------------------------------------------------------- */
 #define FSV32_CASE_CONV_FP32_1 { 1, 32, 4, 5 }, { 1, 32, 2, 3 }, { 1, 1, 3, 3 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvFusingTest {};
+// 'reorder_fsv32' is being removed from "remove_redundant_reorders" in the current impl
+class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
     auto p = GetParam();
@@ -2554,21 +2581,22 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
         input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs),
+        reorder("reorder_out", "activation", format::bfyx, data_types::f32)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_basic, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'mean'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
     auto p = GetParam();
     memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } });
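The two integers after the case macro appear to be the expected executed-primitive counts for the fused and the non-fused network, in that order, and they shift throughout this file because reorders now contribute to the totals: the non-fused graph keeps the explicit output reorder as its own primitive, so 3, 3 becomes 3, 4 above. A toy sketch of a reorder-aware count under that assumption (the real counting lives in BaseFusingTest::compare; graphs invented here):

#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

using executed_node = std::pair<std::string, std::string>;  // (id, primitive type)

// Count executed primitives, optionally including reorders in the total.
std::size_t count_executed(const std::vector<executed_node>& nodes, bool count_reorder) {
    std::size_t n = 0;
    for (const auto& node : nodes)
        if (count_reorder || node.second != "reorder")
            ++n;
    return n;
}

int main() {
    // Fused graph: the activation has been folded into the convolution.
    const std::vector<executed_node> fused = {
        {"reorder_fsv32", "reorder"},
        {"conv_prim", "convolution"},
        {"reorder_out", "reorder"}};
    // Non-fused graph: the activation still runs as its own primitive.
    const std::vector<executed_node> not_fused = {
        {"reorder_fsv32", "reorder"},
        {"conv_prim", "convolution"},
        {"activation", "activation"},
        {"reorder_out", "reorder"}};

    assert(count_executed(fused, /*count_reorder=*/true) == 3);      // expected fused count
    assert(count_executed(not_fused, /*count_reorder=*/true) == 4);  // expected non-fused count
    return 0;
}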
@@ -2579,21 +2607,21 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
         data("mul", mul),
         data("weights", get_mem(get_weights_layout(p), -127, 127)),
         reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32, "mul", reorder_mean_mode::mul),
-        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
+        activation("activation", "conv_prim", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_mean, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'subtract'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
     auto p = GetParam();
     const std::vector<float>& values_to_subtract = {
@@ -2603,7 +2631,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
         0.1f, 0.2f, 0.1f, 0.1f, 0.1f, 0.2f, 0.1f, 0.1f
     };
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2613,12 +2641,11 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
 
     execute(p);
 }
@@ -2626,12 +2653,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_subtr
     convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvFusingTest {};
+// 'reorder_fsv32' is not being fused in the current impl, since it has 'fused_activation'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activation) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2642,25 +2669,26 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32),
         activation("activation_quantize", "reorder_fsv32", activation_func::relu),
-        convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs)
+        convolution("conv_prim2", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        activation("activation", "conv_prim2", activation_func::abs)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } }));
 
     execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 5 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
 }));
 
-class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvFusingTest {};
+// 'reorder_fsv32' is being fused even if it has 'padding'
+class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvReorderFusingTest {};
 TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
     auto p = GetParam();
 
-    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
+    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
     auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
     auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2670,19 +2698,17 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
         data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
         convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
         reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ { 0, 0, 1, 1 }, 0 })),
-        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
-        activation("activation", "conv_output", activation_func::abs),
-        activation("activation2", "conv_prim", activation_func::abs),
-        eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum)
+        convolution("conv_prim2", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
+        reorder("reorder_out", "conv_prim2", format::fs_b_yx_fsv32, data_types::f32)
     );
 
     implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
-    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
+    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
 
    execute(p);
 }
 
 INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, ::testing::ValuesIn(std::vector<convolution_test_params>{
-    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
+    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 5 }
 }));
 
 #ifdef ENABLE_ONEDNN_FOR_GPU