[GPU] enable validate after adding reorder (#9594)
parent c7eeda0247
commit 00b7f58152
@@ -30,6 +30,7 @@ void add_required_reorders::add_reorder(program& p, program_node* node, program_
    auto new_reorder = std::make_shared<reorder>(node->id() + "_reorder_" + usr->id(), node->id(), reorder_layout);
    auto& new_reorder_node = p.get_or_create(new_reorder);
    new_reorder_node.set_output_layout(reorder_layout, false);

    // ToDo: add a method to program class which adds an intermediate node given a node and its user
    auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
@@ -56,4 +56,13 @@ KernelsData ActivationKernelRef::GetKernelsData(const Params& params, const opti
KernelsPriority ActivationKernelRef::GetKernelsPriority(const Params& /*params*/, const optional_params& /*options*/) const {
    return DONT_USE_IF_HAVE_SOMETHING_ELSE;
}

bool ActivationKernelRef::Validate(const Params& p, const optional_params& o) const {
    if (!Parent::Validate(p, o)) return false;
    const auto& params = static_cast<const activation_params&>(p);
    if (params.inputs[0].GetDims().size() != params.output.GetDims().size())
        return false;

    return true;
}
}  // namespace kernel_selector
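The new Validate() above makes the reference activation kernel reject cases where the input and output tensor ranks differ, so the kernel selector falls back to another implementation instead. Below is a minimal standalone sketch of that rank check using plain std::vector dimensions; ranks_match and the sample shapes are illustrative stand-ins, not part of the kernel_selector API.

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the comparison done in ActivationKernelRef::Validate():
// a candidate kernel is only kept when input and output have the same number of dimensions.
static bool ranks_match(const std::vector<std::size_t>& input_dims,
                        const std::vector<std::size_t>& output_dims) {
    return input_dims.size() == output_dims.size();
}

int main() {
    std::vector<std::size_t> in_4d  = {1, 32, 4, 5};     // b, f, y, x
    std::vector<std::size_t> out_5d = {1, 32, 1, 4, 5};  // b, f, z, y, x

    // A rank mismatch like this would make Validate() return false for the ref kernel.
    std::cout << std::boolalpha << ranks_match(in_4d, out_5d) << "\n";  // prints "false"
    return 0;
}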
@@ -11,6 +11,9 @@
namespace kernel_selector {
class ActivationKernelRef : public ActivationKernelBase {
public:
    using Parent = ActivationKernelBase;
    using Parent::Parent;

    ActivationKernelRef() : ActivationKernelBase("activation_ref") {}
    virtual ~ActivationKernelRef() {}
@@ -23,5 +26,7 @@ public:
                FusedOpType::SCALE,
                FusedOpType::ACTIVATION};
    }

    bool Validate(const Params& p, const optional_params& o) const override;
};
}  // namespace kernel_selector
@@ -113,6 +113,29 @@ public:
    }
};

class ConvReorderFusingTest : public BaseFusingTest<convolution_test_params> {
public:
    void execute(convolution_test_params& p) {
        auto input_prim = get_mem(get_input_layout(p));
        network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
        network network_fused(this->engine, this->topology_fused, bo_fused);
        network_fused.set_input_data("input", input_prim);
        network_not_fused.set_input_data("input", input_prim);

        compare(network_not_fused, network_fused, p, true);
    }

    layout get_input_layout(convolution_test_params& p) {
        auto pad = p.pad;
        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
        return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
    }

    layout get_per_channel_layout(convolution_test_params& p) {
        return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
    }
};

class ConvEltwTest : public ::BaseFusingTest<conv_eltw_test_params> {
public:
@@ -2543,10 +2566,14 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp16_scale, ::testing::ValuesIn(std::
    bc_force_kernel_params{ CASE_CONV_FP16_13, 2, 3, "convolution_gpu_fs_byx_fsv32" },
}));

// reorder(bfyx to fs_b_yx_fsv32) + conv

/* ----------------------------------------------------------------------------------------------------- */
/* ---------------------- reorder(bfyx to fs_b_yx_fsv32) + convolution kernel cases -------------------- */
/* ----------------------------------------------------------------------------------------------------- */
#define FSV32_CASE_CONV_FP32_1 { 1, 32, 4, 5 }, { 1, 32, 2, 3 }, { 1, 1, 3, 3 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx

class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvFusingTest {};
// 'reorder_fsv32' is being removed from "remove_redundant_reorders" in the current impl
class conv_fp32_reorder_bfyx_to_fsv32_conv_basic : public ConvReorderFusingTest {};
TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
    auto p = GetParam();
@@ -2554,21 +2581,22 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_basic, basic) {
        input_layout("input", get_input_layout(p)),
        data("weights", get_mem(get_weights_layout(p), -127, 127)),
        reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32),
        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
        activation("activation", "conv_prim", activation_func::abs),
        reorder("reorder_out", "activation", format::bfyx, data_types::f32)
    );

    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));

    execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_basic, ::testing::ValuesIn(std::vector<convolution_test_params>{
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 4 }
}));


class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvFusingTest {};
// 'reorder_fsv32' is not being fused in the current impl, since it has 'mean'
class conv_fp32_reorder_bfyx_to_fsv32_conv_mean : public ConvReorderFusingTest {};
TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
    auto p = GetParam();
    memory::ptr mul = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 1, 3, 1, 2 } });
@@ -2579,21 +2607,21 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_mean, have_mean) {
        data("mul", mul),
        data("weights", get_mem(get_weights_layout(p), -127, 127)),
        reorder("reorder_fsv32", "input", format::fs_b_yx_fsv32, data_types::f32, "mul", reorder_mean_mode::mul),
        convolution("conv_output", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
        convolution("conv_prim", "reorder_fsv32", { "weights" }, 1, tensor{ 0, 0, 1, 1 }, p.pad, p.dilation),
        activation("activation", "conv_prim", activation_func::abs)
    );

    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));

    execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_mean, ::testing::ValuesIn(std::vector<convolution_test_params>{
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 3, 3 }
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
}));


class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvFusingTest {};
// 'reorder_fsv32' is not being fused in the current impl, since it has 'subtract'
class conv_fp32_reorder_bfyx_to_fsv32_conv_subtract : public ConvReorderFusingTest {};
TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) {
    auto p = GetParam();
    const std::vector<float>& values_to_subtract = {
@@ -2603,7 +2631,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
        0.1f, 0.2f, 0.1f, 0.1f, 0.1f, 0.2f, 0.1f, 0.1f
    };

    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
    auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
    auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2613,12 +2641,11 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature)
        data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation)
    );

    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));

    execute(p);
}
@@ -2626,12 +2653,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_subtr
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 4 }
}));


class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvFusingTest {};
// 'reorder_fsv32' is not being fused in the current impl, since it has 'fused_activation'
class conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation : public ConvReorderFusingTest {};
TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activation) {
    auto p = GetParam();

    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
    auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
    auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2642,25 +2669,26 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32),
        activation("activation_quantize", "reorder_fsv32", activation_func::relu),
        convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs)
        convolution("conv_prim2", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_prim2", activation_func::abs)
    );

    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "activation", conv_impl } }));

    execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, ::testing::ValuesIn(std::vector<convolution_test_params>{
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 4, 5 }
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
}));


class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvFusingTest {};
// 'reorder_fsv32' is being fused even if it has 'padding'
class conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding : public ConvReorderFusingTest {};
TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
    auto p = GetParam();

    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(3, 3));
    auto dw_tensor = cldnn::tensor(group(p.out_shape.feature[0]), batch(1), feature(1), spatial(2, 2));
    auto dw_weights_layout = layout{ p.default_type, format::goiyx, dw_tensor };
    auto dw_stride = tensor{ 0, 0, 1, 1 };
@@ -2670,19 +2698,17 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) {
        data("weights_dw", get_mem(dw_weights_layout, -127, 127)),
        convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation),
        reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ { 0, 0, 1, 1 }, 0 })),
        convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        activation("activation", "conv_output", activation_func::abs),
        activation("activation2", "conv_prim", activation_func::abs),
        eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum)
        convolution("conv_prim2", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation),
        reorder("reorder_out", "conv_prim2", format::fs_b_yx_fsv32, data_types::f32)
    );

    implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" };
    bo_fused.set_option(build_option::force_implementations({ { "conv_output", conv_impl } }));
    bo_fused.set_option(build_option::force_implementations({ { "conv_prim2", conv_impl } }));

    execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, ::testing::ValuesIn(std::vector<convolution_test_params>{
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 6 }
    convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 5 }
}));

#ifdef ENABLE_ONEDNN_FOR_GPU