[IE CLDNN] Prod mode support in eltwise fusings (#1491)

This commit is contained in:
Vladimir Paramuzov 2020-07-30 18:16:37 +03:00 committed by GitHub
parent 861bcc2949
commit 8f966887d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 113 additions and 53 deletions

View File

@ -100,7 +100,9 @@ struct eltwise_optional_params : optional_params {
// fuse_params
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Fuse parameters describing an eltwise operation that is fused into another kernel.
// `mode` tells the consumer of these params which eltwise operation is requested.
struct eltwise_fuse_params : fuse_params {
    // Defaults to ADD so that a default-constructed instance never exposes an
    // indeterminate value to code that switches on the mode.
    EltwiseMode mode = EltwiseMode::ADD;

    // Creates eltwise fuse params with the default (ADD) mode.
    eltwise_fuse_params() : fuse_params(KernelType::ELTWISE) {}

    // Creates eltwise fuse params for the given eltwise mode.
    eltwise_fuse_params(EltwiseMode mode) : fuse_params(KernelType::ELTWISE), mode(mode) {}
};
struct scale_fuse_params : fuse_params {

View File

@ -1142,8 +1142,25 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati
break;
}
case KernelType::ELTWISE: {
    // Emits `<output type> <out_var> = <converted input 0> <op> <converted in_var>;`
    // where <op> is chosen from the eltwise mode stored in the fuse params.
    auto p = desc.GetOpParams<eltwise_fuse_params>();
    if (!p)
        throw std::runtime_error("[clDNN] Eltwise fuse params can't be nullptr");

    std::string op;
    switch (p->mode) {
        case kernel_selector::EltwiseMode::ADD:
            op = "+";
            break;
        case kernel_selector::EltwiseMode::MUL:
            op = "*";
            break;
        default:
            // Only ADD and MUL have a codegen mapping here; anything else is rejected.
            throw std::runtime_error("[clDNN] Eltwise mode is not supported in fused ops codegen");
    }

    // The selected operator has to be part of the emitted declaration itself. It is padded
    // with spaces so that ADD produces the same " + " text as the hard-coded form did.
    op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_vars_converted[0] +
                " " + op + " " + ConvertToOutputType(in_var, vec_size) + ";";
    break;
}
case KernelType::QUANTIZE: {

View File

@ -26,53 +26,6 @@
namespace cldnn {
namespace gpu {
namespace {
// Maps a cldnn eltwise mode onto the matching kernel_selector eltwise mode.
// Any mode without an explicit case below is mapped to kernel_selector::eltwise_mode::ADD.
inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
    using ks_mode = kernel_selector::eltwise_mode;

    switch (mode) {
        case eltwise_mode::sum:          return ks_mode::ADD;
        case eltwise_mode::sub:          return ks_mode::SUB;
        case eltwise_mode::max:          return ks_mode::MAX;
        case eltwise_mode::prod:         return ks_mode::MUL;
        case eltwise_mode::div:          return ks_mode::DIV;
        case eltwise_mode::min:          return ks_mode::MIN;
        case eltwise_mode::pow:          return ks_mode::POW;
        case eltwise_mode::mod:          return ks_mode::MODULU;
        case eltwise_mode::eq:           return ks_mode::EQ;
        case eltwise_mode::ne:           return ks_mode::NE;
        case eltwise_mode::lt:           return ks_mode::LT;
        case eltwise_mode::le:           return ks_mode::LE;
        case eltwise_mode::gt:           return ks_mode::GT;
        case eltwise_mode::ge:           return ks_mode::GE;
        case eltwise_mode::logic_and:    return ks_mode::LOGIC_AND;
        case eltwise_mode::logic_or:     return ks_mode::LOGIC_OR;
        case eltwise_mode::logic_xor:    return ks_mode::LOGIC_XOR;
        case eltwise_mode::squared_diff: return ks_mode::SQUARED_DIFF;
        case eltwise_mode::floor_mod:    return ks_mode::FLOOR_MOD;
        default:                         break;
    }

    // Fallback for modes that are not listed above.
    return ks_mode::ADD;
}
} // namespace
struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
using parent = typed_primitive_gpu_impl<eltwise>;
using parent::parent;

View File

@ -518,8 +518,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
auto fuse_eltwise_f = [&](eltwise_node& node) {
std::shared_ptr<const cldnn::eltwise> prim = node.get_primitive();
const std::vector<eltwise_mode> supported_modes = {
eltwise_mode::sum,
eltwise_mode::prod
};
if (node.is_output() || node.inputs_count() != 2 ||
prim->mode != eltwise_mode::sum || !prim->stride.empty())
std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() ||
!prim->stride.empty())
return;
std::vector<cldnn::program_node*> parents = node.get_dependencies();

View File

@ -24,6 +24,52 @@
#include <string>
namespace cldnn {
// Converts the cldnn-level eltwise mode into its kernel_selector counterpart.
// Modes that have no dedicated case fall back to kernel_selector::eltwise_mode::ADD.
inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode) {
    using selector_mode = kernel_selector::eltwise_mode;

    switch (mode) {
        // Arithmetic modes
        case eltwise_mode::sum:          return selector_mode::ADD;
        case eltwise_mode::sub:          return selector_mode::SUB;
        case eltwise_mode::max:          return selector_mode::MAX;
        case eltwise_mode::prod:         return selector_mode::MUL;
        case eltwise_mode::div:          return selector_mode::DIV;
        case eltwise_mode::min:          return selector_mode::MIN;
        case eltwise_mode::pow:          return selector_mode::POW;
        case eltwise_mode::mod:          return selector_mode::MODULU;
        // Comparison modes
        case eltwise_mode::eq:           return selector_mode::EQ;
        case eltwise_mode::ne:           return selector_mode::NE;
        case eltwise_mode::lt:           return selector_mode::LT;
        case eltwise_mode::le:           return selector_mode::LE;
        case eltwise_mode::gt:           return selector_mode::GT;
        case eltwise_mode::ge:           return selector_mode::GE;
        // Logical modes
        case eltwise_mode::logic_and:    return selector_mode::LOGIC_AND;
        case eltwise_mode::logic_or:     return selector_mode::LOGIC_OR;
        case eltwise_mode::logic_xor:    return selector_mode::LOGIC_XOR;
        // Remaining modes
        case eltwise_mode::squared_diff: return selector_mode::SQUARED_DIFF;
        case eltwise_mode::floor_mod:    return selector_mode::FLOOR_MOD;
        default:                         break;
    }

    // Reached only for modes not handled by the switch.
    return selector_mode::ADD;
}
template <>
struct typed_program_node<eltwise> : public typed_program_node_base<eltwise> {
using parent = typed_program_node_base<eltwise>;
@ -38,7 +84,8 @@ public:
// Returns the number of entries in the primitive's `input` list.
size_t inputs_count() const {
    const auto prim = get_primitive();
    return prim->input.size();
}
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
return std::make_shared<kernel_selector::eltwise_fuse_params>();
kernel_selector::eltwise_mode mode = convert_to_eltwise_mode(get_primitive()->mode);
return std::make_shared<kernel_selector::eltwise_fuse_params>(mode);
}
};

View File

@ -588,7 +588,7 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, conv_fp32_scale,
}), );
// Fixture for the parameterized fusing tests below that chain a convolution, a
// relu_negative_slope activation and an eltwise primitive. Adds nothing to ConvFusingTest.
class conv_fp32_prelu_eltwise : public ConvFusingTest {};
TEST_P(conv_fp32_prelu_eltwise, basic) {
TEST_P(conv_fp32_prelu_eltwise, basic_sum) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
@ -605,7 +605,24 @@ TEST_P(conv_fp32_prelu_eltwise, basic) {
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) {
// Topology under test: convolution -> relu_negative_slope activation (slope data uses the
// per-channel layout) -> eltwise in prod mode -> reorder to the default format in f32.
// The second eltwise input ("eltwise_data") uses the output layout.
TEST_P(conv_fp32_prelu_eltwise, basic_prod) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_per_channel_layout(p))),
data("eltwise_data", get_mem(get_output_layout(p))),
convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
// Tolerance set for this case; how execute() uses it is defined by the fixture (not visible here).
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_sum) {
auto p = GetParam();
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
create_topologies(input_layout("input", get_input_layout(p)),
@ -623,6 +640,24 @@ TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast) {
execute(p);
}
// Same chain as the prod case with a full-size eltwise input (convolution ->
// relu_negative_slope activation -> eltwise prod -> reorder), but here "eltwise_data" has an
// all-ones shape, so the eltwise has to broadcast it against the activation output.
TEST_P(conv_fp32_prelu_eltwise, eltw_broadcast_prod) {
auto p = GetParam();
// All-ones shape: four dimensions when the default format has two spatial dims, five otherwise.
tensor eltw_shape = p.default_format.spatial_num() == 2 ? tensor{1, 1, 1, 1} : tensor{1, 1, 1, 1, 1};
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("slope_data", get_mem(get_per_channel_layout(p))),
data("eltwise_data", get_mem(layout{ p.data_type, p.input_format, eltw_shape })),
convolution("conv_prim", "input", {"weights"}, {"bias"}, p.groups, p.stride, p.pad, p.dilation),
activation("activation", "conv_prim", "slope_data", activation_func::relu_negative_slope),
eltwise("eltwise", "activation", "eltwise_data", eltwise_mode::prod),
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
);
// Tolerance set for this case; how execute() uses it is defined by the fixture (not visible here).
tolerance = 1e-5f;
execute(p);
}
TEST_P(conv_fp32_prelu_eltwise, vector_ops) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),