[IE CLDNN] Extend supported fusing cases for scale and eltwise (#1960)
This commit is contained in:
parent
d901bbfce3
commit
4519097e47
@ -25,7 +25,12 @@ public:
|
|||||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||||
return { FusedOpType::QUANTIZE };
|
return {
|
||||||
|
FusedOpType::QUANTIZE,
|
||||||
|
FusedOpType::ACTIVATION,
|
||||||
|
FusedOpType::SCALE,
|
||||||
|
FusedOpType::ELTWISE
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2019 Intel Corporation
|
// Copyright (c) 2019-2020 Intel Corporation
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// y ou may not use this file except in compliance with the License.
|
// y ou may not use this file except in compliance with the License.
|
||||||
@ -70,9 +70,13 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con
|
|||||||
idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"};
|
idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"};
|
||||||
}
|
}
|
||||||
|
|
||||||
FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1};
|
if (!params.layoutBased && !params.int8_quantization && !params.broadcast && CheckInputsOutputNoPitchSameDims(params)) {
|
||||||
FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
|
FusedOpsConfiguration conf = {"", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
|
||||||
jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord}));
|
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||||
|
} else {
|
||||||
|
FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1};
|
||||||
|
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return jit;
|
return jit;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2016 Intel Corporation
|
// Copyright (c) 2016-2020 Intel Corporation
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@ -26,7 +26,12 @@ public:
|
|||||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||||
return { FusedOpType::QUANTIZE };
|
return {
|
||||||
|
FusedOpType::QUANTIZE,
|
||||||
|
FusedOpType::ACTIVATION,
|
||||||
|
FusedOpType::SCALE,
|
||||||
|
FusedOpType::ELTWISE
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
JitConstants GetJitConstants(const eltwise_params& params) const;
|
JitConstants GetJitConstants(const eltwise_params& params) const;
|
||||||
@ -34,4 +39,4 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
bool Validate(const Params& p, const optional_params& o) const override;
|
bool Validate(const Params& p, const optional_params& o) const override;
|
||||||
};
|
};
|
||||||
} // namespace kernel_selector
|
} // namespace kernel_selector
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2016-2017 Intel Corporation
|
// Copyright (c) 2016-2020 Intel Corporation
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@ -15,18 +15,17 @@
|
|||||||
|
|
||||||
#include "include/include_all.cl"
|
#include "include/include_all.cl"
|
||||||
|
|
||||||
KERNEL(eltwise_gpu_vload8)(
|
KERNEL(eltwise_gpu_vload8)(INPUTS_DECLS
|
||||||
INPUTS_DECLS
|
__global OUTPUT_TYPE* output)
|
||||||
__global UNIT_TYPE* output)
|
|
||||||
{
|
{
|
||||||
const uint global_id = get_global_id(0);
|
const uint global_id = get_global_id(0);
|
||||||
|
|
||||||
VLOAD_DECLS
|
VLOAD_DECLS
|
||||||
|
|
||||||
MAKE_VECTOR_TYPE(UNIT_TYPE, 8) res;
|
MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8) res;
|
||||||
|
|
||||||
DO_ELTWISE
|
DO_ELTWISE
|
||||||
|
|
||||||
res = ACTIVATION(res, ACTIVATION_PARAMS);
|
res = ACTIVATION(res, ACTIVATION_PARAMS);
|
||||||
|
|
||||||
vstore8(res, global_id, output);
|
vstore8(res, global_id, output);
|
||||||
|
@ -57,10 +57,10 @@ KERNEL(eltwise)(
|
|||||||
#else
|
#else
|
||||||
const uint d1 = get_global_id(0);
|
const uint d1 = get_global_id(0);
|
||||||
const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1];
|
const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1];
|
||||||
const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZE[2];
|
const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZES[2];
|
||||||
const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZE[2];
|
const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZES[2];
|
||||||
const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[3];
|
const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[4];
|
||||||
const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[3];
|
const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[4];
|
||||||
|
|
||||||
uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
|
uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
|
||||||
#endif
|
#endif
|
||||||
@ -117,13 +117,8 @@ KERNEL(eltwise)(
|
|||||||
DO_ELTWISE;
|
DO_ELTWISE;
|
||||||
|
|
||||||
#if HAS_FUSED_OPS
|
#if HAS_FUSED_OPS
|
||||||
#if ELTWISE_NO_PITCH_SAME_DIMS
|
FUSED_OPS;
|
||||||
FUSED_OPS_LINEAR;
|
OUTPUT_TYPE out = FUSED_OPS_RESULT;
|
||||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR;
|
|
||||||
#else
|
|
||||||
FUSED_OPS_TENSOR;
|
|
||||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR;
|
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
#define out res
|
#define out res
|
||||||
#endif
|
#endif
|
||||||
|
@ -1395,6 +1395,7 @@ bool FusedOpsCodeGenerator::CanPreloadData(const FusedOpsConfiguration& conf) co
|
|||||||
switch (d) {
|
switch (d) {
|
||||||
case Tensor::DataChannelName::BATCH: can_preload &= idx.b == "0"; break;
|
case Tensor::DataChannelName::BATCH: can_preload &= idx.b == "0"; break;
|
||||||
case Tensor::DataChannelName::FEATURE: can_preload &= idx.f == "0"; break;
|
case Tensor::DataChannelName::FEATURE: can_preload &= idx.f == "0"; break;
|
||||||
|
case Tensor::DataChannelName::W: can_preload &= idx.w == "0"; break;
|
||||||
case Tensor::DataChannelName::Z: can_preload &= idx.z == "0"; break;
|
case Tensor::DataChannelName::Z: can_preload &= idx.z == "0"; break;
|
||||||
case Tensor::DataChannelName::Y: can_preload &= idx.y == "0"; break;
|
case Tensor::DataChannelName::Y: can_preload &= idx.y == "0"; break;
|
||||||
case Tensor::DataChannelName::X: can_preload &= idx.x == "0"; break;
|
case Tensor::DataChannelName::X: can_preload &= idx.x == "0"; break;
|
||||||
@ -1619,7 +1620,7 @@ JitConstants FusedOpsCodeGenerator::MakeOpJitConstants(const FusedOpsConfigurati
|
|||||||
case KernelType::ACTIVATION: {
|
case KernelType::ACTIVATION: {
|
||||||
auto p = desc.GetOpParams<activation_fuse_params>();
|
auto p = desc.GetOpParams<activation_fuse_params>();
|
||||||
base_activation_params activation_p = p->param;
|
base_activation_params activation_p = p->param;
|
||||||
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + in_var + ";";
|
op_decls += "\\\n\t" + GetOutputType(vec_size) + " " + out_var + " = " + ConvertToOutputType(in_var, vec_size) + ";";
|
||||||
if (activation_p.function != ActivationFunction::NONE) {
|
if (activation_p.function != ActivationFunction::NONE) {
|
||||||
auto suffix = "_FUSED_OP"+std::to_string(desc.op_id) + conf.suffix;
|
auto suffix = "_FUSED_OP"+std::to_string(desc.op_id) + conf.suffix;
|
||||||
std::string nl_m = std::to_string(activation_p.m);
|
std::string nl_m = std::to_string(activation_p.m);
|
||||||
@ -1678,6 +1679,8 @@ std::string FusedOpsCodeGenerator::GetIdx(size_t input_id, idx_desc idx, bool sh
|
|||||||
idx_order = idx.b + "," + idx.f + "," + idx.y + "," + idx.x;
|
idx_order = idx.b + "," + idx.f + "," + idx.y + "," + idx.x;
|
||||||
} else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 5) {
|
} else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 5) {
|
||||||
idx_order = idx.b + "," + idx.f + "," + idx.z + "," + idx.y + "," + idx.x;
|
idx_order = idx.b + "," + idx.f + "," + idx.z + "," + idx.y + "," + idx.x;
|
||||||
|
} else if (DataTensor::ChannelsCount(desc.tensors[input_id].GetLayout()) == 6) {
|
||||||
|
idx_order = idx.b + "," + idx.f + "," + idx.w + "," + idx.z + "," + idx.y + "," + idx.x;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (should_be_safe) {
|
if (should_be_safe) {
|
||||||
|
@ -308,12 +308,13 @@ public:
|
|||||||
struct idx_desc {
|
struct idx_desc {
|
||||||
std::string b;
|
std::string b;
|
||||||
std::string f;
|
std::string f;
|
||||||
|
std::string w;
|
||||||
std::string z;
|
std::string z;
|
||||||
std::string y;
|
std::string y;
|
||||||
std::string x;
|
std::string x;
|
||||||
size_t dims;
|
size_t dims;
|
||||||
explicit idx_desc(std::vector<std::string> idx, DataTensor t)
|
explicit idx_desc(std::vector<std::string> idx, DataTensor t)
|
||||||
: b("0"), f("0"), z("0"), y("0"), x("0"), dims(0) {
|
: b("0"), f("0"), w("0"), z("0"), y("0"), x("0"), dims(0) {
|
||||||
dims = idx.size();
|
dims = idx.size();
|
||||||
switch (dims) {
|
switch (dims) {
|
||||||
case 1: f = idx[0]; break;
|
case 1: f = idx[0]; break;
|
||||||
@ -321,7 +322,8 @@ public:
|
|||||||
case 3: b = idx[0]; f = idx[1]; y = idx[2]; break;
|
case 3: b = idx[0]; f = idx[1]; y = idx[2]; break;
|
||||||
case 4: b = idx[0]; f = idx[1]; y = idx[2]; x = idx[3]; break;
|
case 4: b = idx[0]; f = idx[1]; y = idx[2]; x = idx[3]; break;
|
||||||
case 5: b = idx[0]; f = idx[1]; z = idx[2]; y = idx[3]; x = idx[4]; break;
|
case 5: b = idx[0]; f = idx[1]; z = idx[2]; y = idx[3]; x = idx[4]; break;
|
||||||
default: throw std::runtime_error("More than 5 dimenstions is not supported in fused op generator");
|
case 6: b = idx[0]; f = idx[1]; w = idx[2]; z = idx[3]; y = idx[4]; x = idx[5]; break;
|
||||||
|
default: throw std::runtime_error("More than 6 dimenstions is not supported in fused op generator");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (t.Batch().v == 1) {
|
if (t.Batch().v == 1) {
|
||||||
@ -330,6 +332,9 @@ public:
|
|||||||
if (t.Feature().v == 1) {
|
if (t.Feature().v == 1) {
|
||||||
f = "0";
|
f = "0";
|
||||||
}
|
}
|
||||||
|
if (t.W().v == 1) {
|
||||||
|
w = "0";
|
||||||
|
}
|
||||||
if (t.Z().v == 1) {
|
if (t.Z().v == 1) {
|
||||||
z = "0";
|
z = "0";
|
||||||
}
|
}
|
||||||
|
@ -171,6 +171,7 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
|
|||||||
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
{ std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
{ std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
{ std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
|
{ std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
{ std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
{ std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
{ std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
{ std::make_tuple(engine_types::ocl, data_types::i64, format::b_fs_zyx_fsv16), eltwise_gpu::create },
|
||||||
|
|
||||||
|
@ -423,6 +423,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
|
|
||||||
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<scale>();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<eltwise>();
|
||||||
|
|
||||||
if (!should_fuse)
|
if (!should_fuse)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -478,6 +482,10 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
|
|
||||||
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
should_fuse |= input_data.is_type<reduce>() && reduce_supports_fusings(input_data.as<reduce>());
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<scale>();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<eltwise>();
|
||||||
|
|
||||||
if (!should_fuse)
|
if (!should_fuse)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -562,12 +570,14 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
|
|
||||||
should_fuse |= input_data.is_type<space_to_batch>() && quantize_node.get_scale_shift_opt();
|
should_fuse |= input_data.is_type<space_to_batch>() && quantize_node.get_scale_shift_opt();
|
||||||
|
|
||||||
should_fuse |= input_data.is_type<eltwise>() && quantize_node.get_scale_shift_opt();
|
|
||||||
|
|
||||||
should_fuse |= input_data.is_type<reduce>() &&
|
should_fuse |= input_data.is_type<reduce>() &&
|
||||||
reduce_supports_fusings(input_data.as<reduce>())
|
reduce_supports_fusings(input_data.as<reduce>())
|
||||||
&& quantize_node.get_scale_shift_opt();
|
&& quantize_node.get_scale_shift_opt();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<eltwise>() && quantize_node.get_scale_shift_opt();
|
||||||
|
|
||||||
|
should_fuse |= input_data.is_type<scale>() && quantize_node.get_scale_shift_opt();
|
||||||
|
|
||||||
if (!should_fuse)
|
if (!should_fuse)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -589,31 +599,26 @@ void prepare_primitive_fusing::fuse_simple_primitives(program_impl &p) {
|
|||||||
std::vector<cldnn::program_node*> parents = node.get_dependencies();
|
std::vector<cldnn::program_node*> parents = node.get_dependencies();
|
||||||
std::list<cldnn::program_node*> users = node.get_users();
|
std::list<cldnn::program_node*> users = node.get_users();
|
||||||
|
|
||||||
|
std::vector<bool> can_fuse_parents = { false, false };
|
||||||
|
|
||||||
|
for (size_t i = 0; i < parents.size(); i++) {
|
||||||
|
can_fuse_parents[i] = (parents[i]->is_type<convolution>() && conv_supports_fusings(parents[i]->as<convolution>())) ||
|
||||||
|
(parents[i]->is_type<mvn>() && mvn_supports_fusings(parents[i]->as<mvn>())) ||
|
||||||
|
(parents[i]->is_type<deconvolution>()) ||
|
||||||
|
(parents[i]->is_type<permute>()) ||
|
||||||
|
(parents[i]->is_type<space_to_depth>()) ||
|
||||||
|
(parents[i]->is_type<gemm>() && gemm_supports_fusings(parents[i]->as<gemm>())) ||
|
||||||
|
(parents[i]->is_type<batch_to_space>()) ||
|
||||||
|
(parents[i]->is_type<space_to_batch>()) ||
|
||||||
|
(parents[i]->is_type<eltwise>()) ||
|
||||||
|
(parents[i]->is_type<scale>()) ||
|
||||||
|
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
||||||
|
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>()));
|
||||||
|
}
|
||||||
|
|
||||||
auto parent1 = parents[0];
|
auto parent1 = parents[0];
|
||||||
auto parent2 = parents[1];
|
auto parent2 = parents[1];
|
||||||
|
|
||||||
bool can_fuse_parent1 = (parent1->is_type<convolution>() && conv_supports_fusings(parent1->as<convolution>())) ||
|
|
||||||
(parent1->is_type<mvn>() && mvn_supports_fusings(parent1->as<mvn>())) ||
|
|
||||||
(parent1->is_type<deconvolution>()) || (parent1->is_type<permute>()) ||
|
|
||||||
(parent1->is_type<space_to_depth>()) ||
|
|
||||||
(parent1->is_type<gemm>() && gemm_supports_fusings(parent1->as<gemm>())) ||
|
|
||||||
(parent1->is_type<batch_to_space>()) || (parent1->is_type<space_to_batch>()) ||
|
|
||||||
(parent1->is_type<depth_to_space>() && dts_supports_fusings(parent1->as<depth_to_space>())) ||
|
|
||||||
(parent1->is_type<batch_to_space>()) || (parent1->is_type<space_to_batch>()) ||
|
|
||||||
(parent1->is_type<reduce>() && reduce_supports_fusings(parent1->as<reduce>()));
|
|
||||||
|
|
||||||
bool can_fuse_parent2 = (parent2->is_type<convolution>() && conv_supports_fusings(parent2->as<convolution>())) ||
|
|
||||||
(parent2->is_type<mvn>() && mvn_supports_fusings(parent2->as<mvn>())) ||
|
|
||||||
(parent2->is_type<deconvolution>()) || (parent2->is_type<permute>()) ||
|
|
||||||
(parent2->is_type<space_to_depth>()) ||
|
|
||||||
(parent2->is_type<gemm>() && gemm_supports_fusings(parent2->as<gemm>())) ||
|
|
||||||
(parent2->is_type<batch_to_space>()) || (parent2->is_type<space_to_batch>()) ||
|
|
||||||
(parent2->is_type<depth_to_space>() && dts_supports_fusings(parent2->as<depth_to_space>())) ||
|
|
||||||
(parent2->is_type<batch_to_space>()) || (parent2->is_type<space_to_batch>()) ||
|
|
||||||
(parent2->is_type<reduce>() && reduce_supports_fusings(parent2->as<reduce>()));
|
|
||||||
|
|
||||||
std::vector<bool> can_fuse_parents = { can_fuse_parent1, can_fuse_parent2 };
|
|
||||||
|
|
||||||
auto p1_raw_size = parent1->get_output_layout().size.sizes();
|
auto p1_raw_size = parent1->get_output_layout().size.sizes();
|
||||||
auto p2_raw_size = parent2->get_output_layout().size.sizes();
|
auto p2_raw_size = parent2->get_output_layout().size.sizes();
|
||||||
for (unsigned k = 0; k < p1_raw_size.size(); k++) {
|
for (unsigned k = 0; k < p1_raw_size.size(); k++) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
// Copyright (c) 2016 Intel Corporation
|
// Copyright (c) 2016-2020 Intel Corporation
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@ -39,7 +39,7 @@ public:
|
|||||||
program_node& scale_in() const { return get_dependency(1); }
|
program_node& scale_in() const { return get_dependency(1); }
|
||||||
program_node& bias() const { return get_dependency(2); }
|
program_node& bias() const { return get_dependency(2); }
|
||||||
|
|
||||||
bool bias_term() const { return get_dependencies().size() > 2; }
|
bool bias_term() const { return get_primitive()->bias.length() != 0; }
|
||||||
|
|
||||||
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
|
std::shared_ptr<kernel_selector::fuse_params> get_fuse_params() const override {
|
||||||
return std::make_shared<kernel_selector::scale_fuse_params>();
|
return std::make_shared<kernel_selector::scale_fuse_params>();
|
||||||
@ -62,7 +62,7 @@ public:
|
|||||||
memory_impl& scale_memory() const { return dep_memory(1); }
|
memory_impl& scale_memory() const { return dep_memory(1); }
|
||||||
memory_impl& bias_memory() const { return dep_memory(2); }
|
memory_impl& bias_memory() const { return dep_memory(2); }
|
||||||
|
|
||||||
bool bias_term() const { return _deps.size() > 2; }
|
bool bias_term() const { return _node.as<scale>().bias_term(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
using scale_inst = typed_primitive_inst<scale>;
|
using scale_inst = typed_primitive_inst<scale>;
|
||||||
|
@ -49,6 +49,10 @@ layout scale_inst::calc_output_layout(scale_node const& node) {
|
|||||||
if (desc->output_data_type)
|
if (desc->output_data_type)
|
||||||
result.data_type = *desc->output_data_type;
|
result.data_type = *desc->output_data_type;
|
||||||
|
|
||||||
|
if (node.has_fused_primitives()) {
|
||||||
|
result.data_type = node.get_fused_output_layout().data_type;
|
||||||
|
}
|
||||||
|
|
||||||
if (scale_x_size != 1) {
|
if (scale_x_size != 1) {
|
||||||
CLDNN_ERROR_NOT_EQUAL(node.id(), "Scale x size", scale_x_size, "input x size", input_x_size, "");
|
CLDNN_ERROR_NOT_EQUAL(node.id(), "Scale x size", scale_x_size, "input x size", input_x_size, "");
|
||||||
}
|
}
|
||||||
|
@ -4314,23 +4314,23 @@ TEST_P(deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8, basic) {
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
|
INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_quant_i8,
|
||||||
::testing::ValuesIn(std::vector<deconv_test_params>{
|
::testing::ValuesIn(std::vector<deconv_test_params>{
|
||||||
deconv_test_params{ CASE_DECONV_FP32_1, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_1, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_2, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_4, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_5, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_5, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_6, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_6, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_7, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_7, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_8, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_8, 4, 9 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_FP16_1, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_1, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_2, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_4, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_5, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_5, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_6, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_6, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_7, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_7, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_8, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_8, 4, 9 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
|
deconv_test_params{ CASE_DECONV_U8S8_1, 2, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
|
deconv_test_params{ CASE_DECONV_U8S8_2, 2, 9 },
|
||||||
@ -4350,24 +4350,24 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv_
|
|||||||
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
|
deconv_test_params{ CASE_DECONV_S8S8_7, 2, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
|
deconv_test_params{ CASE_DECONV_S8S8_8, 2, 9 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_1, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_1, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_2, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_3, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_4, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_5, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_5, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_6, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_6, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_7, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_7, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP32_3D_8, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP32_3D_8, 4, 9 },
|
||||||
// deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
|
// deconv_test_params{ CASE_DECONV_FP32_3D_9, 6, 9 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_1, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_1, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_2, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_2, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_3, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_3, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_4, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_4, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_5, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_5, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_6, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_6, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_7, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_7, 4, 9 },
|
||||||
deconv_test_params{ CASE_DECONV_FP16_3D_8, 6, 9 },
|
deconv_test_params{ CASE_DECONV_FP16_3D_8, 4, 9 },
|
||||||
// deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
|
// deconv_test_params{ CASE_DECONV_FP16_3D_9, 6, 9 },
|
||||||
|
|
||||||
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
|
deconv_test_params{ CASE_DECONV_U8S8_3D_1, 2, 9 },
|
||||||
@ -6224,36 +6224,37 @@ struct eltwise_test_params {
|
|||||||
size_t expected_not_fused_primitives;
|
size_t expected_not_fused_primitives;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_1 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_2 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_3 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_4 {2, 16, 4, 4}, data_types::f32, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_1 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_2 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_3 {2, 16, 4, 4}, data_types::f16, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_1 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_2 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_3 {2, 16, 4, 4}, data_types::i8, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_1 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_2 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_3 {2, 16, 4, 4}, data_types::u8, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_FP16_1 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_FP16_2 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP32_FP16_3 {2, 16, 4, 4}, data_types::f32, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_FP32_1 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_FP32_2 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_FP16_FP32_3 {2, 16, 4, 4}, data_types::f16, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP16_1 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP16_2 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP16_3 {2, 16, 4, 4}, data_types::i8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP32_1 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP32_2 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_I8_FP32_3 {2, 16, 4, 4}, data_types::i8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_FP16_1 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfyx, data_types::f32, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_FP16_2 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::bfzyx, data_types::f32, format::bfzyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_FP16_3 {2, 16, 4, 4}, data_types::u8, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_FP32_1 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::sum
|
||||||
#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum, 3, 4
|
#define CASE_ELTWISE_U8_FP32_2 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::bfzyx, data_types::f16, format::bfzyx, eltwise_mode::sum
|
||||||
|
#define CASE_ELTWISE_U8_FP32_3 {2, 16, 4, 4}, data_types::u8, data_types::f32, format::b_fs_yx_fsv16, data_types::f16, format::b_fs_yx_fsv16, eltwise_mode::sum
|
||||||
|
|
||||||
|
|
||||||
class EltwiseFusingTest : public ::BaseFusingTest<eltwise_test_params> {
|
class EltwiseFusingTest : public ::BaseFusingTest<eltwise_test_params> {
|
||||||
@ -6317,30 +6318,234 @@ TEST_P(eltwise_quantize, i8_per_channel) {
|
|||||||
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
eltwise_quantize,
|
eltwise_quantize,
|
||||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_I8_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 4},
|
||||||
|
}), );
|
||||||
|
|
||||||
|
class eltwise_fp32_fused_prims : public EltwiseFusingTest {};
|
||||||
|
TEST_P(eltwise_fp32_fused_prims, scale_activation) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
input_layout("input2", get_input_layout2(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
|
||||||
|
scale("scale", "eltwise", "scale_data"),
|
||||||
|
activation("activation", "scale", activation_func::abs),
|
||||||
|
reorder("out", "activation", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(eltwise_fp32_fused_prims, eltwise_activation) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
input_layout("input2", get_input_layout2(p)),
|
||||||
|
data("eltwise_data", get_mem(get_input_layout2(p), -10, 10)),
|
||||||
|
eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type),
|
||||||
|
eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||||
|
activation("activation", "eltwise2", activation_func::abs),
|
||||||
|
reorder("out", "activation", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(eltwise_fp32_fused_prims, eltwise_activation_with_broadcast) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
input_layout("input2", get_input_layout2(p)),
|
||||||
|
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
eltwise("eltwise1", {"input", "input2"}, p.mode, p.default_type),
|
||||||
|
eltwise("eltwise2", {"eltwise1", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||||
|
activation("activation", "eltwise2", activation_func::abs),
|
||||||
|
reorder("out", "activation", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
|
eltwise_fp32_fused_prims,
|
||||||
|
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP32_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP32_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_I8_FP16_3, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_1, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_2, 3, 5},
|
||||||
|
eltwise_test_params{CASE_ELTWISE_U8_FP16_3, 3, 5},
|
||||||
|
}), );
|
||||||
|
|
||||||
|
class eltwise_fp32_scale : public EltwiseFusingTest {};
|
||||||
|
TEST_P(eltwise_fp32_scale, 6d) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
input_layout("input2", get_input_layout2(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
|
||||||
|
scale("scale", "eltwise", "scale_data"),
|
||||||
|
reorder("out", "scale", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
|
eltwise_fp32_scale,
|
||||||
|
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||||
|
eltwise_test_params{CASE_ELTWISE_FP32_4, 3, 4},
|
||||||
|
}), );
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
|
/* ---------------------------------------- Scale cases ------------------------------------------------ */
|
||||||
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
|
struct scale_test_params {
|
||||||
|
tensor input_size;
|
||||||
|
data_types input_type;
|
||||||
|
format input_format;
|
||||||
|
data_types default_type;
|
||||||
|
format default_format;
|
||||||
|
size_t expected_fused_primitives;
|
||||||
|
size_t expected_not_fused_primitives;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Scale uses the same kernel as eltwise primitive, so the kernel is well covered by the eltwise tests above
|
||||||
|
// So here we can just check that fused scale kernel is constructed correctly (inputs are set correctly, fused precision is propagated, etc)
|
||||||
|
// and fusing conditions in the graph are correct
|
||||||
|
#define CASE_SCALE_FP32_1 {2, 16, 4, 4}, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||||
|
#define CASE_SCALE_FP32_2 {2, 16, 4, 4}, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||||
|
#define CASE_SCALE_FP32_3 {2, 16, 4, 4}, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::b_fs_yx_fsv16
|
||||||
|
|
||||||
|
class ScaleFusingTest : public ::BaseFusingTest<scale_test_params> {
|
||||||
|
public:
|
||||||
|
void execute(scale_test_params& p) {
|
||||||
|
auto input_prim = get_mem(get_input_layout(p));
|
||||||
|
|
||||||
|
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
|
||||||
|
network network_fused(this->engine, this->topology_fused, bo_fused);
|
||||||
|
|
||||||
|
network_fused.set_input_data("input", input_prim);
|
||||||
|
network_not_fused.set_input_data("input", input_prim);
|
||||||
|
|
||||||
|
compare(network_not_fused, network_fused, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
layout get_input_layout(scale_test_params& p) { return layout{p.input_type, p.input_format, p.input_size}; }
|
||||||
|
|
||||||
|
layout get_per_channel_layout(scale_test_params& p) {
|
||||||
|
return layout{p.default_type, p.default_format, tensor{1, p.input_size.feature[0], 1, 1}};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class scale_basic : public ScaleFusingTest {};
|
||||||
|
TEST_P(scale_basic, no_bias_act_eltwise) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
scale("scale", "input", "scale_data"),
|
||||||
|
activation("activation", "scale", activation_func::negative),
|
||||||
|
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||||
|
reorder("out", "eltwise", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(scale_basic, bias_act_eltwise) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
scale("scale", "input", "scale_data", "bias_data"),
|
||||||
|
activation("activation", "scale", activation_func::negative),
|
||||||
|
data("eltwise_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
eltwise("eltwise", {"activation", "eltwise_data"}, eltwise_mode::prod, p.default_type),
|
||||||
|
reorder("out", "eltwise", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(scale_basic, bias_act_scale) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
scale("scale", "input", "scale_data", "bias_data"),
|
||||||
|
activation("activation", "scale", activation_func::negative),
|
||||||
|
data("scale_data2", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
scale("scale2", "activation", "scale_data2"),
|
||||||
|
reorder("out", "scale2", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1e-5f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(scale_basic, bias_act_quantize) {
|
||||||
|
auto p = GetParam();
|
||||||
|
create_topologies(input_layout("input", get_input_layout(p)),
|
||||||
|
data("scale_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
data("bias_data", get_mem(get_per_channel_layout(p), -10, 10)),
|
||||||
|
scale("scale", "input", "scale_data", "bias_data"),
|
||||||
|
activation("activation", "scale", activation_func::negative),
|
||||||
|
data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
|
||||||
|
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||||
|
data("out_lo", get_mem(get_single_element_layout(p), -128)),
|
||||||
|
data("out_hi", get_mem(get_single_element_layout(p), 127)),
|
||||||
|
quantize("quantize", "activation", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::i8),
|
||||||
|
reorder("out", "quantize", p.default_format, data_types::f32));
|
||||||
|
|
||||||
|
tolerance = 1.f;
|
||||||
|
execute(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
|
scale_basic,
|
||||||
|
::testing::ValuesIn(std::vector<scale_test_params>{
|
||||||
|
scale_test_params{CASE_SCALE_FP32_1, 2, 4},
|
||||||
|
scale_test_params{CASE_SCALE_FP32_2, 2, 4},
|
||||||
|
scale_test_params{CASE_SCALE_FP32_3, 2, 4},
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {};
|
class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {};
|
||||||
@ -6363,12 +6568,12 @@ TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) {
|
|||||||
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
eltwise_no_pitches_same_dims_quantize,
|
eltwise_no_pitches_same_dims_quantize,
|
||||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
class eltwise_activation : public EltwiseFusingTest {};
|
class eltwise_activation : public EltwiseFusingTest {};
|
||||||
@ -6399,18 +6604,18 @@ TEST_P(eltwise_activation, fp16_out) {
|
|||||||
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||||
eltwise_activation,
|
eltwise_activation,
|
||||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_3},
|
eltwise_test_params{CASE_ELTWISE_FP32_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3},
|
eltwise_test_params{CASE_ELTWISE_FP32_FP16_3, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1},
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_1, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2},
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_2, 3, 4},
|
||||||
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3}
|
eltwise_test_params{CASE_ELTWISE_FP16_FP32_3, 3, 4}
|
||||||
}), );
|
}), );
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------------------------------------- */
|
||||||
|
Loading…
Reference in New Issue
Block a user