This reverts commit 3334e8933c
.
This commit is contained in:
parent
a230eebcc1
commit
dc7efafd7c
@ -976,8 +976,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
||||
(parents[i]->is_type<depth_to_space>() && dts_supports_fusings(parents[i]->as<depth_to_space>())) ||
|
||||
(parents[i]->is_type<gather>()) ||
|
||||
(parents[i]->is_type<reduce>() && reduce_supports_fusings(parents[i]->as<reduce>())) ||
|
||||
(parents[i]->is_type<lrn>()) ||
|
||||
(parents[i]->is_type<activation>());
|
||||
(parents[i]->is_type<lrn>());
|
||||
}
|
||||
|
||||
// Disable fusion to a node on constant path when second input is in data flow
|
||||
|
@ -28,8 +28,8 @@ ActivationKernelBase::DispatchData ActivationKernelBase::SetDefault(const activa
|
||||
dispatchData.gws = {out.X().v * out.Y().v, Align(out.Feature().v, 16), Align(out.Batch().v, 16)};
|
||||
dispatchData.lws = {1, 16, 16};
|
||||
} else {
|
||||
dispatchData.gws = {out.X().v * out.W().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{Tensor::DataChannelName::X, Tensor::DataChannelName::W},
|
||||
dispatchData.gws = {out.X().v, out.Y().v * out.Z().v, out.Feature().v * out.Batch().v};
|
||||
std::vector<std::vector<Tensor::DataChannelName>> dims_by_gws = {{Tensor::DataChannelName::X},
|
||||
{Tensor::DataChannelName::Y, Tensor::DataChannelName::Z},
|
||||
{Tensor::DataChannelName::FEATURE, Tensor::DataChannelName::BATCH}};
|
||||
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, arg.engineInfo, in_layout, out_layout, dims_by_gws);
|
||||
|
@ -85,8 +85,7 @@ bool ActivationKernelOpt::Validate(const Params& p, const optional_params& o) co
|
||||
return false;
|
||||
|
||||
if (!params.fused_ops.empty() &&
|
||||
(params.outputs[0].GetLayout() != DataLayout::bfyx && params.outputs[0].GetLayout() != DataLayout::bfzyx &&
|
||||
params.outputs[0].GetLayout() != DataLayout::bfwzyx))
|
||||
(params.outputs[0].GetLayout() != DataLayout::bfyx && params.outputs[0].GetLayout() != DataLayout::bfzyx))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -110,20 +109,11 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_FEATURE_NUM)",
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM)",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W) % OUTPUT_FEATURE_NUM",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_SIZE_W",
|
||||
"x / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"x / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"x % OUTPUT_SIZE_X"};
|
||||
} else {
|
||||
IE_THROW() << "Unknown dimension";
|
||||
}
|
||||
} else {
|
||||
if (params.inputs[0].GetDims().size() <= 4) {
|
||||
@ -132,20 +122,11 @@ JitConstants ActivationKernelOpt::GetJitConstants(const activation_params& param
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_FEATURE_NUM)",
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z* OUTPUT_FEATURE_NUM)",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_FEATURE_NUM",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM)",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W) % OUTPUT_FEATURE_NUM",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) % OUTPUT_SIZE_W",
|
||||
"(x + i) / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) % OUTPUT_SIZE_Z",
|
||||
"(x + i) / OUTPUT_SIZE_X % OUTPUT_SIZE_Y",
|
||||
"(x + i) % OUTPUT_SIZE_X"};
|
||||
} else {
|
||||
IE_THROW() << "Unknown dimension";
|
||||
}
|
||||
}
|
||||
FusedOpsConfiguration conf_vector = {"_VECTOR",
|
||||
|
@ -26,7 +26,7 @@ protected:
|
||||
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE,
|
||||
FusedOpType::ELTWISE,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ACTIVATION};
|
||||
}
|
||||
};
|
||||
|
@ -40,10 +40,6 @@ JitConstants ActivationKernelRef::GetJitConstants(const activation_params& param
|
||||
idx_order = {"batch", "feature", "y", "x"};
|
||||
} else if (params.inputs[0].GetDims().size() == 5) {
|
||||
idx_order = {"batch", "feature", "z", "y", "x"};
|
||||
} else if (params.inputs[0].GetDims().size() == 6) {
|
||||
idx_order = {"batch", "feature", "w", "z", "y", "x"};
|
||||
} else {
|
||||
IE_THROW() << "unknown dimension";
|
||||
}
|
||||
FusedOpsConfiguration conf = {"", idx_order, "dst", input_dt, 1};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
|
@ -23,7 +23,7 @@ public:
|
||||
JitConstants GetJitConstants(const activation_params& params, DispatchData dispatchData) const override;
|
||||
std::vector<FusedOpType> GetSupportedFusedOps() const override {
|
||||
return {FusedOpType::QUANTIZE,
|
||||
FusedOpType::ELTWISE,
|
||||
FusedOpType::SCALE,
|
||||
FusedOpType::ACTIVATION};
|
||||
}
|
||||
|
||||
|
@ -23,27 +23,13 @@ KERNEL(activation)(
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if OUTPUT_DIMS == 6
|
||||
#define ORDER batch,feature,w,z,y,x
|
||||
#elif OUTPUT_DIMS == 5
|
||||
#if OUTPUT_DIMS == 5
|
||||
#define ORDER batch,feature,z,y,x
|
||||
#elif OUTPUT_DIMS == 4
|
||||
#define ORDER batch,feature,y,x
|
||||
#endif
|
||||
|
||||
#if OUTPUT_DIMS == 6
|
||||
const uint x = (uint)get_global_id(0) % OUTPUT_SIZE_X;
|
||||
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
|
||||
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
|
||||
const uint w = (uint)get_global_id(0) / OUTPUT_SIZE_X;
|
||||
#if OUTPUT_BATCH_NUM == 1
|
||||
const unsigned feature = (uint)get_global_id(2);
|
||||
const unsigned batch = 0;
|
||||
#else
|
||||
const unsigned feature = (uint)get_global_id(2) % OUTPUT_FEATURE_NUM;
|
||||
const unsigned batch = (uint)get_global_id(2) / OUTPUT_FEATURE_NUM;
|
||||
#endif
|
||||
#elif OUTPUT_DIMS == 5
|
||||
#if OUTPUT_DIMS == 5
|
||||
const unsigned x = get_global_id(0);
|
||||
const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
|
||||
const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
|
||||
|
@ -158,19 +158,19 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_quantize_i8, ::testing
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 3, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise_activation_quantize_u8 : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise_activation_quantize_u8, basic) {
|
||||
class activation_scale_activation_quantize_u8 : public ActivationFusingTest {};
|
||||
TEST_P(activation_scale_activation_quantize_u8, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("in_low", get_mem(get_single_element_layout(p), 0)),
|
||||
data("in_high", get_mem(get_single_element_layout(p), 1, max_random)),
|
||||
data("out_low", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_high", get_mem(get_single_element_layout(p), 127)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::softsign),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::softsign),
|
||||
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8),
|
||||
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32)
|
||||
);
|
||||
@ -179,18 +179,18 @@ TEST_P(activation_eltwise_activation_quantize_u8, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
|
||||
TEST_P(activation_scale_activation_quantize_u8, per_channel) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
data("in_low", get_mem(get_per_channel_layout(p), 0)),
|
||||
data("in_high", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||
data("out_low", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_high", get_mem(get_single_element_layout(p), 127)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::softsign),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::softsign),
|
||||
quantize("quant", "act2", "in_low", "in_high", "out_low", "out_high", 256, data_types::u8),
|
||||
reorder("reorder_bfyx", "quant", p.default_format, data_types::f32)
|
||||
);
|
||||
@ -199,7 +199,7 @@ TEST_P(activation_eltwise_activation_quantize_u8, per_channel) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_scale_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_F32_0, 2, 5, "activation_opt" },
|
||||
activation_test_params{ CASE_ACTIVATION_F32_1, 2, 5, "activation_opt" },
|
||||
@ -219,19 +219,19 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation_quantize_u8,
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_2, 2, 5, "activation_ref" },
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_scale_activation_quantize_u8, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 5, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise_activation : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise_activation, basic) {
|
||||
class activation_scale_activation : public ActivationFusingTest {};
|
||||
TEST_P(activation_scale_activation, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::relu),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
activation("act2", "eltwise", activation_func::exp),
|
||||
data("scale_data", get_mem(get_single_element_layout(p), 1.0f / 255)),
|
||||
scale("scale", "act", "scale_data"),
|
||||
activation("act2", "scale", activation_func::exp),
|
||||
reorder("reorder_bfyx", "act2", p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
@ -239,7 +239,7 @@ TEST_P(activation_eltwise_activation, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_scale_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_F32_0, 2, 4, "activation_opt" },
|
||||
activation_test_params{ CASE_ACTIVATION_F32_1, 2, 4, "activation_opt" },
|
||||
@ -291,28 +291,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise_activation, ::testing::
|
||||
activation_test_params{ CASE_ACTIVATION_3D_I8_1, 2, 4, "activation_ref" }
|
||||
}));
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_eltwise_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
INSTANTIATE_TEST_SUITE_P(DISABLED_fusings_gpu, activation_scale_activation, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_4, 2, 4, "activation_ref" }, // FIXME - accuracy bug
|
||||
activation_test_params{ CASE_ACTIVATION_3D_F32_5, 2, 4, "activation_ref" }, // FIXME - accuracy bug
|
||||
}));
|
||||
|
||||
class activation_eltwise : public ActivationFusingTest {};
|
||||
TEST_P(activation_eltwise, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
activation("act", "input", activation_func::abs),
|
||||
data("eltwise_data", get_mem(get_single_element_layout(p), 10.0f)),
|
||||
eltwise("eltwise", { "act", "eltwise_data" }, eltwise_mode::prod, p.default_type),
|
||||
reorder("reorder_bfyx", "eltwise", p.default_format, data_types::f32)
|
||||
);
|
||||
tolerance = 1e-05f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
#define CASE_ACTIVATION_4D_F32_0 { 1, 2, 3, 2, 3, 2 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfwzyx
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, activation_eltwise, ::testing::ValuesIn(std::vector<activation_test_params>{
|
||||
// InputDataType = FP32
|
||||
activation_test_params{ CASE_ACTIVATION_4D_F32_0, 2, 3, "activation_ref" },
|
||||
activation_test_params{ CASE_ACTIVATION_4D_F32_0, 2, 3, "activation_opt" }
|
||||
}));
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <intel_gpu/primitives/activation.hpp>
|
||||
#include <intel_gpu/primitives/data.hpp>
|
||||
#include <intel_gpu/primitives/reorder.hpp>
|
||||
#include <intel_gpu/primitives/eltwise.hpp>
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
@ -16,50 +15,6 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
static void test_abs_basic_bfwzyx(const char* kernel_name){
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
std::vector<int> shape = {2, 3, 7, 6, 5, 4};
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, shape)});
|
||||
auto input_raw = generate_random_1d<float>(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()), -9, 9);
|
||||
set_values(input, input_raw);
|
||||
|
||||
auto elt_dat = engine.allocate_memory({data_types::f32, format::bfwzyx, tensor(format::bfwzyx, shape)});
|
||||
auto elt_dat_raw = generate_random_1d<float>(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()), -9, 9);
|
||||
set_values(elt_dat, elt_dat_raw);
|
||||
|
||||
topology topo(input_layout("input", input->get_layout()),
|
||||
activation("act", "input", activation_func::abs),
|
||||
data("elt_dat", elt_dat),
|
||||
eltwise("elt", {"act", "elt_dat"}, eltwise_mode::prod));
|
||||
build_options bo;
|
||||
implementation_desc act_impl = { format::bfwzyx, kernel_name };
|
||||
bo.set_option(build_option::force_implementations({{"act",act_impl}}));
|
||||
network net(engine, topo, bo);
|
||||
net.set_input_data("input", input);
|
||||
auto res = net.execute();
|
||||
EXPECT_EQ(res.size(), size_t(1));
|
||||
EXPECT_EQ(res.begin()->first, "elt");
|
||||
|
||||
auto output_memory = res.at("elt").get_memory();
|
||||
auto output_layout = output_memory->get_layout();
|
||||
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
|
||||
|
||||
int w_size = output_layout.spatial(3);
|
||||
int z_size = output_layout.spatial(2);
|
||||
int y_size = output_layout.spatial(1);
|
||||
int x_size = output_layout.spatial(0);
|
||||
int f_size = output_layout.feature();
|
||||
int b_size = output_layout.batch();
|
||||
auto bfwzyx = std::vector<int>{b_size, f_size, w_size, z_size, y_size, x_size};
|
||||
EXPECT_EQ(output_layout.format, format::bfwzyx);
|
||||
EXPECT_EQ(bfwzyx, shape);
|
||||
for (size_t i = 0; i < input_raw.size(); ++i)
|
||||
EXPECT_FLOAT_EQ(abs(input_raw[i])*elt_dat_raw[i], output_ptr[i]);
|
||||
}
|
||||
TEST(activation_f32_fw_gpu, abs_basic_bfwzyx_opt) {test_abs_basic_bfwzyx("activation_ref");}
|
||||
TEST(activation_f32_fw_gpu, abs_basic_bfwzyx_ref) {test_abs_basic_bfwzyx("activation_opt");}
|
||||
|
||||
TEST(activation_f32_fw_gpu, not_basic_yxfb) {
|
||||
// Input:
|
||||
// 1 0 -3 4 5
|
||||
|
Loading…
Reference in New Issue
Block a user