[GPU] Fix twin transformer functional regression. (#16111)

* [GPU] Fix twin transformer functional regression.

gemm/FC: select_preferred_formats now selects the simple (plain) format that matches the output rank, instead of hard-coding bfyx.

Signed-off-by: hyunback <hyunback.kim@intel.com>
hyunback kim 2023-03-14 17:34:41 +09:00 committed by GitHub
parent 1268bfdca2
commit 164db3def9
3 changed files with 158 additions and 4 deletions

@@ -1824,13 +1824,18 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d
     for (size_t idx = 0 ; idx < node.get_dependencies().size() ; idx++) {
         if (node.get_dependency(idx).is_constant())
             continue;
-        node.set_preferred_input_fmt(idx, cldnn::format::bfyx);
+        size_t out_rank = node.get_output_layout().get_rank();
+        auto target_format = format::get_default_format(out_rank);
+        node.set_preferred_input_fmt(idx, target_format);

         if (node.get_preferred_output_fmt() == format::any) {
-            for (size_t usr = 0; usr < std::max<size_t>(1, node.get_users().size()); usr++)
-                node.set_preferred_output_fmt(usr, cldnn::format::bfyx);
+            for (size_t usr = 0; usr < std::max<size_t>(1, node.get_users().size()); usr++) {
+                node.set_preferred_output_fmt(usr, target_format);
+            }
         }
-        GPU_DEBUG_LOG << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(cldnn::format::bfyx) << " --> " << fmt_to_str(cldnn::format::bfyx)
+        GPU_DEBUG_LOG << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(target_format) << " --> " << fmt_to_str(target_format)
                       << " For index : " << idx << std::endl;
     }
 }
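
A minimal sketch of the rank-to-format mapping this change relies on, assuming cldnn::format::get_default_format returns the plain format for each rank; the new 4D/5D/6D test cases below exercise exactly these three. The function name and include path here are illustrative, not part of the commit:

    // Sketch only: expected get_default_format behavior for gemm/FC output ranks.
    #include <cassert>
    #include "intel_gpu/runtime/format.hpp"

    void check_default_format_mapping() {
        using cldnn::format;
        assert(format::get_default_format(4) == format::bfyx);    // 4D output
        assert(format::get_default_format(5) == format::bfzyx);   // 5D output
        assert(format::get_default_format(6) == format::bfwzyx);  // 6D output
    }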

@@ -1350,6 +1350,138 @@ public:
 };

 #ifdef ENABLE_ONEDNN_FOR_GPU
+struct gemm_onednn_test_params {
+    std::vector<tensor> in_shapes;
+    tensor out_shape;
+    tensor kernel;
+    tensor pad;
+    data_types data_type_in0;
+    data_types data_type_in1;
+    data_types data_type_in2;
+    format input_format;
+    data_types default_type;
+    format default_format;
+};
+
+template <typename T>
+class GemmOneDNNTest : public ::testing::TestWithParam<T> {
+public:
+    cldnn::engine& engine = get_test_engine();
+    topology topology_ocl;
+    topology topology_onednn;
+    ExecutionConfig config_ocl;
+    ExecutionConfig config_onednn;
+    float tolerance = 0.0f;
+
+    void SetUp() override {
+        config_ocl.set_property(ov::intel_gpu::optimize_data(true));
+        config_ocl.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
+        if (engine.get_device_info().supports_immad) {
+            config_onednn.set_property(ov::intel_gpu::optimize_data(true));
+            config_onednn.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
+        }
+    }
+
+    void execute(T& p) {
+        auto input0_prim = get_generated_random_1d_mem(engine, get_input_layout(p, 0));
+        auto input1_prim = get_generated_random_1d_mem(engine, get_input_layout(p, 1));
+
+        network network_ocl(engine, topology_ocl, config_ocl);
+        network network_onednn(engine, topology_onednn, config_onednn);
+
+        network_ocl.set_input_data("input0", input0_prim);
+        network_ocl.set_input_data("input1", input1_prim);
+        network_onednn.set_input_data("input0", input0_prim);
+        network_onednn.set_input_data("input1", input1_prim);
+
+        compare(network_ocl, network_onednn, p);
+    }
+
+    void compare(network& network_ocl, network& network_onednn, T& p) {
+        auto outputs_ocl = network_ocl.execute();
+        auto outputs_onednn = network_onednn.execute();
+
+        ASSERT_EQ(outputs_ocl.size(), outputs_onednn.size());
+        ASSERT_EQ(outputs_ocl.size(), size_t(1));
+
+        auto val_ocl = get_output_values_to_float(network_ocl, outputs_ocl.begin()->first);
+        auto val_onednn = get_output_values_to_float(network_onednn, outputs_onednn.begin()->first);
+
+        ASSERT_EQ(val_ocl.size(), val_onednn.size());
+        for (size_t i = 0; i < val_ocl.size(); i++) {
+            ASSERT_NEAR(val_ocl[i], val_onednn[i], tolerance)
+                << "tolerance = " << tolerance
+                << "\ni = " << i
+                << "\nocl[i] = " << val_ocl[i]
+                << "\nonednn[i] = " << val_onednn[i];
+        }
+    }
+
+    layout get_input_layout(T& p, int in_no) {
+        auto pad = p.pad;
+        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
+        if (in_no == 0)
+            return layout{ p.data_type_in0, p.input_format, p.in_shapes.at(0), padding{ pad_ } };
+        else if (in_no == 1)
+            return layout{ p.data_type_in1, p.input_format, p.in_shapes.at(1), padding{ pad_ } };
+        else
+            return layout{ p.data_type_in2, p.input_format, p.in_shapes.at(2), padding{ pad_ } };
+    }
+};
+
+class gemm_onednn_ndims : public GemmOneDNNTest<gemm_onednn_test_params> {};
+TEST_P(gemm_onednn_ndims, basic) {
+    if (!engine.get_device_info().supports_immad)
+        return;
+
+    auto p = GetParam();
+
+    auto in_layout0 = get_input_layout(p, 0);
+    auto in_layout1 = get_input_layout(p, 1);
+
+    topology_ocl.add(input_layout("input0", in_layout0));
+    topology_ocl.add(input_layout("input1", in_layout1));
+    topology_ocl.add(gemm("gemm0_ocl", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()));
+    topology_ocl.add(reorder("reorder0", input_info("gemm0_ocl"), p.default_format, data_types::f32));
+
+    topology_onednn.add(input_layout("input0", get_input_layout(p, 0)));
+    topology_onednn.add(input_layout("input1", get_input_layout(p, 1)));
+    topology_onednn.add(gemm("gemm0_onednn", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()));
+    topology_onednn.add(reorder("reorder0", input_info("gemm0_onednn"), p.default_format, data_types::f32));
+
+    ov::intel_gpu::ImplementationDesc gemm_impl_ocl = { p.default_format, "", impl_types::ocl };
+    config_ocl.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm0_ocl", gemm_impl_ocl } }));
+    ov::intel_gpu::ImplementationDesc gemm_impl_onednn = { p.default_format, "", impl_types::onednn };
+    config_onednn.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm0_onednn", gemm_impl_onednn } }));
+
+    tolerance = default_tolerance(p.default_type);
+    execute(p);
+}
+
+#define CASE_GEMM_ONEDNN_FP16_4D { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx
+#define CASE_GEMM_ONEDNN_FP16_5D { { 1, 3, 4, 4, 4 }, { 1, 3, 4, 4, 4 } }, { 1, 3, 4, 4, 4 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::f16, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
+#define CASE_GEMM_ONEDNN_FP16_6D { { 2, 3, 5, 4, 3, 2 }, { 2, 3, 4, 5, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::f16, data_types::f16, data_types::f16, format::bfwzyx, data_types::f16, format::bfwzyx
+#define CASE_GEMM_ONEDNN_I8_4D { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::i8, data_types::i8, data_types::i8, format::bfyx, data_types::i8, format::bfyx
+#define CASE_GEMM_ONEDNN_I8_5D { { 1, 3, 4, 4, 4 }, { 1, 3, 4, 4, 4 } }, { 1, 3, 4, 4, 4 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::i8, data_types::i8, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx
+#define CASE_GEMM_ONEDNN_I8_6D { { 2, 3, 5, 4, 3, 2 }, { 2, 3, 4, 5, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, \
+data_types::i8, data_types::i8, data_types::i8, format::bfwzyx, data_types::i8, format::bfwzyx
+
+INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_onednn_ndims, ::testing::ValuesIn(std::vector<gemm_onednn_test_params>{
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_4D },
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_5D },
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_6D },
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_4D },
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_5D },
+    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_6D },
+}));
+
 class gemm_int8_simple_tests_onednn : public ::GemmBaseTest<gemm_base_test_params, int8_t, int8_t, float, float, int32_t> {};
 TEST_P(gemm_int8_simple_tests_onednn, basic) { auto p = GetParam(); execute(p); }
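
For readability, this is roughly what one of the case macros above expands to when it aggregate-initializes gemm_onednn_test_params (a sketch; field order follows the struct declaration, and the comments are illustrative):

    // Sketch: gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_5D } is equivalent to:
    gemm_onednn_test_params p{
        { tensor{ 1, 3, 4, 4, 4 }, tensor{ 1, 3, 4, 4, 4 } },  // in_shapes: two 5D inputs
        tensor{ 1, 3, 4, 4, 4 },                               // out_shape
        tensor{ 1 },                                           // kernel (not used by gemm_onednn_ndims)
        tensor{ 0 },                                           // pad
        data_types::f16, data_types::f16, data_types::f16,     // data_type_in0..in2
        format::bfzyx,                                         // input_format: plain 5D format
        data_types::f16,                                       // default_type
        format::bfzyx                                          // default_format
    };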

@@ -609,6 +609,23 @@ inline std::vector<float> get_output_values_to_float(cldnn::network& net, const
     }
 }

+inline cldnn::memory::ptr get_generated_random_1d_mem(cldnn::engine& engine, cldnn::layout l) {
+    auto prim = engine.allocate_memory(l);
+    cldnn::tensor s = l.get_tensor();
+    if (l.data_type == cldnn::data_types::i8 || l.data_type == cldnn::data_types::u8) {
+        VF<uint8_t> rnd_vec = generate_random_1d<uint8_t>(s.count(), -200, 200);
+        set_values(prim, rnd_vec);
+    } else if (l.data_type == cldnn::data_types::f16) {
+        VF<FLOAT16> rnd_vec = generate_random_1d<FLOAT16>(s.count(), -1, 1);
+        set_values(prim, rnd_vec);
+    } else {
+        VF<float> rnd_vec = generate_random_1d<float>(s.count(), -1, 1);
+        set_values(prim, rnd_vec);
+    }
+    return prim;
+}
+
 double default_tolerance(cldnn::data_types dt);

 // inline void print_bin_blob(cldnn::memory& mem, std::string name)
 // {