[GPU] Fix twin transformer functional regression. (#16111)
* [GPU] Fix twin transformer functional regression. gemm/FC select_preferred_format selects a simple format depending on the output rank size. Signed-off-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
parent
1268bfdca2
commit
164db3def9
@ -1824,13 +1824,18 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d
|
||||
for (size_t idx = 0 ; idx < node.get_dependencies().size() ; idx++) {
|
||||
if (node.get_dependency(idx).is_constant())
|
||||
continue;
|
||||
node.set_preferred_input_fmt(idx, cldnn::format::bfyx);
|
||||
|
||||
size_t out_rank = node.get_output_layout().get_rank();
|
||||
auto target_format = format::get_default_format(out_rank);
|
||||
|
||||
node.set_preferred_input_fmt(idx, target_format);
|
||||
|
||||
if (node.get_preferred_output_fmt() == format::any) {
|
||||
for (size_t usr = 0; usr < std::max<size_t>(1, node.get_users().size()); usr++)
|
||||
node.set_preferred_output_fmt(usr, cldnn::format::bfyx);
|
||||
for (size_t usr = 0; usr < std::max<size_t>(1, node.get_users().size()); usr++) {
|
||||
node.set_preferred_output_fmt(usr, target_format);
|
||||
}
|
||||
}
|
||||
GPU_DEBUG_LOG << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(cldnn::format::bfyx) << " --> " << fmt_to_str(cldnn::format::bfyx)
|
||||
GPU_DEBUG_LOG << "select_preferred_formats:" << node.id() << ": " << fmt_to_str(target_format) << " --> " << fmt_to_str(target_format)
|
||||
<< " For index : " << idx << std::endl;
|
||||
}
|
||||
}
|
||||
|
@ -1350,6 +1350,138 @@ public:
|
||||
};
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
// Parameters for one OCL-vs-oneDNN GEMM comparison case.
struct gemm_onednn_test_params {
    std::vector<tensor> in_shapes;   // shapes of gemm inputs 0, 1 (and optional 2)
    tensor out_shape;                // expected output shape
    tensor kernel;                   // not referenced in the visible fixture — presumably kept for param-struct uniformity; TODO confirm
    tensor pad;                      // spatial padding applied to every input layout (see get_input_layout)
    data_types data_type_in0;        // element type of input 0
    data_types data_type_in1;        // element type of input 1
    data_types data_type_in2;        // element type of input 2 (only used when a third input exists)
    format input_format;             // format of all inputs (bfyx / bfzyx / bfwzyx in the cases below)
    data_types default_type;         // type used to derive the comparison tolerance
    format default_format;           // format forced on the gemm impl and the final reorder
};
|
||||
|
||||
// Fixture that builds the same GEMM topology twice — once forced to the OCL
// implementation, once to the oneDNN implementation — runs both with identical
// random inputs and compares the outputs element-wise within `tolerance`.
template <typename T>
class GemmOneDNNTest : public ::testing::TestWithParam<T> {
public:
    cldnn::engine& engine = get_test_engine();
    topology topology_ocl;      // graph that will be pinned to the OCL gemm kernel
    topology topology_onednn;   // identical graph pinned to the oneDNN gemm kernel

    ExecutionConfig config_ocl;
    ExecutionConfig config_onednn;

    // Maximum allowed absolute difference between the two backends' outputs.
    float tolerance = 0.0f;

    void SetUp() override {
        config_ocl.set_property(ov::intel_gpu::optimize_data(true));
        config_ocl.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
        // The oneDNN path is only exercised on devices reporting immad support,
        // so its config is only prepared in that case.
        if (engine.get_device_info().supports_immad) {
            config_onednn.set_property(ov::intel_gpu::optimize_data(true));
            config_onednn.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
        }
    }

    // Feeds both networks the same random memories and compares their outputs.
    // NOTE(review): only inputs 0 and 1 are generated here — a third (bias)
    // input described by data_type_in2 is never wired up; confirm intended.
    void execute(T& p) {
        auto input0_prim = get_generated_random_1d_mem(engine, get_input_layout(p, 0));
        auto input1_prim = get_generated_random_1d_mem(engine, get_input_layout(p, 1));

        network network_ocl(engine, topology_ocl, config_ocl);
        network network_onednn(engine, topology_onednn, config_onednn);

        network_ocl.set_input_data("input0", input0_prim);
        network_ocl.set_input_data("input1", input1_prim);
        network_onednn.set_input_data("input0", input0_prim);
        network_onednn.set_input_data("input1", input1_prim);

        compare(network_ocl, network_onednn, p);
    }

    // Executes both networks; asserts each produces exactly one output and
    // that the outputs agree element-wise within `tolerance`.
    void compare(network& network_ocl, network& network_onednn, T& p) {
        auto outputs_ocl = network_ocl.execute();
        auto outputs_onednn = network_onednn.execute();

        ASSERT_EQ(outputs_ocl.size(), outputs_onednn.size());
        ASSERT_EQ(outputs_ocl.size(), size_t(1));

        auto val_ocl = get_output_values_to_float(network_ocl, outputs_ocl.begin()->first);
        auto val_onednn = get_output_values_to_float(network_onednn, outputs_onednn.begin()->first);

        ASSERT_EQ(val_ocl.size(), val_onednn.size());

        for (size_t i = 0; i < val_ocl.size(); i++) {
            ASSERT_NEAR(val_ocl[i], val_onednn[i], tolerance)
                << "tolerance = " << tolerance
                << "\ni = " << i
                << "\nocl[i] = " << val_ocl[i]
                << "\nonednn[i] = " << val_onednn[i];
        }
    }

    // Builds the layout for input `in_no` (0, 1 or 2) from the test params,
    // applying the configured spatial padding to every input.
    layout get_input_layout(T& p, int in_no) {
        auto pad = p.pad;
        std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
        if (in_no == 0)
            return layout{ p.data_type_in0, p.input_format, p.in_shapes.at(0), padding{ pad_ } };
        else if (in_no == 1)
            return layout{ p.data_type_in1, p.input_format, p.in_shapes.at(1), padding{ pad_ } };
        else
            return layout{ p.data_type_in2, p.input_format, p.in_shapes.at(2), padding{ pad_ } };
    }
};
|
||||
|
||||
// Runs the same GEMM through the OCL and oneDNN implementations for
// 4D/5D/6D inputs (the regression covered by this commit) and checks the
// outputs match within tolerance.
class gemm_onednn_ndims : public GemmOneDNNTest<gemm_onednn_test_params> {};
TEST_P(gemm_onednn_ndims, basic) {
    // oneDNN gemm requires immad support; skip silently otherwise.
    if (!engine.get_device_info().supports_immad)
        return;

    auto p = GetParam();

    auto in_layout0 = get_input_layout(p, 0);
    auto in_layout1 = get_input_layout(p, 1);

    topology_ocl.add(input_layout("input0", in_layout0));
    topology_ocl.add(input_layout("input1", in_layout1));
    topology_ocl.add(gemm("gemm0_ocl", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()));
    topology_ocl.add(reorder("reorder0", input_info("gemm0_ocl"), p.default_format, data_types::f32));

    topology_onednn.add(input_layout("input0", get_input_layout(p, 0)));
    topology_onednn.add(input_layout("input1", get_input_layout(p, 1)));
    topology_onednn.add(gemm("gemm0_onednn", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()));
    topology_onednn.add(reorder("reorder0", input_info("gemm0_onednn"), p.default_format, data_types::f32));

    // Pin each topology's gemm node to a specific impl type so the two
    // networks really exercise different kernels (ids must match the gemm
    // primitive ids added above).
    ov::intel_gpu::ImplementationDesc gemm_impl_ocl = { p.default_format, "", impl_types::ocl };
    config_ocl.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm0_ocl", gemm_impl_ocl } }));

    ov::intel_gpu::ImplementationDesc gemm_impl_onednn = { p.default_format, "", impl_types::onednn };
    config_onednn.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm0_onednn", gemm_impl_onednn } }));

    tolerance = default_tolerance(p.default_type);
    execute(p);
}
|
||||
// Test-case macros expand to a gemm_onednn_test_params initializer:
// { in_shapes }, out_shape, kernel, pad, in0/in1/in2 types, input format,
// default type, default format. One fp16 and one i8 case per rank (4D/5D/6D).
#define CASE_GEMM_ONEDNN_FP16_4D { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, \
data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GEMM_ONEDNN_FP16_5D { { 1, 3, 4, 4, 4 }, { 1, 3, 4, 4, 4 } }, { 1, 3, 4, 4, 4 }, tensor{ 1 }, tensor{ 0 }, \
data_types::f16, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
// 6D: non-square inner dims exercise a genuine (M x K) * (K x N) product.
#define CASE_GEMM_ONEDNN_FP16_6D { { 2, 3, 5, 4, 3, 2 }, { 2, 3, 4, 5, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, \
data_types::f16, data_types::f16, data_types::f16, format::bfwzyx, data_types::f16, format::bfwzyx
#define CASE_GEMM_ONEDNN_I8_4D { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, \
data_types::i8, data_types::i8, data_types::i8, format::bfyx, data_types::i8, format::bfyx
#define CASE_GEMM_ONEDNN_I8_5D { { 1, 3, 4, 4, 4 }, { 1, 3, 4, 4, 4 } }, { 1, 3, 4, 4, 4 }, tensor{ 1 }, tensor{ 0 }, \
data_types::i8, data_types::i8, data_types::i8, format::bfzyx, data_types::i8, format::bfzyx
#define CASE_GEMM_ONEDNN_I8_6D { { 2, 3, 5, 4, 3, 2 }, { 2, 3, 4, 5, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, \
data_types::i8, data_types::i8, data_types::i8, format::bfwzyx, data_types::i8, format::bfwzyx
|
||||
|
||||
// Instantiates gemm_onednn_ndims for every rank/type combination above.
INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_onednn_ndims, ::testing::ValuesIn(std::vector<gemm_onednn_test_params>{
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_4D },
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_5D },
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_FP16_6D },
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_4D },
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_5D },
    gemm_onednn_test_params{ CASE_GEMM_ONEDNN_I8_6D },
}));
|
||||
|
||||
// int8 GEMM smoke tests over the shared GemmBaseTest fixture (declared elsewhere in this file).
class gemm_int8_simple_tests_onednn : public ::GemmBaseTest<gemm_base_test_params, int8_t, int8_t, float, float, int32_t> {};
TEST_P(gemm_int8_simple_tests_onednn, basic) { auto p = GetParam(); execute(p); }
|
||||
|
@ -609,6 +609,23 @@ inline std::vector<float> get_output_values_to_float(cldnn::network& net, const
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a memory buffer for the given layout and fills it with random
// values generated with an element type matching the layout's data type.
inline cldnn::memory::ptr get_generated_random_1d_mem(cldnn::engine& engine, cldnn::layout l) {
    auto mem = engine.allocate_memory(l);
    const auto elem_count = l.get_tensor().count();

    switch (l.data_type) {
    case cldnn::data_types::i8:
    case cldnn::data_types::u8: {
        // NOTE(review): a -200..200 range with an unsigned 8-bit element type
        // relies on modular wrap-around — confirm this is intended.
        VF<uint8_t> values = generate_random_1d<uint8_t>(elem_count, -200, 200);
        set_values(mem, values);
        break;
    }
    case cldnn::data_types::f16: {
        VF<FLOAT16> values = generate_random_1d<FLOAT16>(elem_count, -1, 1);
        set_values(mem, values);
        break;
    }
    default: {
        // All remaining types are filled as f32 in [-1, 1].
        VF<float> values = generate_random_1d<float>(elem_count, -1, 1);
        set_values(mem, values);
        break;
    }
    }

    return mem;
}
|
||||
|
||||
double default_tolerance(cldnn::data_types dt);
|
||||
// inline void print_bin_blob(cldnn::memory& mem, std::string name)
|
||||
// {
|
||||
|
Loading…
Reference in New Issue
Block a user