[GPU] Optimization for gemm & fc in iGPU. (#19780)
* Optimization for gemm & fc on iGPU.
  FC: fake alignment to 16 performs better on iGPU.
  Gemm: permute + gemm_tiled_opt is better than transposed_input + gemm_ref kernel for shapes unaligned to 16.
  Note that this is a temporary optimization and will be removed once the final solution (i.e., support for unaligned transposed input shapes in the gemm_tiled_opt kernel) is available.
* Fix unittest
* Fix for model_cache
* Fix unittest
This commit is contained in:
parent efe54362fd
commit f1b8abe55a
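Before the diff itself, a quick restatement of the FC part of the change as code: the fake-alignment base for the batch dimension becomes device dependent, 16 on integrated GPUs and 8 otherwise. The following is a minimal self-contained sketch, where align_to and device_type are simplified stand-ins for cldnn's own helpers:

    #include <cstddef>
    #include <cstdio>

    enum class device_type { integrated_gpu, discrete_gpu };

    // Round x up to the next multiple of base (mirrors what cldnn's align_to does).
    static size_t align_to(size_t x, size_t base) {
        return (x + base - 1) / base * base;
    }

    // Device-dependent fake-alignment base introduced by this change:
    // iGPU pads batches to 16, other devices keep the previous 8.
    static size_t fake_align_base(device_type dev) {
        return dev == device_type::integrated_gpu ? 16 : 8;
    }

    int main() {
        std::printf("batch 133 -> iGPU %zu, dGPU %zu\n",
                    align_to(133, fake_align_base(device_type::integrated_gpu)),  // 144
                    align_to(133, fake_align_base(device_type::discrete_gpu)));   // 136
        return 0;
    }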
@@ -34,6 +34,7 @@ struct kernel_impl_params {
     bool has_runtime_layouts = false;
     const program *prog;
+    cldnn::device_type dev_type;
     stream::ptr strm;
     std::shared_ptr<const primitive> desc;
     size_t unique_id;
@@ -63,9 +64,11 @@ struct kernel_impl_params {
     std::vector<size_t> output_size;
     std::vector<size_t> img_size;

-    kernel_impl_params() : prog(nullptr), strm(nullptr), desc(nullptr), unique_id(0) {}
+    kernel_impl_params() : prog(nullptr), dev_type(cldnn::device_type::integrated_gpu), strm(nullptr), desc(nullptr), unique_id(0) {
+    }

     kernel_impl_params(program& _prog,
+                       cldnn::device_type _dev_type,
                        stream::ptr _strm,
                        std::shared_ptr<const primitive> _desc,
                        size_t _uid,
@@ -74,6 +77,7 @@ struct kernel_impl_params {
                        const std::vector<cldnn::fused_primitive_desc>& _fused_descs)
     : has_runtime_layouts(true)
     , prog(&_prog)
+    , dev_type(_dev_type)
     , strm(std::move(_strm))
     , desc(std::move(_desc))
     , unique_id(_uid)
@@ -135,7 +139,7 @@ struct kernel_impl_params {
         return std::static_pointer_cast<const PType>(desc)->type == PType::type_id();
     }

-    virtual primitive_type_id type() const { return desc->type; }
+    virtual primitive_type_id type() const { return desc->type; }

     void save(BinaryOutputBuffer& ob) const;
     void load(BinaryInputBuffer& ib);
@@ -187,8 +187,9 @@ kernel_impl_params fully_connected_inst::get_fake_aligned_params(kernel_impl_par
         return std::move(orig_impl_param);
     }

-    input_shape[input_row_idx] = align_to(input_shape[input_row_idx], 8);
-    output_shape[output_row_idx] = align_to(output_shape[output_row_idx], 8);
+    size_t fake_align_base = (orig_impl_param.dev_type == cldnn::device_type::integrated_gpu) ? 16 : 8;
+    input_shape[input_row_idx] = align_to(input_shape[input_row_idx], fake_align_base);
+    output_shape[output_row_idx] = align_to(output_shape[output_row_idx], fake_align_base);

     updated_param.input_layouts[0] = layout(ov::PartialShape(input_shape),
                                             orig_input_layout.data_type,
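For a concrete feel of the new base, take the 133-row case from the fake-alignment unit test below: align_to(133, 16) = 9 * 16 = 144 on iGPU, while the previous align_to(133, 8) = 17 * 8 = 136 is still what the dGPU path expects; these are exactly the _igpu / _dgpu layouts added to the test data.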
@@ -127,7 +127,8 @@ public:
     }

     virtual std::unique_ptr<kernel_impl_params> get_kernel_impl_params(const std::vector<layout>& in_layouts, const std::vector<layout>& out_layouts) const {
-        auto params = std::unique_ptr<kernel_impl_params>(new kernel_impl_params(get_program(), get_program().get_stream_ptr(), get_primitive(),
+        auto params = std::unique_ptr<kernel_impl_params>(new kernel_impl_params(get_program(), get_program().get_engine().get_device_info().dev_type,
+                                                                                 get_program().get_stream_ptr(), get_primitive(),
                                                                                  get_unique_id(), in_layouts, out_layouts, get_fused_primitives()));
         params->memory_deps = get_const_memory_deps();
         params->_can_be_optimized = this->optimized;
@@ -8,6 +8,7 @@
 #include "intel_gpu/graph/serialization/layout_serializer.hpp"
 #include "intel_gpu/graph/serialization/string_serializer.hpp"
 #include "intel_gpu/graph/serialization/vector_serializer.hpp"
+#include "intel_gpu/runtime/device_info.hpp"

 #include <string>
 #include <vector>
@@ -71,6 +72,7 @@ bool kernel_impl_params::operator==(const kernel_impl_params& rhs) const {

 void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
     ob << desc;
+    ob << static_cast<uint64_t>(dev_type);
     ob << has_runtime_layouts;
     ob << unique_id;
     ob << input_layouts;
@@ -135,6 +137,9 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
 void kernel_impl_params::load(BinaryInputBuffer& ib) {
     prog = nullptr;
     ib >> desc;
+    size_t dev_type_id = 0;
+    ib >> dev_type_id;
+    dev_type = static_cast<cldnn::device_type>(dev_type_id);
     ib >> has_runtime_layouts;
     ib >> unique_id;
     ib >> input_layouts;
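The model-cache fix amounts to round-tripping the new dev_type field through the serialization buffers as a uint64_t, so a cached blob restores the device type it was compiled with. A minimal sketch of the same enum round-trip pattern, assuming std::stringstream in place of cldnn's BinaryOutputBuffer/BinaryInputBuffer and an illustrative enum:

    #include <cassert>
    #include <cstdint>
    #include <sstream>

    enum class device_type : int32_t { integrated_gpu = 0, discrete_gpu = 1 };

    int main() {
        std::stringstream buf;

        // Save: widen the enum to a fixed-width integer before writing.
        device_type saved = device_type::integrated_gpu;
        uint64_t raw = static_cast<uint64_t>(saved);
        buf.write(reinterpret_cast<const char*>(&raw), sizeof(raw));

        // Load: read the integer back and cast it to the enum.
        uint64_t loaded_raw = 0;
        buf.read(reinterpret_cast<char*>(&loaded_raw), sizeof(loaded_raw));
        device_type loaded = static_cast<device_type>(loaded_raw);

        assert(loaded == saved);
        return 0;
    }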
@@ -46,8 +46,18 @@ static void CreateMatMulOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::
             return false;

         // dynamic shapes and 1D tensors are not transposed
-        if (shapes[0].is_dynamic() || shapes[1].is_dynamic() ||
-            shapes[0].size() < 2 || shapes[1].size() < 2)
+        if (shapes[0].is_dynamic() || shapes[1].is_dynamic()) {
+            // Currently, the cldnn optimized gemm kernel (gemm_tiled_opt) does not support transposed input with a shape unaligned to 16.
+            // If the shape is not aligned to 16, gemm_ref_kernel will be selected,
+            // but its perf is worse than permute + gemm_tiled_opt.
+            // So we use this permute + gemm_tiled_opt strategy as a temporary solution,
+            // until we have a proper solution, i.e., fixing the gemm_tiled_opt kernel to support unaligned shapes.
+            if (p.get_engine().get_device_info().dev_type == cldnn::device_type::integrated_gpu)
+                return true;
+            else
+                return false;
+        }
+        if (shapes[0].size() < 2 || shapes[1].size() < 2)
             return false;

         // don't transpose inputs if they're aligned to 16
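Restating the intent of the MatMul change: for dynamic shapes, lowering the transposed input to an explicit permute (so gemm_tiled_opt remains usable) is now requested on iGPU only, while dGPU keeps the old behaviour, and 1D tensors are still never transposed. The sketch below only approximates the surrounding CreateMatMulOp logic; shape_info, should_transpose, and is_integrated_gpu are illustrative stand-ins:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Simplified stand-in for the shapes inspected in CreateMatMulOp.
    struct shape_info {
        size_t rank;
        bool   dynamic;
    };

    // Approximate decision: lower the MatMul input transpose to an explicit permute?
    bool should_transpose(const std::vector<shape_info>& shapes, bool is_integrated_gpu) {
        // Dynamic shapes: permute + gemm_tiled_opt is preferred on iGPU only.
        if (shapes[0].dynamic || shapes[1].dynamic)
            return is_integrated_gpu;
        // 1D tensors are never transposed.
        if (shapes[0].rank < 2 || shapes[1].rank < 2)
            return false;
        // Placeholder: the real code continues with the "aligned to 16" checks for static shapes.
        return false;
    }

    int main() {
        std::vector<shape_info> shapes = {{2, true}, {2, false}};  // one dynamic input
        std::printf("iGPU: %d, dGPU: %d\n",
                    static_cast<int>(should_transpose(shapes, true)),
                    static_cast<int>(should_transpose(shapes, false)));
        return 0;
    }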
@@ -24,8 +24,11 @@ struct fc_fake_align_params {
     layout input_layout;
     layout weight_layout;
     data_types data_type;
-    layout expected_input_layout;
-    layout expected_output_layout;
+    layout expected_input_layout_igpu;
+    layout expected_output_layout_igpu;
+    layout expected_input_layout_dgpu;
+    layout expected_output_layout_dgpu;
 };

 class fully_connected_fake_align_test : public testing::TestWithParam<fc_fake_align_params> {};
@@ -54,8 +57,13 @@ TEST_P(fully_connected_fake_align_test, fake_alignment) {
         EXPECT_THROW(fully_connected_inst::get_fake_aligned_params(*impl_param), std::exception);
     } else {
         auto updated_param = fully_connected_inst::get_fake_aligned_params(*impl_param);
-        ASSERT_EQ(updated_param.get_input_layout(), p.expected_input_layout);
-        ASSERT_EQ(updated_param.get_output_layout(), p.expected_output_layout);
+        if (!engine.get_device_info().supports_immad) {
+            ASSERT_EQ(updated_param.get_input_layout(), p.expected_input_layout_igpu);
+            ASSERT_EQ(updated_param.get_output_layout(), p.expected_output_layout_igpu);
+        } else {
+            ASSERT_EQ(updated_param.get_input_layout(), p.expected_input_layout_dgpu);
+            ASSERT_EQ(updated_param.get_output_layout(), p.expected_output_layout_dgpu);
+        }
     }
 }
@@ -65,29 +73,38 @@ INSTANTIATE_TEST_SUITE_P(smoke, fully_connected_fake_align_test,
        {
            layout{ov::PartialShape{0, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}},  // input_layout
            layout{ov::PartialShape{1000, 1024}, data_types::i8, format::bfyx},                      // weight layout
            data_types::f16,
-           layout{ov::PartialShape{0, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}},  // fake_aligned input layout
-           layout{ov::PartialShape{0, 1000}, data_types::f16, format::bfyx}                         // fake_aligned output layout
+           layout{ov::PartialShape{0, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}},  // fake_aligned input layout_igpu
+           layout{ov::PartialShape{0, 1000}, data_types::f16, format::bfyx},                        // fake_aligned output layout_igpu
+           layout{ov::PartialShape{0, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}},  // fake_aligned input layout_dgpu
+           layout{ov::PartialShape{0, 1000}, data_types::f16, format::bfyx}                         // fake_aligned output layout_dgpu
        },
        {
            layout{ov::PartialShape{11, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // input_layout
            layout{ov::PartialShape{1000, 1024}, data_types::i8, format::bfyx},                      // weight layout
            data_types::f16,
-           layout{ov::PartialShape{16, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout
-           layout{ov::PartialShape{16, 1000}, data_types::f16, format::bfyx}                        // fake_aligned output layout
+           layout{ov::PartialShape{16, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout_igpu
+           layout{ov::PartialShape{16, 1000}, data_types::f16, format::bfyx},                       // fake_aligned output layout_igpu
+           layout{ov::PartialShape{16, 1024}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout_dgpu
+           layout{ov::PartialShape{16, 1000}, data_types::f16, format::bfyx}                        // fake_aligned output layout_dgpu
        },
        {
            layout{ov::PartialShape{133, 511}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // input_layout
            layout{ov::PartialShape{800, 511}, data_types::i8, format::bfyx},                        // weight layout
            data_types::f16,
-           layout{ov::PartialShape{136, 511}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout
-           layout{ov::PartialShape{136, 800}, data_types::f16, format::bfyx}                        // fake_aligned output layout
+           layout{ov::PartialShape{144, 511}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout_igpu
+           layout{ov::PartialShape{144, 800}, data_types::f16, format::bfyx},                       // fake_aligned output layout_igpu
+           layout{ov::PartialShape{136, 511}, data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // fake_aligned input layout_dgpu
+           layout{ov::PartialShape{136, 800}, data_types::f16, format::bfyx}                        // fake_aligned output layout_dgpu
        },
        {
            layout{ov::PartialShape::dynamic(2), data_types::i8, format::bfyx, padding{{1,1,1,1}, 0}}, // input_layout
            layout{ov::PartialShape{1000, 1024}, data_types::i8, format::bfyx},                        // weight layout
            data_types::f16,
-           layout{ov::PartialShape{-1, -1}, data_types::i8, format::bfyx},  // fake_aligned input layout // dummy
-           layout{ov::PartialShape{-1, -1}, data_types::f16, format::bfyx}  // fake_aligned output layout // dummy
+           layout{ov::PartialShape{-1, -1}, data_types::i8, format::bfyx},  // fake_aligned input layout_igpu // dummy
+           layout{ov::PartialShape{-1, -1}, data_types::f16, format::bfyx}, // fake_aligned output layout_igpu // dummy
+           layout{ov::PartialShape{-1, -1}, data_types::i8, format::bfyx},  // fake_aligned input layout_dgpu // dummy
+           layout{ov::PartialShape{-1, -1}, data_types::f16, format::bfyx}  // fake_aligned output layout_dgpu // dummy
        },
    }));
@@ -1874,6 +1874,7 @@ TEST(fully_connected_onednn, impl_replacement_with_cldnn) {

     const int32_t input_f = 3, input_b = 1, weight_b = 4;

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f }, data_types::f32,format::bfyx };
     auto input_data = engine.allocate_memory(layout{ ov::PartialShape{ input_b, input_f }, data_types::f32,format::bfyx });
     auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f }, data_types::f32,format::bfyx });
@@ -1909,7 +1910,7 @@
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), input_b);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
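In the reworked tests the expected padded batch also becomes device dependent: fake_alignment_size is 8 when supports_immad is true (the dGPU/oneDNN path) and 16 otherwise, so for input_b = 1 the assertion checks align_to(1, 8) = 8 versus align_to(1, 16) = 16. The same substitution of the hard-coded 8 with fake_alignment_size repeats in the hunks that follow.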
@@ -2045,6 +2046,7 @@ TEST(fully_connected_gpu, dynamic) {

     const int32_t input_f = 3, input_b = 1, weight_b = 4;

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f }, data_types::f32,format::bfyx };
     auto input_data = engine.allocate_memory(layout{ ov::PartialShape{ input_b, input_f }, data_types::f32,format::bfyx });
     auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f }, data_types::f32,format::bfyx });
@@ -2071,7 +2073,7 @@
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), input_b);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2199,7 +2201,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
     auto input_data1 = engine.allocate_memory(input_actual_layout);
     auto input_data2 = engine.allocate_memory(input_actual_layout);
     auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f }, data_types::f32,format::bfyx });

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     set_values(input_data1, { 0.5f, -2.0f, -0.5f });
     set_values(input_data2, { -0.5f, 2.0f, 0.5f });
     set_values(weights_data, { 1.5f, 1.0f, 0.5f,
@@ -2228,7 +2230,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), input_b);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2252,7 +2254,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), input_b);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2272,6 +2274,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {

     const int32_t input_f = 3, weight_b = 4;

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f }, data_types::f32,format::bfyx };
     auto input_actual_layout1 = layout{ ov::PartialShape{ 2, input_f }, data_types::f32,format::bfyx};
     auto input_actual_layout2 = layout{ ov::PartialShape{ 1, input_f }, data_types::f32,format::bfyx};
@@ -2311,7 +2314,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), 2);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2340,7 +2343,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), 1);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2360,6 +2363,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) {

     const int32_t input_f = 3, weight_b = 4;

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f }, data_types::f32,format::bfyx };
     auto input_actual_layout1 = layout{ ov::PartialShape{ 2, input_f }, data_types::f32,format::bfyx};
     auto input_actual_layout2 = layout{ ov::PartialShape{ 1, input_f }, data_types::f32,format::bfyx};
@@ -2398,7 +2402,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(2, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), 2); // fake_alignment
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2427,7 +2431,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) {
     auto output_prim_mem = outputs.begin()->second.get_memory();

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(1, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), 1); // fake_alignment
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -2661,6 +2665,7 @@ TEST(fully_connected_gpu, has_cached_weights_reorder) {

     const int32_t input_f = 3, input_b = 1, weight_b = 4;

+    auto fake_alignment_size = engine.get_device_info().supports_immad ? 8 : 16;
     auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f }, data_types::f32,format::bfyx };
     auto input_data = engine.allocate_memory(layout{ ov::PartialShape{ input_b, input_f }, data_types::f32,format::bfyx });
     auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f }, data_types::f32,format::bfyx });
@@ -2701,7 +2706,7 @@ TEST(fully_connected_gpu, has_cached_weights_reorder) {
     ASSERT_TRUE(reorder_impl == nullptr);

     auto out_l = network.get_output_layout(outputs.begin()->first);
-    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, 8)); // fake_alignment
+    ASSERT_EQ(output_prim_mem->get_layout().batch(), align_to(input_b, fake_alignment_size)); // fake_alignment
     ASSERT_EQ(out_l.batch(), input_b);
     ASSERT_EQ(out_l.feature(), weight_b);
     ASSERT_EQ(out_l.spatial(0), 1);
@@ -70,9 +70,13 @@ public:

     const auto primitive_hash = primitve->hash();
     const auto params_hash = primitve->type->get_fake_aligned_params(*prim_inst->get_impl_params()).hash();

-    ASSERT_EQ(primitive_hash, 6924775129729406941UL);
-    ASSERT_EQ(params_hash, 8552673460001178483UL);
+    if (!engine.get_device_info().supports_immad) {
+        ASSERT_EQ(primitive_hash, 6924775129729406941UL);
+        ASSERT_EQ(params_hash, 15366394052020805414UL);
+    } else {
+        ASSERT_EQ(primitive_hash, 6924775129729406941UL);
+        ASSERT_EQ(params_hash, 8552673460001178483UL);
+    }
 }

 void test_gather_basic(bool is_caching_test) {