[GPU] fix adaptive pooling kernel (#17429)

* fixed the kernel to compute pooling window end indices with integer arithmetic instead of float ceil()

* added a unit test
This commit is contained in:
Eddy Kim 2023-05-10 15:56:43 +09:00 committed by GitHub
parent 00eacd2a96
commit e7d94ba020
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 8 additions and 5 deletions

View File

@ -45,12 +45,15 @@ KERNEL(adaptive_pooling_gpu)(
#if OUTPUT_DIMS == 5
uint z_start = z * INPUT0_SIZE_Z / OUTPUT_SIZE_Z;
uint z_end = ceil((float)((z + 1) * INPUT0_SIZE_Z) / OUTPUT_SIZE_Z);
uint z_end = ((z + 1) * INPUT0_SIZE_Z) / OUTPUT_SIZE_Z;
z_end += (((z + 1) * INPUT0_SIZE_Z) - OUTPUT_SIZE_Z * z_end != 0) ? 1 : 0;
#endif
uint y_start = y * INPUT0_SIZE_Y / OUTPUT_SIZE_Y;
uint y_end = ceil((float)((y + 1) * INPUT0_SIZE_Y) / OUTPUT_SIZE_Y);
uint y_end = ((y + 1) * INPUT0_SIZE_Y) / OUTPUT_SIZE_Y;
y_end += (((y + 1) * INPUT0_SIZE_Y) - OUTPUT_SIZE_Y * y_end != 0) ? 1 : 0;
uint x_start = x * INPUT0_SIZE_X / OUTPUT_SIZE_X;
uint x_end = ceil((float)((x + 1) * INPUT0_SIZE_X) / OUTPUT_SIZE_X);
uint x_end = ((x + 1) * INPUT0_SIZE_X) / OUTPUT_SIZE_X;
x_end += (((x + 1) * INPUT0_SIZE_X) - OUTPUT_SIZE_X * x_end != 0) ? 1 : 0;
#if OUTPUT_DIMS == 5

View File

@ -171,7 +171,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_adaptive_avg_pooling_test_f32_2d,
::testing::ValuesIn(std::vector<AdaptiveAvgPoolingParams>{
{ tensor(1, 2, 7, 3), tensor(1, 2, 3, 3) },
{ tensor(2, 3, 7, 3), tensor(2, 3, 3, 3) },
{ tensor(1, 3, 16, 16), tensor(1, 3, 16, 16) },
{ tensor(1, 3, 7, 7), tensor(1, 3, 7, 7) },
}),
::testing::Values(format::bfyx),
::testing::Values(format::bfyx),

View File

@ -69,7 +69,7 @@ ov::Shape tensorToShape(const tensor& t, const format f)
template<typename T>
void generateTestData(const AdaptiveMaxPoolingParams& p, const format fmt,
std::vector<T>& inputs, std::vector<T>& outputs, std::vector<int32_t>& indices) {
const auto in = generate_random_1d<float>(p.inputTensor.count(), -127, 127, 1);
const auto in = generate_random_1d<float>(p.inputTensor.count(), -127, 127, 8);
std::vector<float> out(p.outputTensor.count());
std::vector<int32_t> ind(p.outputTensor.count());