[GPU] Remove f16<->f32 conversion duplicates (#13735)

This commit is contained in:
Vladimir Paramuzov 2022-11-07 11:01:20 +04:00 committed by GitHub
parent 560dba22cb
commit ad403cf55e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 119 additions and 345 deletions

View File

@ -4,7 +4,6 @@
#include "detection_output_inst.h"
#include "impls/implementation_map.hpp"
#include "math_utils.h"
#include "register.hpp"
#include "cpu_impl_helpers.hpp"
@ -12,6 +11,7 @@
#include <stdexcept>
#include <string>
#include <type_traits>
#include <immintrin.h>
#include <xmmintrin.h>
#include <vector>
#include <utility>

View File

@ -1,142 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <immintrin.h>
static inline uint16_t float32_to_float16(float value) {
#define TO_M128i(a) (*reinterpret_cast<__m128i*>(&(a)))
#define TO_M128(a) (*const_cast<__m128*>(reinterpret_cast<const __m128*>(&(a))))
static const uint32_t DWORD_SIGNMASK = 0x80000000;
static const uint32_t DWORD_MINFP16 = 0x38800000;
static const uint32_t DWORD_MAXFP16 = 0x477fe000;
static const uint32_t DWORD_FP16_2_POW_10 = (1 << 10);
static const uint32_t DWORD_FP16_EXPBIAS_NO_HALF = 0xc8000000;
static const uint32_t WORD_MAXFP16 = 0x7BFF;
static const __m128i IVec4SignMask = _mm_set1_epi32(DWORD_SIGNMASK);
static const __m128i IVec4MinNormalFp16 = _mm_set1_epi32(DWORD_MINFP16);
static const __m128i IVec4MaxNormalFp16 = _mm_set1_epi32(DWORD_MAXFP16);
static const __m128i IVec4OnePow10 = _mm_set1_epi32(DWORD_FP16_2_POW_10);
static const __m128i IVec4ExpBiasFp16 = _mm_set1_epi32(DWORD_FP16_EXPBIAS_NO_HALF);
static const __m128i IVec4MaxFp16InWords = _mm_set1_epi32(WORD_MAXFP16);
static const __m128 FVec4MaxNormalFp16 = TO_M128(IVec4MaxNormalFp16);
static const __m128 FVec4MinNormalFp16 = TO_M128(IVec4MinNormalFp16);
static const __m128i IVec4InfF32 = _mm_set1_epi32(0x7f800000); // inf in in hex representation
static const __m128i IVec4InfF16 = _mm_set1_epi32(0x00007c00);
static const __m128 FVec4MaxFp16InWords = TO_M128(IVec4MaxFp16InWords);
__m128 Src = _mm_set1_ps(value);
// Remove the sign bit from the source
__m128 AbsSrc = _mm_andnot_ps(TO_M128(IVec4SignMask), Src);
// Create a mask to identify the DWORDs that are smaller than the minimum normalized fp16 number
__m128 CmpToMinFp16Mask = _mm_cmplt_ps(AbsSrc, FVec4MinNormalFp16);
// Create a mask to identify the DWORDs that are larger than the maximum normalized fp16 number
__m128 CmpToMaxFp16Mask = _mm_cmpgt_ps(AbsSrc, FVec4MaxNormalFp16);
__m128i CmpToInfMask = _mm_cmpeq_epi32(TO_M128i(AbsSrc), IVec4InfF32);
// Create a mask with the minimum normalized fp16 number in the DWORDs that are smaller than it
__m128 MaskOfMinFp16 = _mm_and_ps(CmpToMinFp16Mask, FVec4MinNormalFp16);
__m128i MaskOf2POW10 = _mm_and_si128(TO_M128i(CmpToMinFp16Mask), IVec4OnePow10);
__m128 ResultPS = _mm_add_ps(AbsSrc, MaskOfMinFp16);
__m128i Result = TO_M128i(ResultPS);
// We need to move from a 127 biased domain to a 15 biased domain. This means subtracting 112 from the exponent. We
// will add '-112' to the exponent but since the exponent is shifted 23 bits to the left we need to shift '-112' 23
// bits to the left as well. This gives us 0xC8000000. We are going to shift the mantissa 13 bits to the right
// (moving from 23 bits mantissa to 10).
Result = _mm_add_epi32(Result, IVec4ExpBiasFp16);
// Shift the mantissa to go from 23 bits to 10 bits
Result = _mm_srli_epi32(Result, 13);
Result = _mm_sub_epi16(Result, MaskOf2POW10);
ResultPS = _mm_blendv_ps(TO_M128(Result), FVec4MaxFp16InWords, CmpToMaxFp16Mask);
Result = TO_M128i(ResultPS);
// infinity preserving blending
Result = _mm_blendv_epi8(Result, IVec4InfF16, CmpToInfMask);
__m128i iPackedResult = _mm_packs_epi32(Result, Result);
// iSignMask = mask of the sign bits of the source 4 dwords
__m128i iSignMask = _mm_and_si128(TO_M128i(Src), IVec4SignMask);
// Pack the sign mask to 4 words
__m128i iSignInWords = _mm_packs_epi32(iSignMask, iSignMask);
iPackedResult = _mm_or_si128(iPackedResult, iSignInWords);
uint16_t out16 = (uint16_t)_mm_extract_epi16(iPackedResult, 0);
return out16;
}
static inline float float16_to_float32(uint16_t value) {
static const uint32_t FLOAT16_EXP_SHIFT = (23 - 10);
static const uint32_t FLOAT16_EXP_MASK = 0x7C00;
static const uint32_t FLOAT32_EXP_MASK = 0x7F800000;
static const uint32_t FLOAT16_MANTISSA_MASK = 0x03FF;
static const uint32_t FLOAT16_TO_32_BIAS_DIFF_DENORM =
((127 - 15 - 10)
<< 23); // The difference is (127-15) but we want to do the calculation in the exp place (bit 23:32)
static const uint32_t FLOAT16_TO_32_BIAS_DIFF = ((127 - 15) << 10);
static const uint32_t FLOAT16_IMPLICIT_1 = (1 << 10);
static const uint32_t FLOAT16_EXP_MIN = (1 << 10);
static const uint32_t FLOAT16_SIGN_MASK = 0x8000;
__m128i a = _mm_unpacklo_epi16(_mm_set1_epi16(value), _mm_setzero_si128());
__m128i exps = _mm_and_si128(_mm_set1_epi32(FLOAT16_EXP_MASK), a); // Mask the exponents
__m128i mantissa = _mm_and_si128(_mm_set1_epi32(FLOAT16_MANTISSA_MASK), a); // Mask the mantissa
__m128i signs = _mm_and_si128(_mm_set1_epi32(FLOAT16_SIGN_MASK), a);
signs = _mm_slli_epi32(signs, 16);
__m128i nans = _mm_cmpeq_epi32(exps, _mm_set1_epi32(FLOAT16_EXP_MASK));
nans = _mm_and_si128(nans, _mm_set1_epi32(FLOAT32_EXP_MASK));
nans = _mm_or_si128(nans, signs);
__m128i subnormals = _mm_cmpeq_epi32(exps, _mm_setzero_si128());
int out32;
// e\m| 0 | 1
// ------------
// 0 | 0 | S
// ------------
// 1 | N | N
//
// The expression: (~exp) & mantissa, will evaluate to 0 exactly when the number is non subnormal or it's zero (just
// like in the table) testz Tests for this condition
if (_mm_testz_si128(subnormals, mantissa)) {
__m128i tmp;
exps = _mm_add_epi32(exps, _mm_set1_epi32(FLOAT16_TO_32_BIAS_DIFF));
tmp = _mm_or_si128(exps, mantissa);
tmp = _mm_slli_epi32(tmp, FLOAT16_EXP_SHIFT);
tmp = _mm_blendv_epi8(
tmp,
_mm_setzero_si128(),
subnormals); // The idea is of course to use blendv_ps, but epi8 will work the same and won't switch stack
tmp = _mm_or_si128(tmp, nans);
out32 = _mm_extract_epi32(tmp, 0);
} else {
__m128i normals = _mm_andnot_si128(subnormals, _mm_set1_epi32(FLOAT16_IMPLICIT_1)); // Mark all normal numbers
mantissa = _mm_or_si128(mantissa, normals); // Apply implicit bit
exps = _mm_max_epi16(
exps,
_mm_set1_epi32(
FLOAT16_EXP_MIN)); // All subnormals will have 1 in the exponent (needed for correct bias computation)
exps = _mm_slli_epi32(exps, FLOAT16_EXP_SHIFT);
exps = _mm_add_epi32(exps, _mm_set1_epi32(FLOAT16_TO_32_BIAS_DIFF_DENORM));
__m128 tmp;
tmp = _mm_mul_ps(_mm_castsi128_ps(exps), _mm_cvtepi32_ps(mantissa));
tmp = _mm_or_ps(tmp, _mm_castsi128_ps(nans));
out32 = _mm_extract_ps(tmp, 0);
}
float outf32 = *reinterpret_cast<float*>(&out32);
return outf32;
}

View File

@ -14,6 +14,7 @@
#include "intel_gpu/runtime/event.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/runtime/half.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/network.hpp"
@ -121,51 +122,11 @@ void dump_perf_data_raw(std::string dump_path, const std::list<std::shared_ptr<p
}
}
float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false) {
#if defined HALF_HALF_HPP
return val;
#else
// FP32 parts extracted from FP16.
uint32_t sign = (static_cast<uint16_t>(val) & 0x8000U) << 16;
uint32_t mantissa = (static_cast<uint16_t>(val) & 0x3FFU) << 13;
uint32_t exp_val_f16 = (static_cast<uint16_t>(val) & 0x7C00U) >> 10;
uint32_t exp;
if (exp_val_f16 == 0) {
// Handling +/-0 and denormals.
if (mantissa == 0) {
exp = 0;
} else if (flush_denorm_to_zero) {
sign = 0;
exp = 0;
mantissa = 0;
} else {
// Denorms conversion to normal numbers.
exp = 127 - 15;
while (!(mantissa & 0x400000U)) {
mantissa <<= 1;
--exp;
}
mantissa = (mantissa << 1) & 0x7FFFFFU;
exp <<= 23;
}
} else {
// Handling +/-infinity, NaN and normal numbers.
exp = (exp_val_f16 == 0x1FU ? 0xFFU : exp_val_f16 + 127 - 15) << 23;
}
float ret;
reinterpret_cast<uint32_t&>(ret) = sign | exp | mantissa;
return ret;
#endif
}
float convert_element(int32_t i) { return static_cast<float>(i); }
float convert_element(float f) { return f; }
float convert_element(half_t h) { return convert_half_to_float(h); }
float convert_element(half_t h) { return half_to_float(h); }
size_t get_x_pitch(const layout& layout) {
try {

View File

@ -89,7 +89,7 @@ public:
cldnn::mem_lock<int16_t> ref(output_not_fused_prim, get_test_stream());
cldnn::mem_lock<int16_t> output_ptr(output_fused_prim, get_test_stream());
for (size_t i = 0; i < output_fused_prim->get_layout().count(); i++) {
ASSERT_NEAR(float16_to_float32(ref[i]), float16_to_float32(output_ptr[i]), tolerance) << "i = " << i;
ASSERT_NEAR(half_to_float(ref[i]), half_to_float(output_ptr[i]), tolerance) << "i = " << i;
}
}
}
@ -139,7 +139,7 @@ public:
VF<int32_t> rnd_vec(s.count() / 32, static_cast<int32_t>(fill_value));
set_values(prim, rnd_vec);
} else if (l.data_type == data_types::f16) {
VF<uint16_t> rnd_vec(s.count(), float32_to_float16(fill_value));
VF<uint16_t> rnd_vec(s.count(), float_to_half(fill_value));
set_values(prim, rnd_vec);
} else if (l.data_type == data_types::f32) {
VF<float> rnd_vec(s.count(), fill_value);

View File

@ -296,6 +296,6 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) {
1.75f, 2.9375f, 1.1875f
};
for (size_t i = 0; i < expected_output_vec.size(); ++i) {
EXPECT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_output_vec[i], half_to_float(output_ptr[i]));
}
}

View File

@ -54,7 +54,7 @@ TEST(batch_to_space_fp16_gpu, i8111_bs1222_cb0000_ce0000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -104,7 +104,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -151,7 +151,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -198,7 +198,7 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -249,7 +249,7 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -300,7 +300,7 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -350,7 +350,7 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -239,7 +239,7 @@ TEST_P(binary_convolution_test, conv) {
cldnn::mem_lock<float> ref(output_ref, get_test_stream());
cldnn::mem_lock<uint16_t> opt(outputMemory, get_test_stream());
ASSERT_EQ(ref[i], float16_to_float32(opt[i])) << i;
ASSERT_EQ(ref[i], half_to_float(opt[i])) << i;
}
}
}
@ -472,6 +472,6 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) {
EXPECT_EQ(output_layout.spatial(0), 2);
for (size_t i = 0; i < output_layout.count(); i++) {
EXPECT_EQ(float16_to_float32(output_ptr[i]), output_vec[i]) << "index="<< i;
EXPECT_EQ(half_to_float(output_ptr[i]), output_vec[i]) << "index="<< i;
}
}

View File

@ -214,7 +214,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_res.size(); ++i) {
EXPECT_NEAR(ref_res[i], float16_to_float32(output_ptr[i]), 1.001f);
EXPECT_NEAR(ref_res[i], half_to_float(output_ptr[i]), 1.001f);
}
}

View File

@ -289,6 +289,6 @@ TEST(cum_sum_gpu_f16, DISABLED_basic_1d) {
ASSERT_EQ(output->count(), answers.size());
for (size_t i = 0; i < answers.size(); ++i) {
EXPECT_TRUE(are_equal(answers[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(answers[i], half_to_float(output_ptr[i]))) << i;
}
}

View File

@ -946,7 +946,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pa
for (unsigned int i = 0; i < expected_output_vec.size(); i++)
{
EXPECT_FLOAT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
EXPECT_FLOAT_EQ(expected_output_vec[i], half_to_float(output_ptr[i]));
}
}
@ -1892,7 +1892,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad
};
for (unsigned int i = 0; i < expected_output_vec.size(); i++) {
EXPECT_FLOAT_EQ(expected_output_vec[i], float16_to_float32(output_ptr[i]));
EXPECT_FLOAT_EQ(expected_output_vec[i], half_to_float(output_ptr[i]));
}
}

View File

@ -53,7 +53,7 @@ TEST(depth_to_space_fp16_gpu, d1411_bs2) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -95,7 +95,7 @@ TEST(depth_to_space_fp16_gpu, d1421_bs2) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -158,7 +158,7 @@ TEST(depth_to_space_fp16_gpu, d1933_bs3) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -72,7 +72,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -125,7 +125,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -219,7 +219,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]), static_cast<float>(1e-2))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]), static_cast<float>(1e-2))) << i;
}
}
@ -360,7 +360,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]), static_cast<float>(1e-2))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]), static_cast<float>(1e-2))) << i;
}
}
@ -424,7 +424,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -489,7 +489,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -554,7 +554,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -612,7 +612,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -758,7 +758,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]), static_cast<float>(1e-2))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]), static_cast<float>(1e-2))) << i;
}
}
@ -823,7 +823,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -888,7 +888,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -953,7 +953,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -1011,7 +1011,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
}
}
@ -1157,7 +1157,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], float16_to_float32(output_ptr[i]), static_cast<float>(1e-2))) << i;
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]), static_cast<float>(1e-2))) << i;
}
}

View File

@ -38,7 +38,7 @@ inline void DoTest(engine& engine,
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -395,7 +395,7 @@ TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -485,7 +485,7 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -613,7 +613,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -707,7 +707,7 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -770,7 +770,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -832,7 +832,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -903,7 +903,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -961,7 +961,7 @@ TEST(gather_gpu_fp16, d14_axisB) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1023,7 +1023,7 @@ TEST(gather_gpu_fp16, d222_axisB) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1084,7 +1084,7 @@ TEST(gather_gpu_fp16, d22_axisY) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1145,7 +1145,7 @@ TEST(gather_gpu_fp16, d22_axisF) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -66,7 +66,7 @@ inline void DoTestBase(engine& engine,
// Compare output value
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -327,7 +327,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -369,7 +369,7 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -415,7 +415,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -461,7 +461,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -508,7 +508,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -555,7 +555,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -602,6 +602,6 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -87,7 +87,7 @@ TEST(scatter_elements_update_gpu_fp16, d2411_axisF) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -164,7 +164,7 @@ struct scatter_nd_update_random_test : testing::TestWithParam<scatter_nd_update_
ov::Shape(updates_vec.begin(), updates_vec.end()));
for (size_t i = 0; i < outputs_ref.size(); ++i) {
EXPECT_EQ(outputs_ref[i], float16_to_float32(outputs_ptr[i]));
EXPECT_EQ(outputs_ref[i], half_to_float(outputs_ptr[i]));
}
}
@ -581,7 +581,7 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -665,7 +665,7 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -729,7 +729,7 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -800,7 +800,7 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -930,7 +930,7 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1026,7 +1026,7 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1104,7 +1104,7 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1202,7 +1202,7 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1270,7 +1270,7 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1336,7 +1336,7 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1401,7 +1401,7 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1456,7 +1456,7 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1531,7 +1531,7 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1586,7 +1586,7 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1635,7 +1635,7 @@ TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) {
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1775,7 +1775,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -1914,7 +1914,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2064,7 +2064,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2167,7 +2167,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2270,7 +2270,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2379,7 +2379,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2496,7 +2496,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2662,7 +2662,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -2830,7 +2830,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3001,7 +3001,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3184,7 +3184,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3385,7 +3385,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3537,7 +3537,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3690,7 +3690,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -3846,7 +3846,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -4009,7 +4009,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -4183,7 +4183,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -4380,6 +4380,6 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i211111) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -112,7 +112,7 @@ TEST(scatter_update_gpu_fp16, d2411_axisB) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i << ", target_format=" << target_format;
}
}
@ -284,7 +284,7 @@ TEST(scatter_update_gpu_fp16, d4311_axisB) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i << ", target_format=" << target_format;
}
}
@ -418,7 +418,7 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i << ", target_format=" << target_format;
}
}
@ -525,7 +525,7 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) {
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i << ", target_format=" << target_format;
}
}
@ -716,7 +716,7 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i << ", target_format=" << target_format;
}
}
@ -1582,7 +1582,7 @@ TEST(scatter_update_gpu_fp16, d21214_bfzyx_axisX_bfwzyx) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]))
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]))
<< "i=" << i
<< ", target_format_2d=" << target_format
<< ", target_format_3d=" << target_format_3d;

View File

@ -55,7 +55,7 @@ TEST(space_to_batch_fp16_gpu, i1222_bs1222_pb0000_pe0000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -102,7 +102,7 @@ TEST(space_to_batch_fp16_gpu, i1242_bs1221_pb0020_pe0000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -148,7 +148,7 @@ TEST(space_to_batch_fp16_gpu, i2132_bs1222_pb0010_pe0100) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -192,7 +192,7 @@ TEST(space_to_batch_fp16_gpu, i12132_bs12122_pb00010_pe00000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -246,7 +246,7 @@ TEST(space_to_batch_fp16_gpu, i134121_bs142121_pb010100_pe000000) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -299,7 +299,7 @@ TEST(space_to_batch_fp16_gpu, i11611_bs1222_pb0010_pe0001_b_fs_yx_fsv16) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -348,7 +348,7 @@ TEST(space_to_batch_fp16_gpu, i1812_bs1221_pb0010_pe0200_b_fs_yx_fsv16) {
ASSERT_EQ(output_ptr.size(), expected_results.size());
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -54,7 +54,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mbf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -96,7 +96,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mbf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -151,7 +151,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mbf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -214,7 +214,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mbf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -446,7 +446,7 @@ TEST(space_to_depth_fp16_gpu, d1122_bs2_mdf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -488,7 +488,7 @@ TEST(space_to_depth_fp16_gpu, d1142_bs2_mdf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -543,7 +543,7 @@ TEST(space_to_depth_fp16_gpu, d1264_bs2_mdf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
@ -606,7 +606,7 @@ TEST(space_to_depth_fp16_gpu, d1199_bs3_mdf) {
};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
EXPECT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}

View File

@ -3,7 +3,7 @@
//
#pragma once
#include "include/math_utils.h"
#include "intel_gpu/runtime/half.hpp"
struct FLOAT16 {
struct representation {
@ -22,19 +22,19 @@ struct FLOAT16 {
static constexpr FLOAT16 lowest_val() { return FLOAT16((uint16_t)(0xfbff)); }
operator double() const {
double d = (double)float16_to_float32(v);
double d = (double)cldnn::half_to_float(v);
return d;
}
operator float() const {
float f = float16_to_float32(v);
float f = cldnn::half_to_float(v);
return f;
}
operator int16_t() const { return *(int16_t *)(&v); }
operator long long int() const { return v; }
operator uint32_t() const { return v; }
FLOAT16(float f) { v = float32_to_float16(f); }
FLOAT16(size_t s) { v = float32_to_float16(float(s)); }
FLOAT16(int i) { v = float32_to_float16(float(i)); }
FLOAT16(float f) { v = cldnn::float_to_half(f); }
FLOAT16(size_t s) { v = cldnn::float_to_half(float(s)); }
FLOAT16(int i) { v = cldnn::float_to_half(float(i)); }
// TODO Below should have constructor tag to avoid ambigious behaviour, ex FLOAT16(16.f) != FLOAT16((uint16_t)16)
explicit constexpr FLOAT16(int16_t d) : v(d) {}
explicit constexpr FLOAT16(uint16_t d) : v(d) {}

View File

@ -93,52 +93,7 @@ namespace rnd_generators
static output_type convert(const calc_type value)
{
constexpr std::uint32_t fp32_one = 1;
constexpr std::uint32_t fp32_scd_bits = 23;
constexpr std::uint32_t fp32_exp_bits = 8;
constexpr std::uint32_t fp32_sgn_bits = 1;
constexpr std::uint32_t fp32_scd_off = 0;
constexpr std::uint32_t fp32_exp_off = fp32_scd_off + fp32_scd_bits;
constexpr std::uint32_t fp32_sgn_off = fp32_exp_off + fp32_exp_bits;
constexpr std::uint32_t fp32_scd_mask = ((fp32_one << fp32_scd_bits) - 1) << fp32_scd_off;
constexpr std::uint32_t fp32_exp_mask = ((fp32_one << fp32_exp_bits) - 1) << fp32_exp_off;
constexpr std::uint32_t fp32_sgn_mask = ((fp32_one << fp32_sgn_bits) - 1) << fp32_sgn_off;
constexpr std::uint32_t fp32_exp_bias = (fp32_one << (fp32_exp_bits - 1)) - 1;
constexpr std::uint16_t fp16_one = 1;
constexpr std::uint32_t fp16_scd_bits = 10;
constexpr std::uint32_t fp16_exp_bits = 5;
constexpr std::uint32_t fp16_sgn_bits = 1;
constexpr std::uint32_t fp16_scd_off = 0;
constexpr std::uint32_t fp16_exp_off = fp16_scd_off + fp16_scd_bits;
constexpr std::uint32_t fp16_sgn_off = fp16_exp_off + fp16_exp_bits;
constexpr std::uint16_t fp16_scd_mask = ((fp16_one << fp16_scd_bits) - 1) << fp16_scd_off;
constexpr std::uint16_t fp16_exp_mask = ((fp16_one << fp16_exp_bits) - 1) << fp16_exp_off;
constexpr std::uint16_t fp16_sgn_mask = ((fp16_one << fp16_sgn_bits) - 1) << fp16_sgn_off;
constexpr std::uint32_t fp16_exp_bias = (fp16_one << (fp16_exp_bits - 1)) - 1;
std::uint32_t repr = reinterpret_cast<const std::uint32_t&>(value);
std::uint16_t significand = static_cast<std::uint16_t>((repr & fp32_scd_mask) >> (fp32_scd_bits - fp16_scd_bits)) & fp16_scd_mask;
std::uint32_t fp32_exponent = (repr & fp32_exp_mask) >> fp32_exp_off;
if (fp32_exponent == 0)
fp32_exponent = fp32_exp_bias - fp16_exp_bias; // handle +/-0 correctly.
if (fp32_exponent > fp32_exp_bias + fp16_exp_bias)
throw std::logic_error("Conversion to half_t failed. Please use smaller scale (<= 65504).");
std::uint16_t exponent = static_cast<std::uint16_t>((fp32_exponent + fp16_exp_bias - fp32_exp_bias) << fp16_exp_off) & fp16_exp_mask;
std::uint16_t sign = static_cast<std::uint16_t>((repr & fp32_sgn_mask) >> (fp32_sgn_off - fp16_sgn_off)) & fp16_sgn_mask;
std::uint16_t conv_repr = significand | exponent | sign;
return reinterpret_cast<const output_type&>(conv_repr);
return FLOAT16(cldnn::float_to_half(value));
}
};