[IE CLDNN] Fixed fsv16 lrn kernel with fp16 input (#2086)
This commit is contained in:
parent
0cd0c1a551
commit
51564f415c
@ -41,14 +41,15 @@ KERNEL (lrn_gpu_across_channel_multiple_features_fsv16)(
|
||||
for (uint i = 0; i < LOCAL_SIZE; ++i, ++input_offset_f) {
|
||||
bool non_zero = input_offset_f >= 0 && input_offset_f < INPUT0_FEATURE_NUM;
|
||||
uint input_idx = INPUT0_GET_INDEX(batch_id, input_offset_f, y, x);
|
||||
val[i] = (int)non_zero * TO_INPUT0_TYPE(input[input_idx]);
|
||||
val[i] = (int)non_zero * TO_INPUT0_TYPE(ALPHA_VAL_FACTOR_DIV_BY_SIZE) * TO_INPUT0_TYPE(input[input_idx]);
|
||||
res = mad(val[i], val[i], res);
|
||||
}
|
||||
res = mad(res, TO_INPUT0_TYPE(ALPHA_DIV_BY_SIZE), TO_INPUT0_TYPE(K));
|
||||
res = native_powr(res, -TO_INPUT0_TYPE(BETA));
|
||||
|
||||
uint output_idx = OUTPUT_GET_INDEX(batch_id, feature_id, y, x);
|
||||
INPUT0_TYPE lrn_result = res * val[PADDING];
|
||||
uint input_idx = INPUT0_GET_INDEX(batch_id, feature_id, y, x);
|
||||
INPUT0_TYPE lrn_result = res * input[input_idx];
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
output[output_idx] = TO_OUTPUT_TYPE(FUSED_OPS_RESULT);
|
||||
|
@ -125,6 +125,55 @@ TEST(lrn_fp32_gpu, basic2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp16_gpu, basic1) {
|
||||
// input : 1x16x1x1
|
||||
// Output : 1x16x1x1
|
||||
const auto& engine = get_test_engine();
|
||||
|
||||
const size_t b = 1;
|
||||
const size_t f = 16;
|
||||
const size_t y = 1;
|
||||
const size_t x = 1;
|
||||
|
||||
auto input = memory::allocate(engine, { data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } });
|
||||
std::vector<half_t> inputVals(b * f * y * x);
|
||||
std::generate(inputVals.begin(), inputVals.end(), []() {
|
||||
static float n = 0;
|
||||
return half_t(n++);
|
||||
});
|
||||
|
||||
set_values(input, inputVals);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input.get_layout()));
|
||||
uint32_t size = 5;
|
||||
float k = 0.5f;
|
||||
float alpha = 9.9e-05f;
|
||||
float beta = 1.f;
|
||||
topology.add(lrn("lrn", "input", size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("lrn").get_memory();
|
||||
auto output_ptr = output.pointer<uint16_t>();
|
||||
|
||||
std::vector<float> expected_results = {
|
||||
0.f, 1.99889f, 3.99525f, 5.98696f,
|
||||
7.97159f, 9.94682f, 11.9104f, 13.86f,
|
||||
15.7936f, 17.709f, 19.6041f, 21.4769f,
|
||||
23.3257f, 25.1485f, 27.2091f, 29.3151f
|
||||
};
|
||||
|
||||
ASSERT_EQ(output_ptr.size(), expected_results.size());
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
EXPECT_TRUE(are_equal(expected_results[i], half_to_float(output_ptr[i]))) << i;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp32_gpu, basic3) {
|
||||
// input : 2x16x4x4
|
||||
// Output : 2x16x4x4
|
||||
|
Loading…
Reference in New Issue
Block a user