From 2e10a1cb132b9c03302a5cbb9fd9340d7f56aefc Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Wed, 11 Jan 2023 15:46:15 +0900 Subject: [PATCH] Added truncation mode to reorder for convert (#14090) * Apply convert truncation * Added truncation mode to reorder for convert * Added unittest * convert_long not casting --- .../include/intel_gpu/primitives/reorder.hpp | 10 +++- .../intel_gpu/src/graph/impls/ocl/reorder.cpp | 2 + .../cl_kernels/reorder_data.cl | 4 ++ .../kernels/reorder/reorder_kernel.cpp | 3 ++ .../kernels/reorder/reorder_kernel_base.h | 1 + .../intel_gpu/src/plugin/ops/convert.cpp | 4 +- .../tests/test_cases/reorder_gpu_test.cpp | 47 +++++++++++++++++++ 7 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp index 6b2045b29e5..63e11151393 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp @@ -67,18 +67,21 @@ struct reorder : public primitive_base { /// @param input Input primitive id. /// @param output_layout Requested memory layout. /// @param values_to_subtract Array of mean subtract values. + /// @param truncate Convert truncation mode. reorder(const primitive_id& id, const input_info& input, format output_format, data_types output_data_type, const std::vector& values_to_subtract = {}, const reorder_mean_mode mode = reorder_mean_mode::subtract, - const padding& output_padding = padding()) + const padding& output_padding = padding(), + const bool truncate = false) : primitive_base(id, {input}, {output_padding}, {optional_data_type{output_data_type}}), output_format(output_format), mean(""), subtract_per_feature(values_to_subtract), - mean_mode(mode) {} + mean_mode(mode), + truncate(truncate) {} /// @brief Constructs reorder primitive which takes mean subtract values from another primitive. /// @param id This primitive id. 
@@ -149,6 +152,9 @@ struct reorder : public primitive_base { input_mem_type == memory_type::surface; } + /// @brief Convert truncation Mode + bool truncate = false; + protected: std::vector> get_dependencies() const override { if (mean.empty()) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp index 6e7131aed98..c466c6eacc4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/reorder.cpp @@ -101,6 +101,8 @@ public: } params.winograd = impl_param.input_layouts[0].format.is_winograd() || output_layout.format.is_winograd(); + params.truncate = impl_param.typed_desc()->truncate; + return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl index 03d3122458e..2b34213acac 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/reorder_data.cl @@ -141,7 +141,11 @@ KERNEL (reorder_data)( IMAGE_WRITE(output, (int2)(x, y), colorRGBA); #else #if INPUT0_IS_FP && !OUTPUT_IS_FP +#if CONVERT_TRUNCATE + output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(convert_long(res)), ACTIVATION_PARAMS_TYPED); +#else output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE_SAT(res), ACTIVATION_PARAMS_TYPED); +#endif #else output[output_idx] = ACTIVATION_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(res), ACTIVATION_PARAMS_TYPED); #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel.cpp index 99158e8d7cc..47ce223e89b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel.cpp @@ 
-33,6 +33,9 @@ ParamsKey ReorderKernelRef::GetSupportedKey() const { JitConstants ReorderKernelRef::GetJitConstants(const reorder_params& params) const { auto jit = ReorderKernelBase::GetJitConstants(params); + if (params.truncate) { + jit.AddConstant(MakeJitConstant("CONVERT_TRUNCATE", true)); + } jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0])); if (params.surface_input) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.h index 171ac4be10d..29bde597548 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/reorder/reorder_kernel_base.h @@ -26,6 +26,7 @@ struct reorder_params : public base_params { bool winograd = false; bool has_padded_output = false; bool surface_input = false; + bool truncate = false; ParamsKey GetParamsKey() const override { auto k = base_params::GetParamsKey(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convert.cpp b/src/plugins/intel_gpu/src/plugin/ops/convert.cpp index 81f06f764ae..19403db07ec 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convert.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convert.cpp @@ -41,7 +41,9 @@ static void CreateConvertOp(Program& p, const std::shared_ptr(), - cldnn::reorder_mean_mode::subtract); + cldnn::reorder_mean_mode::subtract, + cldnn::padding(), + true); p.add_primitive(*op, reorderPrim); } diff --git a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp index 2034221a37f..8fc5e7ee851 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp @@ -876,6 +876,53 @@ TEST(reorder_gpu, basic_convert_int8) { } } +TEST(reorder_gpu, basic_convert_uint8) { + + auto& engine = get_test_engine(); + layout in_layout = { 
type_to_data_type::value,format::byxf,{ 1, 1, 3, 3 } }; + layout byte_layout = { type_to_data_type::value, format::bfyx,{ 1, 1, 3, 3 } }; + std::initializer_list input_f = { 1.0f, -2.5f, 3.1f, -4.0f, 5.03f, -6.99f, 7.0f, -8.0f, 9.0f }; + std::list final_results = { 1.0f, 254.0f, 3.0f, 252.0f, 5.0f, 250.0f, 7.0f, 248.0f, 9.0f }; + + // Allocate memory for input image. + auto input_memory = engine.allocate_memory(in_layout); + set_values(input_memory, input_f); + + // Create input_layout description + input_layout input("input", in_layout); + + topology topology( + input, + reorder("reorder_input", + input_info(input), + cldnn::format::any, + cldnn::data_types::u8, + std::vector(), + cldnn::reorder_mean_mode::subtract, + cldnn::padding(), + true), + reorder("reorder2", input_info("reorder_input"), in_layout) + ); + + network network( + engine, + topology, + build_options{ + build_option::outputs({ "reorder_input", "reorder2"}) + }); + + network.set_input_data("input", input_memory); + + auto outputs = network.execute(); + + auto interm = outputs.at("reorder2").get_memory(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); + unsigned int cntr = 0; + for (const auto& exp : final_results) { + EXPECT_EQ(exp, interm_ptr[cntr++]); + } +} + TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { // Converts an ARGB(uint8) image to common clDNN input of bfyx FP32 //