From 0d798b7431704bf4c23403fc4e64f0e619addcae Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 2 Mar 2023 12:43:33 +0400 Subject: [PATCH] Building GPU plugin for Linux ARM64 (#16008) * Building GPU plugin for ARM64 * changed order of headers * Fixed clang-format --- cmake/features.cmake | 8 ++++++- .../c/tests/ov_remote_context_test.cpp | 3 +-- src/plugins/intel_gpu/CMakeLists.txt | 1 - .../intel_gpu/src/graph/CMakeLists.txt | 8 +++++-- src/plugins/intel_gpu/src/graph/half.cpp | 24 ++++++++++++++++++- .../src/graph/impls/cpu/detection_output.cpp | 15 ++++++++++-- src/plugins/intel_gpu/tests/CMakeLists.txt | 6 +++-- 7 files changed, 54 insertions(+), 11 deletions(-) diff --git a/cmake/features.cmake b/cmake/features.cmake index 4c98a2021e9..17c5ccc1b3c 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -14,7 +14,13 @@ ie_option (ENABLE_COMPILE_TOOL "Enables compile_tool" ON) ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON) -ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF) +if(X86_64) + set(ENABLE_INTEL_GPU_DEFAULT ON) +else() + set(ENABLE_INTEL_GPU_DEFAULT OFF) +endif() + +ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ${ENABLE_INTEL_GPU_DEFAULT} "X86_64 OR AARCH64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF) if (ANDROID OR (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)) # oneDNN doesn't support old compilers and android builds for now, so we'll diff --git a/src/bindings/c/tests/ov_remote_context_test.cpp b/src/bindings/c/tests/ov_remote_context_test.cpp index 8a9178da3af..12e61ac0f1b 100644 --- a/src/bindings/c/tests/ov_remote_context_test.cpp +++ b/src/bindings/c/tests/ov_remote_context_test.cpp @@ -2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - +#include "openvino/runtime/intel_gpu/ocl/ocl_wrapper.hpp" #include "ov_test.hpp" class ov_remote_context_ocl : public ::testing::TestWithParam { diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 306b56987d7..a76e015346b 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -10,7 +10,6 @@ set (TARGET_NAME "openvino_intel_gpu_plugin") if(CMAKE_COMPILER_IS_GNUCXX) ie_add_compiler_flags(-Wno-strict-aliasing) - ie_add_compiler_flags(-msse4.1 -msse4.2) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") diff --git a/src/plugins/intel_gpu/src/graph/CMakeLists.txt b/src/plugins/intel_gpu/src/graph/CMakeLists.txt index 1232ff8d973..efefc017cc6 100644 --- a/src/plugins/intel_gpu/src/graph/CMakeLists.txt +++ b/src/plugins/intel_gpu/src/graph/CMakeLists.txt @@ -61,5 +61,9 @@ endif() ov_install_static_lib(${TARGET_NAME} gpu) -ie_sse42_optimization_flags(sse4_2_flags) -set_source_files_properties(impls/cpu/detection_output.cpp half.cpp PROPERTIES COMPILE_FLAGS "${sse4_2_flags}") +if(ENABLE_SSE42) + ie_sse42_optimization_flags(sse4_2_flags) + set_source_files_properties(impls/cpu/detection_output.cpp half.cpp PROPERTIES + COMPILE_FLAGS "${sse4_2_flags}" + COMPILE_DEFINITIONS "HAVE_SSE") +endif() diff --git a/src/plugins/intel_gpu/src/graph/half.cpp b/src/plugins/intel_gpu/src/graph/half.cpp index cf53005fabc..2d0ae76e803 100644 --- a/src/plugins/intel_gpu/src/graph/half.cpp +++ b/src/plugins/intel_gpu/src/graph/half.cpp @@ -1,13 +1,21 @@ // Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include + #include +#ifdef HAVE_SSE +#include +#else +#include "openvino/core/type/float16.hpp" +#endif // HAVE_SSE + #include "intel_gpu/runtime/half.hpp" namespace cldnn { +#ifdef HAVE_SSE + float half_to_float(uint16_t value) { static const uint32_t FLOAT16_EXP_SHIFT = (23 - 10); static const uint32_t FLOAT16_EXP_MASK = 0x7C00; @@ -70,6 +78,7 @@ float half_to_float(uint16_t value) { float outf32 = *reinterpret_cast(&out32); return outf32; } + uint16_t float_to_half(float value) { #define TO_M128i(a) (*reinterpret_cast<__m128i*>(&(a))) #define TO_M128(a) (*const_cast<__m128*>(reinterpret_cast(&(a)))) @@ -140,4 +149,17 @@ uint16_t float_to_half(float value) { iPackedResult = _mm_or_si128(iPackedResult, iSignInWords); return (uint16_t)_mm_extract_epi16(iPackedResult, 0); } + +#else + +float half_to_float(uint16_t value) { + return ov::float16(value); +} + +uint16_t float_to_half(float value) { + return ov::float16(value); +} + +#endif // HAVE_SSE + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index 14a3e15b1a1..1fb39cc1bf4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -11,11 +11,14 @@ #include #include #include -#include -#include #include #include +#ifdef HAVE_SSE +#include +#include +#endif // HAVE_SSE + namespace cldnn { namespace cpu { @@ -554,9 +557,12 @@ public: if (stride == 1 && std::is_same::value) { float const* confidence_ptr_float = (float const*)(&(*confidence_data)); confidence_ptr_float += idx; +#ifdef HAVE_SSE __m128 threshold = _mm_load_ps1(&confidence_threshold); +#endif // HAVE_SSE for (int prior = 0; prior < num_of_priors; ++prior) { int cls = 0; +#ifdef HAVE_SSE for (; cls + 3 < num_classes; cls += 4) { __m128 scores = _mm_loadu_ps(confidence_ptr_float); confidence_ptr_float += 4; @@ -584,6 +590,7 @@ public: label_to_scores[cls + 3].emplace_back(s, prior); } } +#endif // HAVE_SSE for (; cls < num_classes; ++cls) { float score = *confidence_ptr_float; if (score > confidence_threshold) { @@ -646,12 +653,15 @@ public: if (stride == 1 && std::is_same::value) { float const* confidence_ptr_float = (float const*)(&(*confidence_data)); confidence_ptr_float += idx; +#ifdef HAVE_SSE __m128 threshold = _mm_load_ps1(&confidence_threshold); +#endif // HAVE_SSE for (int prior = 0; prior < num_of_priors; ++prior) { int idx_start = (background_label_id == 0 ? 1 : 0); int cls = idx_start; float max_score = 0; int max_cls = 0; +#ifdef HAVE_SSE for (; cls + 3 < num_classes; cls += 4) { if ((background_label_id == 0) && (cls == idx_start)) { confidence_ptr_float += 1; @@ -695,6 +705,7 @@ public: } } } +#endif // HAVE_SSE for (; cls < num_classes; ++cls) { float score = *confidence_ptr_float; if (score > confidence_threshold) { diff --git a/src/plugins/intel_gpu/tests/CMakeLists.txt b/src/plugins/intel_gpu/tests/CMakeLists.txt index 8e04a4f71e0..99df88f6868 100644 --- a/src/plugins/intel_gpu/tests/CMakeLists.txt +++ b/src/plugins/intel_gpu/tests/CMakeLists.txt @@ -77,8 +77,10 @@ elseif((NOT ANDROID) AND (UNIX)) target_link_libraries(${TARGET_NAME} PRIVATE pthread) endif() -ie_sse42_optimization_flags(sse4_2_flags) -set_source_files_properties(${SOURCES_ALL} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}") +if(ENABLE_SSE42) + ie_sse42_optimization_flags(sse4_2_flags) + set_source_files_properties(${SOURCES_ALL} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}") +endif() install(TARGETS ${TARGET_NAME} RUNTIME DESTINATION tests