Building GPU plugin for Linux ARM64 (#16008)

* Building GPU plugin for ARM64

* Changed the order of headers

* Fixed clang-format
This commit is contained in:
Ilya Lavrenov 2023-03-02 12:43:33 +04:00 committed by GitHub
parent 24b0baa0d1
commit 0d798b7431
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 54 additions and 11 deletions

View File

@ -14,7 +14,13 @@ ie_option (ENABLE_COMPILE_TOOL "Enables compile_tool" ON)
ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON)
ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
# Default state of the GPU plugin option: OFF unless the target is x86-64.
# The value is consumed as the default of the ENABLE_INTEL_GPU option below.
set(ENABLE_INTEL_GPU_DEFAULT OFF)
if(X86_64)
    set(ENABLE_INTEL_GPU_DEFAULT ON)
endif()
ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ${ENABLE_INTEL_GPU_DEFAULT} "X86_64 OR AARCH64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
if (ANDROID OR (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
# oneDNN doesn't support old compilers and android builds for now, so we'll

View File

@ -2,8 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <CL/cl2.hpp>
#include "openvino/runtime/intel_gpu/ocl/ocl_wrapper.hpp"
#include "ov_test.hpp"
class ov_remote_context_ocl : public ::testing::TestWithParam<std::string> {

View File

@ -10,7 +10,6 @@ set (TARGET_NAME "openvino_intel_gpu_plugin")
if(CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-strict-aliasing)
ie_add_compiler_flags(-msse4.1 -msse4.2)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")

View File

@ -61,5 +61,9 @@ endif()
ov_install_static_lib(${TARGET_NAME} gpu)
ie_sse42_optimization_flags(sse4_2_flags)
set_source_files_properties(impls/cpu/detection_output.cpp half.cpp PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
# Only when SSE4.2 is enabled: compile the two sources that contain SSE
# intrinsics with the SSE4.2 flags and define HAVE_SSE for them.
if(ENABLE_SSE42)
    ie_sse42_optimization_flags(sse4_2_flags)
    set_property(SOURCE impls/cpu/detection_output.cpp half.cpp
                 PROPERTY COMPILE_FLAGS "${sse4_2_flags}")
    set_property(SOURCE impls/cpu/detection_output.cpp half.cpp
                 PROPERTY COMPILE_DEFINITIONS "HAVE_SSE")
endif()

View File

@ -1,13 +1,21 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <immintrin.h>
#include <stdint.h>
#ifdef HAVE_SSE
#include <immintrin.h>
#else
#include "openvino/core/type/float16.hpp"
#endif // HAVE_SSE
#include "intel_gpu/runtime/half.hpp"
namespace cldnn {
#ifdef HAVE_SSE
float half_to_float(uint16_t value) {
static const uint32_t FLOAT16_EXP_SHIFT = (23 - 10);
static const uint32_t FLOAT16_EXP_MASK = 0x7C00;
@ -70,6 +78,7 @@ float half_to_float(uint16_t value) {
float outf32 = *reinterpret_cast<float*>(&out32);
return outf32;
}
uint16_t float_to_half(float value) {
#define TO_M128i(a) (*reinterpret_cast<__m128i*>(&(a)))
#define TO_M128(a) (*const_cast<__m128*>(reinterpret_cast<const __m128*>(&(a))))
@ -140,4 +149,17 @@ uint16_t float_to_half(float value) {
iPackedResult = _mm_or_si128(iPackedResult, iSignInWords);
return (uint16_t)_mm_extract_epi16(iPackedResult, 0);
}
#else
// Portable fallback used when HAVE_SSE is not defined.
// Decodes a raw IEEE 754 binary16 bit pattern into a float.
//
// `value` carries the half-precision *bits*, so it has to be reinterpreted via
// ov::float16::from_bits(). Constructing ov::float16(value) from an integer
// performs a numeric conversion instead (0x3C00 would yield 15360.0 rather
// than 1.0), which does not match the SSE implementation of this function.
float half_to_float(uint16_t value) {
    return static_cast<float>(ov::float16::from_bits(value));
}
// Portable fallback used when HAVE_SSE is not defined.
// Encodes a float as a raw IEEE 754 binary16 bit pattern.
//
// The result must be the half-precision *bits*, obtained with to_bits().
// Returning the ov::float16 object directly would convert it float -> uint16_t
// numerically (1.0f would yield 1 rather than 0x3C00), which does not match
// the SSE implementation of this function.
uint16_t float_to_half(float value) {
    return ov::float16(value).to_bits();
}
#endif // HAVE_SSE
} // namespace cldnn

View File

@ -11,11 +11,14 @@
#include <stdexcept>
#include <string>
#include <type_traits>
#include <immintrin.h>
#include <xmmintrin.h>
#include <vector>
#include <utility>
#ifdef HAVE_SSE
#include <immintrin.h>
#include <xmmintrin.h>
#endif // HAVE_SSE
namespace cldnn {
namespace cpu {
@ -554,9 +557,12 @@ public:
if (stride == 1 && std::is_same<dtype, float>::value) {
float const* confidence_ptr_float = (float const*)(&(*confidence_data));
confidence_ptr_float += idx;
#ifdef HAVE_SSE
__m128 threshold = _mm_load_ps1(&confidence_threshold);
#endif // HAVE_SSE
for (int prior = 0; prior < num_of_priors; ++prior) {
int cls = 0;
#ifdef HAVE_SSE
for (; cls + 3 < num_classes; cls += 4) {
__m128 scores = _mm_loadu_ps(confidence_ptr_float);
confidence_ptr_float += 4;
@ -584,6 +590,7 @@ public:
label_to_scores[cls + 3].emplace_back(s, prior);
}
}
#endif // HAVE_SSE
for (; cls < num_classes; ++cls) {
float score = *confidence_ptr_float;
if (score > confidence_threshold) {
@ -646,12 +653,15 @@ public:
if (stride == 1 && std::is_same<dtype, float>::value) {
float const* confidence_ptr_float = (float const*)(&(*confidence_data));
confidence_ptr_float += idx;
#ifdef HAVE_SSE
__m128 threshold = _mm_load_ps1(&confidence_threshold);
#endif // HAVE_SSE
for (int prior = 0; prior < num_of_priors; ++prior) {
int idx_start = (background_label_id == 0 ? 1 : 0);
int cls = idx_start;
float max_score = 0;
int max_cls = 0;
#ifdef HAVE_SSE
for (; cls + 3 < num_classes; cls += 4) {
if ((background_label_id == 0) && (cls == idx_start)) {
confidence_ptr_float += 1;
@ -695,6 +705,7 @@ public:
}
}
}
#endif // HAVE_SSE
for (; cls < num_classes; ++cls) {
float score = *confidence_ptr_float;
if (score > confidence_threshold) {

View File

@ -77,8 +77,10 @@ elseif((NOT ANDROID) AND (UNIX))
target_link_libraries(${TARGET_NAME} PRIVATE pthread)
endif()
ie_sse42_optimization_flags(sse4_2_flags)
set_source_files_properties(${SOURCES_ALL} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
# Only when SSE4.2 is enabled: apply the SSE4.2 compile flags to every file
# listed in SOURCES_ALL.
if(ENABLE_SSE42)
    ie_sse42_optimization_flags(sse4_2_flags)
    set_property(SOURCE ${SOURCES_ALL} PROPERTY COMPILE_FLAGS "${sse4_2_flags}")
endif()
install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION tests