Building GPU plugin for Linux ARM64 (#16008)
* Building GPU plugin for ARM64 * changed order of headers * Fixed clang-format
This commit is contained in:
parent
24b0baa0d1
commit
0d798b7431
@ -14,7 +14,13 @@ ie_option (ENABLE_COMPILE_TOOL "Enables compile_tool" ON)
|
||||
|
||||
ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON)
|
||||
|
||||
ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
|
||||
# Choose the default value for ENABLE_INTEL_GPU: ON when X86_64 is set,
# OFF for every other architecture.
if(X86_64)
|
||||
set(ENABLE_INTEL_GPU_DEFAULT ON)
|
||||
else()
|
||||
set(ENABLE_INTEL_GPU_DEFAULT OFF)
|
||||
endif()
|
||||
|
||||
# The option is offered when the target is X86_64 or AARCH64 and none of
# APPLE / MINGW / WINDOWS_STORE / WINDOWS_PHONE is set; the trailing OFF is
# passed as the last argument (presumably the value forced when the
# conditions do not hold - confirm against ie_dependent_option).
ie_dependent_option (ENABLE_INTEL_GPU "GPU OpenCL-based plugin for OpenVINO Runtime" ${ENABLE_INTEL_GPU_DEFAULT} "X86_64 OR AARCH64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
|
||||
|
||||
if (ANDROID OR (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
|
||||
# oneDNN doesn't support old compilers and android builds for now, so we'll
|
||||
|
@ -2,8 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <CL/cl2.hpp>
|
||||
|
||||
#include "openvino/runtime/intel_gpu/ocl/ocl_wrapper.hpp"
|
||||
#include "ov_test.hpp"
|
||||
|
||||
class ov_remote_context_ocl : public ::testing::TestWithParam<std::string> {
|
||||
|
@ -10,7 +10,6 @@ set (TARGET_NAME "openvino_intel_gpu_plugin")
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
ie_add_compiler_flags(-Wno-strict-aliasing)
|
||||
ie_add_compiler_flags(-msse4.1 -msse4.2)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
|
@ -61,5 +61,9 @@ endif()
|
||||
|
||||
ov_install_static_lib(${TARGET_NAME} gpu)
|
||||
|
||||
ie_sse42_optimization_flags(sse4_2_flags)
|
||||
set_source_files_properties(impls/cpu/detection_output.cpp half.cpp PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
|
||||
# Only when ENABLE_SSE42 is set: compile detection_output.cpp and half.cpp
# with the flags stored in sse4_2_flags and define HAVE_SSE for them, which
# is the macro those sources test with #ifdef to select the intrinsic paths.
if(ENABLE_SSE42)
|
||||
# NOTE(review): presumably fills sse4_2_flags with the compiler's SSE4.2
# options - confirm against the helper's definition.
ie_sse42_optimization_flags(sse4_2_flags)
|
||||
set_source_files_properties(impls/cpu/detection_output.cpp half.cpp PROPERTIES
|
||||
COMPILE_FLAGS "${sse4_2_flags}"
|
||||
COMPILE_DEFINITIONS "HAVE_SSE")
|
||||
endif()
|
||||
|
@ -1,13 +1,21 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <immintrin.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include "openvino/core/type/float16.hpp"
|
||||
#endif // HAVE_SSE
|
||||
|
||||
#include "intel_gpu/runtime/half.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
|
||||
float half_to_float(uint16_t value) {
|
||||
static const uint32_t FLOAT16_EXP_SHIFT = (23 - 10);
|
||||
static const uint32_t FLOAT16_EXP_MASK = 0x7C00;
|
||||
@ -70,6 +78,7 @@ float half_to_float(uint16_t value) {
|
||||
float outf32 = *reinterpret_cast<float*>(&out32);
|
||||
return outf32;
|
||||
}
|
||||
|
||||
uint16_t float_to_half(float value) {
|
||||
#define TO_M128i(a) (*reinterpret_cast<__m128i*>(&(a)))
|
||||
#define TO_M128(a) (*const_cast<__m128*>(reinterpret_cast<const __m128*>(&(a))))
|
||||
@ -140,4 +149,17 @@ uint16_t float_to_half(float value) {
|
||||
iPackedResult = _mm_or_si128(iPackedResult, iSignInWords);
|
||||
return (uint16_t)_mm_extract_epi16(iPackedResult, 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
float half_to_float(uint16_t value) {
|
||||
return ov::float16(value);
|
||||
}
|
||||
|
||||
uint16_t float_to_half(float value) {
|
||||
return ov::float16(value);
|
||||
}
|
||||
|
||||
#endif // HAVE_SSE
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -11,11 +11,14 @@
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#endif // HAVE_SSE
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
@ -554,9 +557,12 @@ public:
|
||||
if (stride == 1 && std::is_same<dtype, float>::value) {
|
||||
float const* confidence_ptr_float = (float const*)(&(*confidence_data));
|
||||
confidence_ptr_float += idx;
|
||||
#ifdef HAVE_SSE
|
||||
__m128 threshold = _mm_load_ps1(&confidence_threshold);
|
||||
#endif // HAVE_SSE
|
||||
for (int prior = 0; prior < num_of_priors; ++prior) {
|
||||
int cls = 0;
|
||||
#ifdef HAVE_SSE
|
||||
for (; cls + 3 < num_classes; cls += 4) {
|
||||
__m128 scores = _mm_loadu_ps(confidence_ptr_float);
|
||||
confidence_ptr_float += 4;
|
||||
@ -584,6 +590,7 @@ public:
|
||||
label_to_scores[cls + 3].emplace_back(s, prior);
|
||||
}
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
for (; cls < num_classes; ++cls) {
|
||||
float score = *confidence_ptr_float;
|
||||
if (score > confidence_threshold) {
|
||||
@ -646,12 +653,15 @@ public:
|
||||
if (stride == 1 && std::is_same<dtype, float>::value) {
|
||||
float const* confidence_ptr_float = (float const*)(&(*confidence_data));
|
||||
confidence_ptr_float += idx;
|
||||
#ifdef HAVE_SSE
|
||||
__m128 threshold = _mm_load_ps1(&confidence_threshold);
|
||||
#endif // HAVE_SSE
|
||||
for (int prior = 0; prior < num_of_priors; ++prior) {
|
||||
int idx_start = (background_label_id == 0 ? 1 : 0);
|
||||
int cls = idx_start;
|
||||
float max_score = 0;
|
||||
int max_cls = 0;
|
||||
#ifdef HAVE_SSE
|
||||
for (; cls + 3 < num_classes; cls += 4) {
|
||||
if ((background_label_id == 0) && (cls == idx_start)) {
|
||||
confidence_ptr_float += 1;
|
||||
@ -695,6 +705,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
for (; cls < num_classes; ++cls) {
|
||||
float score = *confidence_ptr_float;
|
||||
if (score > confidence_threshold) {
|
||||
|
@ -77,8 +77,10 @@ elseif((NOT ANDROID) AND (UNIX))
|
||||
target_link_libraries(${TARGET_NAME} PRIVATE pthread)
|
||||
endif()
|
||||
|
||||
ie_sse42_optimization_flags(sse4_2_flags)
|
||||
set_source_files_properties(${SOURCES_ALL} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
|
||||
# Only when ENABLE_SSE42 is set: compile every file listed in SOURCES_ALL
# with the flags stored in sse4_2_flags. No HAVE_SSE definition is added here.
if(ENABLE_SSE42)
|
||||
# NOTE(review): presumably fills sse4_2_flags with the compiler's SSE4.2
# options - confirm against the helper's definition.
ie_sse42_optimization_flags(sse4_2_flags)
|
||||
set_source_files_properties(${SOURCES_ALL} PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
|
||||
endif()
|
||||
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
RUNTIME DESTINATION tests
|
||||
|
Loading…
Reference in New Issue
Block a user