ARM Compute Library flags (#17640)
This commit is contained in:
parent
fe1ac700f0
commit
ef9b3e3cd1
@ -46,8 +46,7 @@ jobs:
|
|||||||
system.debug: true
|
system.debug: true
|
||||||
VSTS_HTTP_RETRY: 5
|
VSTS_HTTP_RETRY: 5
|
||||||
VSTS_HTTP_TIMEOUT: 200
|
VSTS_HTTP_TIMEOUT: 200
|
||||||
OPENVINO_ARCH: 'aarch64'
|
NUM_PROC: 2
|
||||||
NUM_PROC: 1
|
|
||||||
BUILD_TYPE: Release
|
BUILD_TYPE: Release
|
||||||
OPENVINO_REPO_DIR: $(Build.Repository.LocalPath)
|
OPENVINO_REPO_DIR: $(Build.Repository.LocalPath)
|
||||||
BUILD_OPENVINO: $(WORK_DIR)/build
|
BUILD_OPENVINO: $(WORK_DIR)/build
|
||||||
@ -116,14 +115,9 @@ jobs:
|
|||||||
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/requirements.txt
|
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/requirements.txt
|
||||||
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
|
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
|
||||||
# install dependencies needed to build CPU plugin for ARM
|
# install dependencies needed to build CPU plugin for ARM
|
||||||
sudo -E apt --assume-yes install scons crossbuild-essential-arm64
|
sudo -E apt --assume-yes install scons gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu
|
||||||
# generic dependencies
|
# generic dependencies
|
||||||
sudo -E apt --assume-yes install cmake ccache
|
sudo -E apt --assume-yes install cmake ccache ninja-build unzip
|
||||||
# Speed up build
|
|
||||||
sudo -E apt -y --no-install-recommends install unzip
|
|
||||||
wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip
|
|
||||||
unzip ninja-linux.zip
|
|
||||||
sudo cp -v ninja /usr/local/bin/
|
|
||||||
displayName: 'Install dependencies'
|
displayName: 'Install dependencies'
|
||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
@ -133,13 +127,15 @@ jobs:
|
|||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
python3 -m pip install conan
|
python3 -m pip install conan
|
||||||
|
# install build profile compilers
|
||||||
|
sudo -E apt --assume-yes install gcc g++
|
||||||
# generate build profile
|
# generate build profile
|
||||||
conan profile detect
|
conan profile detect
|
||||||
# generate host profile for linux_arm64
|
# generate host profile for linux_arm64
|
||||||
echo "include(default)" > $(BUILD_OPENVINO)/linux_arm64
|
echo "include(default)" > $(BUILD_OPENVINO)/linux_arm64
|
||||||
echo "[buildenv]" >> $(BUILD_OPENVINO)/linux_arm64
|
echo "[buildenv]" >> $(BUILD_OPENVINO)/linux_arm64
|
||||||
echo "CC=aarch64-linux-gnu-gcc" >> $(BUILD_OPENVINO)/linux_arm64
|
echo "CC=aarch64-linux-gnu-gcc-10" >> $(BUILD_OPENVINO)/linux_arm64
|
||||||
echo "CXX=aarch64-linux-gnu-g++" >> $(BUILD_OPENVINO)/linux_arm64
|
echo "CXX=aarch64-linux-gnu-g++-10" >> $(BUILD_OPENVINO)/linux_arm64
|
||||||
# install OpenVINO dependencies
|
# install OpenVINO dependencies
|
||||||
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
export CMAKE_C_COMPILER_LAUNCHER=ccache
|
export CMAKE_C_COMPILER_LAUNCHER=ccache
|
||||||
|
51
src/plugins/intel_cpu/thirdparty/ACLConfig.cmake
vendored
51
src/plugins/intel_cpu/thirdparty/ACLConfig.cmake
vendored
@ -87,6 +87,42 @@ elseif(ENABLE_ARM_COMPUTE_CMAKE)
|
|||||||
# required by oneDNN to attempt to parse ACL version
|
# required by oneDNN to attempt to parse ACL version
|
||||||
set(ENV{ACL_ROOT_DIR} "${ARM_COMPUTE_SOURCE_DIR}")
|
set(ENV{ACL_ROOT_DIR} "${ARM_COMPUTE_SOURCE_DIR}")
|
||||||
elseif(NOT TARGET arm_compute::arm_compute)
|
elseif(NOT TARGET arm_compute::arm_compute)
|
||||||
|
#
|
||||||
|
# Options
|
||||||
|
#
|
||||||
|
|
||||||
|
set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library")
|
||||||
|
|
||||||
|
set(ARM_COMPUTE_TARGET_GENERIC_ARCHS armv8a
|
||||||
|
armv8.2-a
|
||||||
|
armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2
|
||||||
|
armv8r64 # the same as armv8.4-a
|
||||||
|
)
|
||||||
|
if(ARM)
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a)
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCHS armv7a armv7a-hf
|
||||||
|
# requires estate=32
|
||||||
|
${ARM_COMPUTE_TARGET_GENERIC_ARCHS})
|
||||||
|
else()
|
||||||
|
if(APPLE)
|
||||||
|
# Apple M1 / M2 is assumed
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8.2-a)
|
||||||
|
else()
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
|
||||||
|
endif()
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a
|
||||||
|
arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
|
||||||
|
# used with estate=64
|
||||||
|
${ARM_COMPUTE_TARGET_GENERIC_ARCHS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(ARM_COMPUTE_TARGET_ARCH "${ARM_COMPUTE_TARGET_ARCH_DEFAULT}" CACHE STRING "Architecture for ARM ComputeLibrary")
|
||||||
|
set_property(CACHE ARM_COMPUTE_TARGET_ARCH PROPERTY STRINGS ${ARM_COMPUTE_TARGET_ARCHS})
|
||||||
|
|
||||||
|
#
|
||||||
|
# Configure & build
|
||||||
|
#
|
||||||
|
|
||||||
set(ARM_COMPUTE_SOURCE_DIR "${intel_cpu_thirdparty_SOURCE_DIR}/ComputeLibrary")
|
set(ARM_COMPUTE_SOURCE_DIR "${intel_cpu_thirdparty_SOURCE_DIR}/ComputeLibrary")
|
||||||
set(ARM_COMPUTE_BINARY_DIR "${intel_cpu_thirdparty_BINARY_DIR}/ComputeLibrary")
|
set(ARM_COMPUTE_BINARY_DIR "${intel_cpu_thirdparty_BINARY_DIR}/ComputeLibrary")
|
||||||
|
|
||||||
@ -128,8 +164,6 @@ elseif(NOT TARGET arm_compute::arm_compute)
|
|||||||
reference_openmp=0
|
reference_openmp=0
|
||||||
validation_tests=0
|
validation_tests=0
|
||||||
benchmark_tests=0
|
benchmark_tests=0
|
||||||
# TODO: check this for Apple Silicon
|
|
||||||
# multi_isa=1
|
|
||||||
# TODO: use CC for ARM compute library to minimize binary size
|
# TODO: use CC for ARM compute library to minimize binary size
|
||||||
# build_config=<file>
|
# build_config=<file>
|
||||||
# TODO: use data_type_support to disable useless kernels
|
# TODO: use data_type_support to disable useless kernels
|
||||||
@ -137,6 +171,16 @@ elseif(NOT TARGET arm_compute::arm_compute)
|
|||||||
arch=${ARM_COMPUTE_TARGET_ARCH}
|
arch=${ARM_COMPUTE_TARGET_ARCH}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if(ARM)
|
||||||
|
list(APPEND ARM_COMPUTE_OPTIONS estate=32)
|
||||||
|
else()
|
||||||
|
list(APPEND ARM_COMPUTE_OPTIONS estate=64)
|
||||||
|
if(NOT APPLE AND CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2)
|
||||||
|
# arm_sve.h header is not available on gcc older than 10.2
|
||||||
|
list(APPEND ARM_COMPUTE_OPTIONS multi_isa=1)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if(NOT MSVC64)
|
if(NOT MSVC64)
|
||||||
list(APPEND ARM_COMPUTE_OPTIONS
|
list(APPEND ARM_COMPUTE_OPTIONS
|
||||||
build_dir=${ARM_COMPUTE_BINARY_DIR}
|
build_dir=${ARM_COMPUTE_BINARY_DIR}
|
||||||
@ -342,8 +386,7 @@ elseif(NOT TARGET arm_compute::arm_compute)
|
|||||||
|
|
||||||
add_library(arm_compute::half INTERFACE IMPORTED GLOBAL)
|
add_library(arm_compute::half INTERFACE IMPORTED GLOBAL)
|
||||||
set_target_properties(arm_compute::half PROPERTIES
|
set_target_properties(arm_compute::half PROPERTIES
|
||||||
INTERFACE_INCLUDE_DIRECTORIES ${ARM_COMPUTE_SOURCE_DIR}/include
|
INTERFACE_INCLUDE_DIRECTORIES ${ARM_COMPUTE_SOURCE_DIR}/include)
|
||||||
OSX_ARCHITECTURES arm64)
|
|
||||||
|
|
||||||
# Helpers for oneDNN integration
|
# Helpers for oneDNN integration
|
||||||
|
|
||||||
|
72
src/plugins/intel_cpu/thirdparty/CMakeLists.txt
vendored
72
src/plugins/intel_cpu/thirdparty/CMakeLists.txt
vendored
@ -4,9 +4,9 @@
|
|||||||
|
|
||||||
project(intel_cpu_thirdparty)
|
project(intel_cpu_thirdparty)
|
||||||
|
|
||||||
if((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_GREATER_EQUAL "1910"))
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
|
||||||
# 1910 version of Visual Studio 2017
|
# Visual Studio 2017 (v141 toolset)
|
||||||
# This flagis needed for enabling SIMD vectorization with command '#pragma omp simd'.
|
# This flag is needed for enabling SIMD vectorization with command '#pragma omp simd'.
|
||||||
# Compilation with '/openmp:experimental' key allows us to enable vectorization capability in MSVC.
|
# Compilation with '/openmp:experimental' key allows us to enable vectorization capability in MSVC.
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
|
||||||
endif()
|
endif()
|
||||||
@ -38,13 +38,8 @@ function(ov_add_onednn)
|
|||||||
set(DNNL_ENABLE_WORKLOAD "INFERENCE" CACHE STRING "" FORCE)
|
set(DNNL_ENABLE_WORKLOAD "INFERENCE" CACHE STRING "" FORCE)
|
||||||
|
|
||||||
# Allow to enable oneDNN verbose with CPU_DEBUG_CAPS and rely on oneDNN default configuration otherwise
|
# Allow to enable oneDNN verbose with CPU_DEBUG_CAPS and rely on oneDNN default configuration otherwise
|
||||||
if (ENABLE_CPU_DEBUG_CAPS)
|
if(ENABLE_CPU_DEBUG_CAPS)
|
||||||
set(DNNL_VERBOSE "ON" CACHE STRING "" FORCE)
|
set(DNNL_VERBOSE ON CACHE STRING "" FORCE)
|
||||||
endif()
|
|
||||||
|
|
||||||
set(SDL_cmake_included ON) ## to skip internal SDL flags. SDL flags are already set on IE level
|
|
||||||
if (ANDROID OR ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR OV_COMPILER_IS_CLANG) AND NOT (THREADING STREQUAL "OMP")))
|
|
||||||
set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM..
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(X86_64)
|
if(X86_64)
|
||||||
@ -53,43 +48,23 @@ function(ov_add_onednn)
|
|||||||
set(DNNL_TARGET_ARCH "X86" CACHE STRING "" FORCE)
|
set(DNNL_TARGET_ARCH "X86" CACHE STRING "" FORCE)
|
||||||
elseif(RISCV64)
|
elseif(RISCV64)
|
||||||
set(DNNL_TARGET_ARCH "RV64" CACHE STRING "" FORCE)
|
set(DNNL_TARGET_ARCH "RV64" CACHE STRING "" FORCE)
|
||||||
elseif(AARCH64 OR ARM)
|
elseif(ARM)
|
||||||
# TODO: fix warning
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
|
|
||||||
ie_add_compiler_flags(-Wno-macro-redefined)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library")
|
|
||||||
set(DNNL_USE_ACL ON CACHE BOOL "" FORCE)
|
|
||||||
|
|
||||||
if(ARM)
|
|
||||||
set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE)
|
set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE)
|
||||||
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a)
|
elseif(AARCH64)
|
||||||
set(ARM_COMPUTE_TARGET_ARCHS armv7a armv7a-hf)
|
|
||||||
else()
|
|
||||||
set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
|
set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
|
||||||
# move to separate ACL cmake
|
|
||||||
if(APPLE)
|
|
||||||
# Apple M1 / M2 is assumed
|
|
||||||
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv8.2-a)
|
|
||||||
else()
|
|
||||||
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
|
|
||||||
endif()
|
|
||||||
set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a
|
|
||||||
arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
|
|
||||||
armv8a
|
|
||||||
armv8.2-a armv8.2-a-sve
|
|
||||||
armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2
|
|
||||||
armv8r64 # the same as armv8.4-a
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(ARM_COMPUTE_TARGET_ARCH "${ARM_COMPUTE_TARGET_ARCH_DEFAULT}" CACHE STRING "Architecture for ARM ComputeLibrary")
|
|
||||||
set_property(CACHE ARM_COMPUTE_TARGET_ARCH PROPERTY STRINGS ${ARM_COMPUTE_TARGET_ARCHS})
|
|
||||||
else()
|
else()
|
||||||
message(FATAL_ERROR "Unsupported system processor ${CMAKE_SYSTEM_PROCESSOR}")
|
message(FATAL_ERROR "Unsupported system processor ${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(AARCH64 OR ARM)
|
||||||
|
set(DNNL_USE_ACL ON CACHE BOOL "Use ARM Conpute Library kernels in oneDNN" FORCE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(SDL_cmake_included ON) ## to skip internal SDL flags. SDL flags are already set on IE level
|
||||||
|
if (ANDROID OR ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR OV_COMPILER_IS_CLANG) AND NOT (THREADING STREQUAL "OMP")))
|
||||||
|
set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM..
|
||||||
|
endif()
|
||||||
|
|
||||||
# WA for old TBBConfig.cmake like tbb2019_20180718oss
|
# WA for old TBBConfig.cmake like tbb2019_20180718oss
|
||||||
# they don't check that imported target is already created
|
# they don't check that imported target is already created
|
||||||
if(TBB_FOUND)
|
if(TBB_FOUND)
|
||||||
@ -101,15 +76,12 @@ function(ov_add_onednn)
|
|||||||
link_libraries(TBB::tbb)
|
link_libraries(TBB::tbb)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
|
|
||||||
set(DNNL_TARGET_ARCH "X64")
|
|
||||||
elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
|
|
||||||
set(DNNL_TARGET_ARCH "AARCH64")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
|
if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
|
||||||
ie_add_compiler_flags(-Wno-undef)
|
ie_add_compiler_flags(-Wno-undef)
|
||||||
ie_add_compiler_flags(-Wno-missing-declarations)
|
ie_add_compiler_flags(-Wno-missing-declarations)
|
||||||
|
if(ARM OR AARCH64)
|
||||||
|
ie_add_compiler_flags(-Wno-macro-redefined)
|
||||||
|
endif()
|
||||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11 AND CMAKE_COMPILER_IS_GNUCXX)
|
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11 AND CMAKE_COMPILER_IS_GNUCXX)
|
||||||
ie_add_compiler_flags(-Wno-array-bounds)
|
ie_add_compiler_flags(-Wno-array-bounds)
|
||||||
ie_add_compiler_flags(-Wno-stringop-overflow)
|
ie_add_compiler_flags(-Wno-stringop-overflow)
|
||||||
@ -137,7 +109,9 @@ function(ov_add_onednn)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
# to find our FindACL.cmake
|
# to find our FindACL.cmake
|
||||||
|
if(DNNL_USE_ACL)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${intel_cpu_thirdparty_SOURCE_DIR}")
|
list(APPEND CMAKE_MODULE_PATH "${intel_cpu_thirdparty_SOURCE_DIR}")
|
||||||
|
endif()
|
||||||
|
|
||||||
add_subdirectory(onednn EXCLUDE_FROM_ALL)
|
add_subdirectory(onednn EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
@ -157,6 +131,4 @@ function(ov_add_onednn)
|
|||||||
endif()
|
endif()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
if(ENABLE_INTEL_CPU)
|
ov_add_onednn()
|
||||||
ov_add_onednn()
|
|
||||||
endif()
|
|
||||||
|
Loading…
Reference in New Issue
Block a user