ARM Compute Library flags (#17640)

This commit is contained in:
Ilya Lavrenov 2023-05-22 13:23:43 +04:00 committed by GitHub
parent fe1ac700f0
commit ef9b3e3cd1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 68 deletions

View File

@ -46,8 +46,7 @@ jobs:
system.debug: true
VSTS_HTTP_RETRY: 5
VSTS_HTTP_TIMEOUT: 200
OPENVINO_ARCH: 'aarch64'
NUM_PROC: 1
NUM_PROC: 2
BUILD_TYPE: Release
OPENVINO_REPO_DIR: $(Build.Repository.LocalPath)
BUILD_OPENVINO: $(WORK_DIR)/build
@ -116,14 +115,9 @@ jobs:
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/requirements.txt
python3 -m pip install -r $(OPENVINO_REPO_DIR)/src/bindings/python/wheel/requirements-dev.txt
# install dependencies needed to build CPU plugin for ARM
sudo -E apt --assume-yes install scons crossbuild-essential-arm64
sudo -E apt --assume-yes install scons gcc-10-aarch64-linux-gnu g++-10-aarch64-linux-gnu
# generic dependencies
sudo -E apt --assume-yes install cmake ccache
# Speed up build
sudo -E apt -y --no-install-recommends install unzip
wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip
unzip ninja-linux.zip
sudo cp -v ninja /usr/local/bin/
sudo -E apt --assume-yes install cmake ccache ninja-build unzip
displayName: 'Install dependencies'
- script: |
@ -133,13 +127,15 @@ jobs:
- script: |
python3 -m pip install conan
# install build profile compilers
sudo -E apt --assume-yes install gcc g++
# generate build profile
conan profile detect
# generate host profile for linux_arm64
echo "include(default)" > $(BUILD_OPENVINO)/linux_arm64
echo "[buildenv]" >> $(BUILD_OPENVINO)/linux_arm64
echo "CC=aarch64-linux-gnu-gcc" >> $(BUILD_OPENVINO)/linux_arm64
echo "CXX=aarch64-linux-gnu-g++" >> $(BUILD_OPENVINO)/linux_arm64
echo "CC=aarch64-linux-gnu-gcc-10" >> $(BUILD_OPENVINO)/linux_arm64
echo "CXX=aarch64-linux-gnu-g++-10" >> $(BUILD_OPENVINO)/linux_arm64
# install OpenVINO dependencies
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export CMAKE_C_COMPILER_LAUNCHER=ccache

View File

@ -87,6 +87,42 @@ elseif(ENABLE_ARM_COMPUTE_CMAKE)
# required by oneDNN to attempt to parse ACL version
set(ENV{ACL_ROOT_DIR} "${ARM_COMPUTE_SOURCE_DIR}")
elseif(NOT TARGET arm_compute::arm_compute)
#
# Options
#
# User-tunable cache entries for the ARM Compute Library build. Both
# ARM_COMPUTE_SCONS_JOBS and ARM_COMPUTE_TARGET_ARCH are declared without FORCE,
# so a value already present in the cache (e.g. passed with -D) is kept.
set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library")
# Architecture names that are appended to the selectable list in both the
# ARM and the non-ARM branch below.
set(ARM_COMPUTE_TARGET_GENERIC_ARCHS armv8a
armv8.2-a
armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2
armv8r64 # the same as armv8.4-a
)
# Pick the default architecture and the list of selectable architectures
# depending on whether the ARM variable is set.
if(ARM)
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a)
set(ARM_COMPUTE_TARGET_ARCHS armv7a armv7a-hf
# requires estate=32
${ARM_COMPUTE_TARGET_GENERIC_ARCHS})
else()
if(APPLE)
# Apple M1 / M2 is assumed
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8.2-a)
else()
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
endif()
set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a
arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
# used with estate=64
${ARM_COMPUTE_TARGET_GENERIC_ARCHS})
endif()
# The default computed above only seeds the cache entry.
set(ARM_COMPUTE_TARGET_ARCH "${ARM_COMPUTE_TARGET_ARCH_DEFAULT}" CACHE STRING "Architecture for ARM ComputeLibrary")
# STRINGS only supplies the choices offered by cmake-gui / ccmake; it does not
# validate the value, so an architecture outside this list is not rejected here.
set_property(CACHE ARM_COMPUTE_TARGET_ARCH PROPERTY STRINGS ${ARM_COMPUTE_TARGET_ARCHS})
#
# Configure & build
#
set(ARM_COMPUTE_SOURCE_DIR "${intel_cpu_thirdparty_SOURCE_DIR}/ComputeLibrary")
set(ARM_COMPUTE_BINARY_DIR "${intel_cpu_thirdparty_BINARY_DIR}/ComputeLibrary")
@ -128,8 +164,6 @@ elseif(NOT TARGET arm_compute::arm_compute)
reference_openmp=0
validation_tests=0
benchmark_tests=0
# TODO: check this for Apple Silicon
# multi_isa=1
# TODO: use CC for ARM compute library to minimize binary size
# build_config=<file>
# TODO: use data_type_support to disable useless kernels
@ -137,6 +171,16 @@ elseif(NOT TARGET arm_compute::arm_compute)
arch=${ARM_COMPUTE_TARGET_ARCH}
)
if(ARM)
list(APPEND ARM_COMPUTE_OPTIONS estate=32)
else()
list(APPEND ARM_COMPUTE_OPTIONS estate=64)
if(NOT APPLE AND CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2)
# arm_sve.h header is not available on gcc older than 10.2
list(APPEND ARM_COMPUTE_OPTIONS multi_isa=1)
endif()
endif()
if(NOT MSVC64)
list(APPEND ARM_COMPUTE_OPTIONS
build_dir=${ARM_COMPUTE_BINARY_DIR}
@ -342,8 +386,7 @@ elseif(NOT TARGET arm_compute::arm_compute)
add_library(arm_compute::half INTERFACE IMPORTED GLOBAL)
set_target_properties(arm_compute::half PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES ${ARM_COMPUTE_SOURCE_DIR}/include
OSX_ARCHITECTURES arm64)
INTERFACE_INCLUDE_DIRECTORIES ${ARM_COMPUTE_SOURCE_DIR}/include)
# Helpers for oneDNN integration

View File

@ -4,9 +4,9 @@
project(intel_cpu_thirdparty)
if((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") AND (MSVC_VERSION VERSION_GREATER_EQUAL "1910"))
# 1910 version of Visual Studio 2017
# This flagis needed for enabling SIMD vectorization with command '#pragma omp simd'.
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND MSVC_TOOLSET_VERSION GREATER_EQUAL 141)
# Visual Studio 2017 (v141 toolset)
# This flag is needed for enabling SIMD vectorization with command '#pragma omp simd'.
# Compilation with the '/openmp:experimental' key allows us to enable vectorization capability in MSVC.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp:experimental")
endif()
@ -38,13 +38,8 @@ function(ov_add_onednn)
set(DNNL_ENABLE_WORKLOAD "INFERENCE" CACHE STRING "" FORCE)
# Allow to enable oneDNN verbose with CPU_DEBUG_CAPS and rely on oneDNN default configuration otherwise
if (ENABLE_CPU_DEBUG_CAPS)
set(DNNL_VERBOSE "ON" CACHE STRING "" FORCE)
endif()
set(SDL_cmake_included ON) ## to skip internal SDL flags. SDL flags are already set on IE level
if (ANDROID OR ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR OV_COMPILER_IS_CLANG) AND NOT (THREADING STREQUAL "OMP")))
set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM..
if(ENABLE_CPU_DEBUG_CAPS)
set(DNNL_VERBOSE ON CACHE STRING "" FORCE)
endif()
if(X86_64)
@ -53,43 +48,23 @@ function(ov_add_onednn)
set(DNNL_TARGET_ARCH "X86" CACHE STRING "" FORCE)
elseif(RISCV64)
set(DNNL_TARGET_ARCH "RV64" CACHE STRING "" FORCE)
elseif(AARCH64 OR ARM)
# TODO: fix warning
if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
ie_add_compiler_flags(-Wno-macro-redefined)
endif()
set(ARM_COMPUTE_SCONS_JOBS "8" CACHE STRING "Number of parallel threads to build ARM Compute Library")
set(DNNL_USE_ACL ON CACHE BOOL "" FORCE)
if(ARM)
set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE)
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv7a)
set(ARM_COMPUTE_TARGET_ARCHS armv7a armv7a-hf)
else()
set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
# move to separate ACL cmake
if(APPLE)
# Apple M1 / M2 is assumed
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT armv8.2-a)
else()
set(ARM_COMPUTE_TARGET_ARCH_DEFAULT arm64-v8a)
endif()
set(ARM_COMPUTE_TARGET_ARCHS arm64-v8a
arm64-v8.2-a arm64-v8.2-a-sve arm64-v8.2-a-sve2
armv8a
armv8.2-a armv8.2-a-sve
armv8.6-a armv8.6-a-sve armv8.6-a-sve2 armv8.6-a-sve2-sme2
armv8r64 # the same as armv8.4-a
)
endif()
set(ARM_COMPUTE_TARGET_ARCH "${ARM_COMPUTE_TARGET_ARCH_DEFAULT}" CACHE STRING "Architecture for ARM ComputeLibrary")
set_property(CACHE ARM_COMPUTE_TARGET_ARCH PROPERTY STRINGS ${ARM_COMPUTE_TARGET_ARCHS})
elseif(ARM)
set(DNNL_TARGET_ARCH "ARM" CACHE STRING "" FORCE)
elseif(AARCH64)
set(DNNL_TARGET_ARCH "AARCH64" CACHE STRING "" FORCE)
else()
message(FATAL_ERROR "Unsupported system processor ${CMAKE_SYSTEM_PROCESSOR}")
endif()
if(AARCH64 OR ARM)
set(DNNL_USE_ACL ON CACHE BOOL "Use ARM Conpute Library kernels in oneDNN" FORCE)
endif()
set(SDL_cmake_included ON) ## to skip internal SDL flags. SDL flags are already set on IE level
if (ANDROID OR ((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR OV_COMPILER_IS_CLANG) AND NOT (THREADING STREQUAL "OMP")))
set(OpenMP_cmake_included ON) ## to skip "omp simd" inside a code. Lead to some crashes inside NDK LLVM..
endif()
# WA for old TBBConfig.cmake like tbb2019_20180718oss
# they don't check that imported target is already created
if(TBB_FOUND)
@ -101,15 +76,12 @@ function(ov_add_onednn)
link_libraries(TBB::tbb)
endif()
if(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64")
set(DNNL_TARGET_ARCH "X64")
elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "arm64")
set(DNNL_TARGET_ARCH "AARCH64")
endif()
if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG)
ie_add_compiler_flags(-Wno-undef)
ie_add_compiler_flags(-Wno-missing-declarations)
if(ARM OR AARCH64)
ie_add_compiler_flags(-Wno-macro-redefined)
endif()
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11 AND CMAKE_COMPILER_IS_GNUCXX)
ie_add_compiler_flags(-Wno-array-bounds)
ie_add_compiler_flags(-Wno-stringop-overflow)
@ -137,7 +109,9 @@ function(ov_add_onednn)
endif()
# to find our FindACL.cmake
list(APPEND CMAKE_MODULE_PATH "${intel_cpu_thirdparty_SOURCE_DIR}")
if(DNNL_USE_ACL)
list(APPEND CMAKE_MODULE_PATH "${intel_cpu_thirdparty_SOURCE_DIR}")
endif()
add_subdirectory(onednn EXCLUDE_FROM_ALL)
@ -157,6 +131,4 @@ function(ov_add_onednn)
endif()
endfunction()
if(ENABLE_INTEL_CPU)
ov_add_onednn()
endif()
ov_add_onednn()