Windows arm64 support for CPU plugin (#17075)

* ARM32 support

* ARM32 support

* Fixed packaging

* Windows arm64 support

* Updated submodule

* 32-bit support in Intel CPU plugin

* Fixed FindACL.cmake

* Enable proper conditional compilation for Windows ARM64

* Enable proper conditional compilation for Windows ARM64

* Updated submodule

* Updated submodule

* Updated submodule

* Updated submodule

* Updated submodule

* Added template_extension to CPU func tests dependencies

* Updated submodule

* Enabled runtime model tests

* Updated submodule

* Submodule update
Ilya Lavrenov 2023-04-25 16:41:28 +04:00 committed by GitHub
parent d423491bcb
commit 2e3deb8d8f
10 changed files with 79 additions and 27 deletions


@ -1,9 +1,11 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
add_subdirectory(old)
add_subdirectory(new)
# Enable code style check
file(GLOB_RECURSE template_extension_src "${CMAKE_CURRENT_SOURCE_DIR}/new/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/new/*.hpp")
add_clang_format_target(openvino_template_extension_clang FOR_SOURCES ${template_extension_src})


@ -181,6 +181,9 @@ using namespace ov::frontend::onnx;
namespace ngraph {
namespace onnx_import {
const char* OPENVINO_ONNX_DOMAIN = "org.openvinotoolkit";
namespace {
template <typename Container = std::map<int64_t, Operator>>
typename Container::const_iterator find(int64_t version, const Container& map) {


@ -106,7 +106,7 @@ private:
std::unordered_map<std::string, DomainOpset> m_map;
};
constexpr const char* OPENVINO_ONNX_DOMAIN = "org.openvinotoolkit";
extern const char* OPENVINO_ONNX_DOMAIN;
} // namespace onnx_import
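
The two ops_bridge hunks above move the string constant out of the header: the header now only declares extern const char* OPENVINO_ONNX_DOMAIN; and the single definition lives in the source file, so every translation unit (and every DLL on Windows ARM64) shares one symbol instead of carrying its own constexpr copy. A minimal sketch of that declaration/definition split, with illustrative file names rather than the real ones:

// domain.hpp (illustrative header): declare only, no initializer here
namespace onnx_import {
extern const char* OPENVINO_ONNX_DOMAIN;
}  // namespace onnx_import

// domain.cpp (illustrative source): the one and only definition
namespace onnx_import {
const char* OPENVINO_ONNX_DOMAIN = "org.openvinotoolkit";
}  // namespace onnx_import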


@ -133,6 +133,7 @@ void GridSample::createPrimitive() {
jcp.cannelNum = jcp.dynamicChannel ? 1lu : srcDataDims[1];
}
#if defined(OPENVINO_ARCH_X86_64)
if (x64::mayiuse(x64::avx512_core)) {
jitKernel.reset(new GridSampleKernel<x64::avx512_core>(jcp));
} else if (x64::mayiuse(x64::avx2)) {
@ -140,6 +141,7 @@ void GridSample::createPrimitive() {
} else if (x64::mayiuse(x64::sse41)) {
jitKernel.reset(new GridSampleKernel<x64::sse41>(jcp));
}
#endif // OPENVINO_ARCH_X86_64
if (!jitKernel) {
THROW_ERROR << " could not create JIT kernel.";
}
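
The GridSample change above wraps the ISA-specific kernel selection in an OPENVINO_ARCH_X86_64 guard while leaving the null check outside it, so ARM64 builds still compile and report a clear runtime error instead of failing to link against the x64 JIT code. A self-contained sketch of that pattern, assuming illustrative kernel types rather than the plugin's real ones:

#include <memory>
#include <stdexcept>

struct Kernel {
    virtual ~Kernel() = default;
    virtual void run() const = 0;
};

struct Avx2Kernel : Kernel {
    void run() const override { /* vectorized body would go here */ }
};

std::unique_ptr<Kernel> create_kernel() {
    std::unique_ptr<Kernel> kernel;
#if defined(OPENVINO_ARCH_X86_64)
    kernel = std::make_unique<Avx2Kernel>();  // real code picks an ISA via mayiuse()
#endif
    if (!kernel) {
        // reached on ARM64 builds and on x86 machines without a supported ISA
        throw std::runtime_error("could not create JIT kernel");
    }
    return kernel;
}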


@ -30,6 +30,9 @@ namespace intel_cpu {
namespace node {
#define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' "
#if defined(OPENVINO_ARCH_X86_64)
template <cpu_isa_t isa>
struct jit_move_scale_kernel : public jit_uni_move_scale_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_move_scale_kernel)
@ -159,6 +162,8 @@ private:
std::unordered_map<size_t, std::unique_ptr<jit_emitter>> emitters;
};
#endif // OPENVINO_ARCH_X86_64
Interaction::Interaction(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context)
: Node(op, context, NgraphShapeInferFactory(op, EMPTY_PORT_MASK)) {
std::string errorMessage;
@ -269,8 +274,6 @@ void Interaction::execRef(dnnl::stream strm) {
}
}
void Interaction::execute(dnnl::stream strm) {
execRef(strm);
}
@ -324,6 +327,7 @@ void Interaction::prepareParams() {
interJcp.broadcast_scales = fqScales.size() == 1;
interJcp.input_size = interactFeatureSize;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu_isa_t::avx512_core)) {
moveFeatureKernel.reset(new jit_move_scale_kernel<cpu_isa_t::avx512_core>(jcp));
moveInteractKernel.reset(new jit_move_scale_kernel<cpu_isa_t::avx512_core>(interJcp));
@ -333,13 +337,14 @@ void Interaction::prepareParams() {
} else if (mayiuse(cpu_isa_t::sse41)) {
moveFeatureKernel.reset(new jit_move_scale_kernel<cpu_isa_t::sse41>(jcp));
moveInteractKernel.reset(new jit_move_scale_kernel<cpu_isa_t::sse41>(interJcp));
} else {
THROW_ERROR << "cannot create jit eltwise kernel";
}
#endif // OPENVINO_ARCH_X86_64
if (moveFeatureKernel && moveInteractKernel) {
moveFeatureKernel->create_ker();
moveInteractKernel->create_ker();
} else {
THROW_ERROR << "cannot create jit eltwise kernel";
}
#ifdef CPU_DEBUG_CAPS
if (prim) {
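
The Interaction changes apply the same approach with two kernels: both are instantiated only under the architecture guard, and the success check plus the create_ker() calls run unconditionally, so the error path is identical on every platform. A short sketch of that restructuring, using stand-in types rather than the real jit_uni_move_scale_kernel:

#include <memory>
#include <stdexcept>

struct MoveScaleKernel {
    void create_ker() {}  // stands in for the JIT kernel's code-generation step
};

void prepare_kernels() {
    std::unique_ptr<MoveScaleKernel> moveFeatureKernel;
    std::unique_ptr<MoveScaleKernel> moveInteractKernel;
#if defined(OPENVINO_ARCH_X86_64)
    moveFeatureKernel = std::make_unique<MoveScaleKernel>();
    moveInteractKernel = std::make_unique<MoveScaleKernel>();
#endif
    if (moveFeatureKernel && moveInteractKernel) {
        moveFeatureKernel->create_ker();
        moveInteractKernel->create_ker();
    } else {
        throw std::runtime_error("cannot create jit eltwise kernel");
    }
}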


@ -31,6 +31,8 @@ namespace ov {
namespace intel_cpu {
namespace node {
#if defined(OPENVINO_ARCH_X86_64)
template <cpu_isa_t isa>
struct jit_mul_add_softmax_kernel : public jit_uni_mul_add_softmax_kernel, public jit_generator {
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_mul_add_softmax_kernel)
@ -663,6 +665,8 @@ private:
std::unordered_map<size_t, std::unique_ptr<jit_emitter>> emitters;
};
#endif // OPENVINO_ARCH_X86_64
bool MHA::isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept {
try {
const auto mha = std::dynamic_pointer_cast<const MHANode>(op);
@ -789,6 +793,7 @@ void MHA::initSupportedPrimitiveDescriptors() {
}
void MHA::init_brgemm(brgemmCtx& ctx, std::unique_ptr<brgemm_kernel_t>& brgKernel, bool use_amx) {
#ifdef OPENVINO_ARCH_X86_64
brgemm_t brgDesc;
brgemm_strides_t strides {static_cast<dnnl_dim_t>(ctx.M * ctx.K), static_cast<dnnl_dim_t>(ctx.K * ctx.N)};
@ -815,6 +820,9 @@ void MHA::init_brgemm(brgemmCtx& ctx, std::unique_ptr<brgemm_kernel_t>& brgKerne
THROW_ERROR << "cannot be executed due to invalid brgconv params";
}
brgKernel.reset(brgKernel_);
#else
THROW_ERROR << "is not supported on non-x86_64";
#endif // OPENVINO_ARCH_X86_64
}
void MHA::init_brgemm_copy_a(std::unique_ptr<jit_brgemm_matmul_copy_a_t>& brgCopyKernel, size_t K, size_t K_blk, size_t K_tail,
@ -834,7 +842,9 @@ void MHA::init_brgemm_copy_a(std::unique_ptr<jit_brgemm_matmul_copy_a_t>& brgCop
brgCopyKernelConf.a_dt_sz = DnnlExtensionUtils::sizeOfDataType(static_cast<dnnl::memory::data_type>(dt_in0));
brgCopyKernelConf.transposed_A = false;
#if defined(OPENVINO_ARCH_X86_64)
create_brgemm_matmul_copy_a(brgCopyKernel, &brgCopyKernelConf);
#endif // OPENVINO_ARCH_X86_64
}
void MHA::init_brgemm_copy_b(std::unique_ptr<jit_brgemm_matmul_copy_b_t>& brgCopyKernel, size_t N, size_t N_blk, size_t N_tail, size_t LDB, size_t K,
@ -868,7 +878,9 @@ void MHA::init_brgemm_copy_b(std::unique_ptr<jit_brgemm_matmul_copy_b_t>& brgCop
brgCopyKernelConf.has_zero_point_b = false;
brgCopyKernelConf.src_zp_type = dnnl::impl::cpu::x64::none;
#if defined(OPENVINO_ARCH_X86_64)
create_brgemm_matmul_copy_b(brgCopyKernel, &brgCopyKernelConf);
#endif // OPENVINO_ARCH_X86_64
}
void MHA::prepareParams() {
@ -1078,13 +1090,16 @@ void MHA::prepareParams() {
jcp.with_scales1 = !fqScales2.empty();
jcp.broadcast_scales1 = fqScales2.size() == 1;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu_isa_t::avx512_core)) {
mulAddSoftmaxKernel.reset(new jit_mul_add_softmax_kernel<cpu_isa_t::avx512_core>(jcp));
} else if (mayiuse(cpu_isa_t::avx2)) {
mulAddSoftmaxKernel.reset(new jit_mul_add_softmax_kernel<cpu_isa_t::avx2>(jcp));
} else if (mayiuse(cpu_isa_t::sse41)) {
mulAddSoftmaxKernel.reset(new jit_mul_add_softmax_kernel<cpu_isa_t::sse41>(jcp));
} else {
}
#endif // OPENVINO_ARCH_X86_64
if (!mulAddSoftmaxKernel) {
THROW_ERROR << "cannot create jit eltwise kernel";
}
}
@ -1099,13 +1114,16 @@ void MHA::prepareParams() {
jcp.src_stride = N1;
jcp.dst_stride = batch1 * N1;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu_isa_t::avx512_core)) {
convertReorderKernel.reset(new jit_convert_reorder_kernel<cpu_isa_t::avx512_core>(jcp));
} else if (mayiuse(cpu_isa_t::avx2)) {
convertReorderKernel.reset(new jit_convert_reorder_kernel<cpu_isa_t::avx2>(jcp));
} else if (mayiuse(cpu_isa_t::sse41)) {
convertReorderKernel.reset(new jit_convert_reorder_kernel<cpu_isa_t::sse41>(jcp));
} else {
}
#endif // OPENVINO_ARCH_X86_64
if (!convertReorderKernel) {
THROW_ERROR << "cannot create jit eltwise kernel";
}
}
@ -1122,13 +1140,17 @@ void MHA::prepareParams() {
jcp.outter_src_stride = strTranspose1In0[3];
jcp.outter_dst_stride = N0;
#if defined(OPENVINO_ARCH_X86_64)
if (mayiuse(cpu_isa_t::avx512_core)) {
convertTransposeKernel.reset(new jit_convert_transpose_kernel<cpu_isa_t::avx512_core>(jcp));
} else if (mayiuse(cpu_isa_t::avx2)) {
convertTransposeKernel.reset(new jit_convert_transpose_kernel<cpu_isa_t::avx2>(jcp));
} else if (mayiuse(cpu_isa_t::sse41)) {
convertTransposeKernel.reset(new jit_convert_transpose_kernel<cpu_isa_t::sse41>(jcp));
} else {
}
#endif // OPENVINO_ARCH_X86_64
if (!convertTransposeKernel) {
THROW_ERROR << "cannot create jit eltwise kernel";
}
}
@ -1167,6 +1189,7 @@ static void reorder2D(const srcT* pin, dstT* pout, const std::vector<size_t>& di
}
void MHA::callBrgemm(brgemmCtx& ctx, std::unique_ptr<brgemm_kernel_t>& brgKernel, const void* pin0, const void* pin1, void* pout, void* wsp) {
#if defined(OPENVINO_ARCH_X86_64)
if (ctx.is_with_amx)
amx_tile_configure(ctx.palette);
if (ctx.is_with_comp) {
@ -1175,6 +1198,9 @@ void MHA::callBrgemm(brgemmCtx& ctx, std::unique_ptr<brgemm_kernel_t>& brgKernel
} else {
brgemm_kernel_execute(brgKernel.get(), 1, pin0, pin1, nullptr, pout, wsp);
}
#else
THROW_ERROR << "is not supported on non-x64 platforms";
#endif // OPENVINO_ARCH_X86_64
}
template <typename in1_type>
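
In MHA, the paths that depend on oneDNN's x64-only brgemm API (init_brgemm, callBrgemm) gain an explicit #else branch that throws, so a non-x86_64 build fails loudly at runtime if such a node is ever executed rather than failing at link time. A minimal sketch of that fallback shape, with an illustrative function name:

#include <stdexcept>

void call_brgemm_or_throw() {
#if defined(OPENVINO_ARCH_X86_64)
    // configure and execute the brgemm kernel here
#else
    throw std::runtime_error("brgemm execution is not supported on non-x86_64 platforms");
#endif
}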


@ -30,6 +30,7 @@
#include "transformations/snippets/x64/pass/remove_converts.hpp"
#include "transformations/snippets/x64/pass/enforce_precision.hpp"
#include "transformations/cpu_opset/common/pass/convert_to_swish_cpu.hpp"
#include "transformations/defs.hpp"
using namespace InferenceEngine;
using namespace dnnl::impl::utils;
@ -93,8 +94,12 @@ void Snippet::copy_snippet() {
snippet = std::make_shared<ngraph::snippets::op::Subgraph>(subgraph_node_inputs, new_body);
ngraph::copy_runtime_info(original_snippet, snippet);
snippet->set_friendly_name(original_snippet->get_friendly_name());
#if defined(OPENVINO_ARCH_X86_64)
snippet->set_generator(std::make_shared<CPUGenerator>(host_isa));
isa_num_lanes = snippet->get_generator()->get_target_machine()->get_lanes();
#else
IE_THROW(NotImplemented) << "CPU plugin: code-generation is not supported on non-x64 platforms";
#endif // OPENVINO_ARCH_X86_64
}
void Snippet::initSupportedPrimitiveDescriptors() {
@ -538,17 +543,17 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
// enforce BF16 precisions to supported operations
// MatMul has to be decomposed to Brgemm operations before enforcement
// Note: MatMul decomposition will be run again later in case BF16 enforcement does not happen
pre_dialect.register_pass<ngraph::snippets::pass::MatMulToBrgemm>();
pre_dialect.register_pass<pass::EnforcePrecision>(element::f32, element::bf16);
CPU_REGISTER_PASS_X64(pre_dialect, ngraph::snippets::pass::MatMulToBrgemm);
CPU_REGISTER_PASS_X64(pre_dialect, pass::EnforcePrecision, element::f32, element::bf16);
}
ov::pass::Manager post_dialect;
post_dialect.register_pass<ov::intel_cpu::pass::BrgemmToBrgemmCPU>();
CPU_REGISTER_PASS_X64(post_dialect, ov::intel_cpu::pass::BrgemmToBrgemmCPU);
ov::pass::Manager post_precision;
post_precision.register_pass<ov::intel_cpu::pass::RemoveConverts>();
post_precision.register_pass<ov::intel_cpu::pass::FuseLoadConvert>();
post_precision.register_pass<ov::intel_cpu::pass::FuseStoreConvert>();
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::RemoveConverts);
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::FuseLoadConvert);
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::FuseStoreConvert);
// LoadConvert uses the Load emitter, which supports conversion from any type only to f32
post_precision.get_pass_config()->set_callback<ov::intel_cpu::pass::FuseLoadConvert>(
[](const std::shared_ptr<const ov::Node>& n) -> bool {
@ -563,7 +568,7 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) {
return convert->get_input_element_type(0) != ov::element::f32;
return true;
});
post_precision.register_pass<ov::intel_cpu::pass::MulAddToFMA>();
CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::MulAddToFMA);
schedule = snippet->generate(
pre_dialect,
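
The Snippets changes route every x64-specific transformation through the CPU_REGISTER_PASS_X64 macro from transformations/defs.hpp, so pass registration compiles away on other architectures instead of pulling in x64 emitters. The real macro definition is not shown in this diff; a hypothetical sketch of how such a guard macro can look:

#if defined(OPENVINO_ARCH_X86_64)
#    define CPU_REGISTER_PASS_X64(manager, pass, ...) (manager).register_pass<pass>(__VA_ARGS__)
#else
#    define CPU_REGISTER_PASS_X64(manager, pass, ...)  // expands to nothing off x86_64
#endif

With this shape, CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::MulAddToFMA); behaves like the old register_pass call on x86_64 and becomes an empty statement elsewhere.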


@ -4,18 +4,21 @@
set(TARGET_NAME ov_cpu_func_tests)
add_library(cpuSpecificRtInfo STATIC $<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src/utils/rt_info/memory_formats_attribute.hpp
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src/utils/rt_info/memory_formats_attribute.cpp)
add_library(cpuSpecificRtInfo STATIC
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src/utils/rt_info/memory_formats_attribute.hpp
$<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src/utils/rt_info/memory_formats_attribute.cpp)
target_link_libraries(cpuSpecificRtInfo PRIVATE openvino::runtime)
set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $<TARGET_PROPERTY:openvino_intel_cpu_plugin,SOURCE_DIR>/src)
set(DEPENDENCIES openvino_intel_cpu_plugin)
set(DEPENDENCIES openvino_intel_cpu_plugin template_extension)
set(LINK_LIBRARIES funcSharedTests cpuSpecificRtInfo inference_engine_snippets)
if (ENABLE_OV_ONNX_FRONTEND)
if(ENABLE_OV_ONNX_FRONTEND)
list(APPEND DEFINES TEST_MODELS="${TEST_MODEL_ZOO}")
else()
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
endif()
if(NOT X86_64)
list(APPEND EXCLUDED_SOURCE_PATHS
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests
@ -23,6 +26,7 @@ if(NOT X86_64)
${CMAKE_CURRENT_SOURCE_DIR}/bfloat16
)
endif()
addIeTargetTest(
NAME ${TARGET_NAME}
ROOT ${CMAKE_CURRENT_SOURCE_DIR}
@ -32,8 +36,7 @@ addIeTargetTest(
DEPENDENCIES ${DEPENDENCIES}
LINK_LIBRARIES ${LINK_LIBRARIES}
ADD_CPPLINT
LABELS
CPU
LABELS CPU
)
set_ie_threading_interface_for(${TARGET_NAME})
@ -43,6 +46,5 @@ include(target_per_test.cmake)
# exclude 'ov_cpu_func_tests' from target 'all' if specific tests path was specified
if(DEFINED ENABLE_CPU_SPECIFIC_TESTS_PATH)
set_target_properties(${TARGET_NAME} PROPERTIES
EXCLUDE_FROM_ALL ON)
set_target_properties(${TARGET_NAME} PROPERTIES EXCLUDE_FROM_ALL ON)
endif()


@ -90,7 +90,7 @@ else()
if(MSVC64)
# clang-cl does not recognize /MP option
string(REPLACE "/MP " "" extra_cxx_flags "${extra_cxx_flags}")
else()
elseif(CMAKE_POSITION_INDEPENDENT_CODE)
# -fPIC is not applicable for clang-cl
set(extra_cxx_flags "${extra_cxx_flags} -fPIC")
endif()
@ -134,9 +134,15 @@ else()
# https://cmake.org/cmake/help/latest/command/add_custom_command.html#examples-generating-files
if(OV_GENERATOR_MULTI_CONFIG AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.20)
foreach(option IN LISTS ARM_COMPUTE_DEBUG_OPTIONS)
list(APPEND ARM_COMPUTE_OPTIONS $<$<CONFIG:Debug>:${option}>)
list(APPEND ARM_COMPUTE_OPTIONS $<$<CONFIG:Debug>:${option}>
$<$<CONFIG:RelWithDebInfo>:${option}>)
endforeach()
elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES)
string(TOUPPER "${config}" config_upper)
set(flags ${CMAKE_CXX_FLAGS_${config_upper}})
set(extra_cxx_flags "${extra_cxx_flags} $<$<CONFIG:${config}>:${flags}>")
endforeach()
elseif(CMAKE_BUILD_TYPE MATCHES "^(Debug|RelWithDebInfo)$")
list(APPEND ARM_COMPUTE_OPTIONS ${ARM_COMPUTE_DEBUG_OPTIONS})
endif()
@ -239,6 +245,7 @@ else()
endforeach()
endif()
elseif(MSVC64)
# required for clang-cl compiler
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.20)
set(extra_cxx_flags "${extra_cxx_flags} $<IF:$<CONFIG:Release>,/MD,/MDd>")
else()

@ -1 +1 @@
Subproject commit dff21dea14e9557cc86f4073c62f8942ac022fd8
Subproject commit 9b547e7cb6017a3603509d879dd11e1aee7dce7f