[GPU] Reduce unused macros to reduce loading time (#7435)
* Reduce unused macros using two strategies: (1) extract batch_headers so that they are included only once in each batch; (2) static reduction in primitive_db_gen.py, which scans each macro's users and excludes the macro if it has no users. * Removed the runtime's dependency on kernel_selector * Resolve "too large string" error * Fix duplicated definition (GET_FILTER_XXXX was defined both in fetch_weights.cl and by the runtime; removed from the runtime because that definition is incorrect) * Resolve GRN & deconv & gpu_select issues * Fix cldnn unittest issues * Minor fix * Applied review comments * Fix rebase error
This commit is contained in:
parent
e0cea200e3
commit
61c97edd40
@ -74,12 +74,14 @@ file(GLOB_RECURSE __CLDNN_Sources__cl_kernels
|
||||
set(__CLDNN_Directory__cg_cache "${CLDNN__CODEGEN_INCDIR}")
|
||||
set(__CLDNN_CGDirectory__cg_cache "${CLDNN__CODEGEN_DIR}/cache")
|
||||
set(__CLDNN_Label__cg_cache "${__CLDNN_Label__core}\\codegen")
|
||||
set(__CLDNN_Label__cg_cache_batch_headers "${__CLDNN_Label__core}\\codegen")
|
||||
set(__CLDNN_File__cg_cache__prim_db "ks_primitive_db.inc")
|
||||
set(__CLDNN_File__cg_cache__prim_db_batch_headers "ks_primitive_db_batch_headers.inc")
|
||||
set(__CLDNN_Sources__cg_cache
|
||||
"${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}"
|
||||
"${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cache__prim_db_batch_headers}"
|
||||
)
|
||||
|
||||
|
||||
set(__CLDNN_AllSources
|
||||
${__CLDNN_Sources__main}
|
||||
${__CLDNN_Sources__core}
|
||||
@ -144,12 +146,13 @@ endif()
|
||||
# =================================== Custom pre- and post-steps =======================================
|
||||
add_custom_command(OUTPUT "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E make_directory "${__CLDNN_CGDirectory__cg_cache}"
|
||||
COMMAND "${PYTHON_EXECUTABLE}" "${__CLDNN_Directory__core_common}/primitive_db_gen.py" -out_path "${__CLDNN_CGDirectory__cg_cache}" -out_file_name "${__CLDNN_File__cg_cache__prim_db}" -kernels "${__CLDNN_Directory__cl_kernels}"
|
||||
COMMAND "${PYTHON_EXECUTABLE}" "${__CLDNN_Directory__core_common}/primitive_db_gen.py" -out_path "${__CLDNN_CGDirectory__cg_cache}" -out_file_name_prim_db "${__CLDNN_File__cg_cache__prim_db}" -out_file_name_batch_headers "${__CLDNN_File__cg_cache__prim_db_batch_headers}" -kernels "${__CLDNN_Directory__cl_kernels}"
|
||||
DEPENDS ${__CLDNN_Sources__cl_kernels} "${__CLDNN_Directory__core_common}/primitive_db_gen.py"
|
||||
COMMENT "Generating ${__CLDNN_File__cg_cache__prim_db} ..."
|
||||
)
|
||||
add_custom_command(OUTPUT "${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}" "${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}"
|
||||
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db_batch_headers}" "${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cache__prim_db_batch_headers}"
|
||||
DEPENDS "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}" ${__CLDNN_Sources__cl_kernels} "${__CLDNN_Directory__core_common}/primitive_db_gen.py"
|
||||
COMMENT "Updating file if the file changed (${__CLDNN_File__cg_cache__prim_db}) ..."
|
||||
)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
KERNEL(activation)(
|
||||
__global INPUT0_TYPE* input,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#ifdef PARAMETERIZED
|
||||
#define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX_SAFE)(idx_order)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#ifdef BATCH_AXIS
|
||||
#define VALUES_NUM INPUT0_BATCH_NUM
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define GLOBAL_SIZE 128
|
||||
#define LOCAL_SIZE GLOBAL_SIZE
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#ifndef SG_SIZE
|
||||
#define SG_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(average_unpooling_gpu)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
|
||||
{
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(batch_to_space_ref)(const __global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define OC_BLOCK_SIZE 32
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
#define OC_BLOCK_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define OC_BLOCK_SIZE 32
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(binary_convolution_ref)(const __global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
|
||||
KERNEL(border_gpu_ref)(
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
|
||||
KERNEL(broadcast_gpu_ref)(
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define WORK_GROUP_SIZE 16
|
||||
#define IC_BLOCK 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
//
|
||||
// In this kernel we are processing "fyx" as flatten 1D "elements".
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/unit_type.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define GET_INDEX(prefix, ORDER) CAT(prefix, _GET_INDEX)(ORDER)
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
///////////////////////// Input Index /////////////////////////
|
||||
inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
|
||||
|
@ -2,10 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define TYPE_N_(type, n) type##n
|
||||
#define TYPE_N(type, n) TYPE_N_(type, n)
|
||||
|
@ -2,10 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
// ======================================================================================
|
||||
// Host side jit-constants:
|
||||
|
@ -3,9 +3,9 @@
|
||||
//
|
||||
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
|
||||
// ======================================================================================
|
||||
// Host side jit-constants:
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/imad.cl"
|
||||
|
||||
#define INPUT0_PACKED_TYPE uint
|
||||
|
@ -3,9 +3,9 @@
|
||||
//
|
||||
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
|
||||
// ======================================================================================
|
||||
// Host side jit-constants:
|
||||
|
@ -2,10 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define TYPE_N_(type, n) type##n
|
||||
#define TYPE_N(type, n) TYPE_N_(type, n)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
#if FP16_UNIT_USED
|
||||
@ -128,4 +128,3 @@ KERNEL(convolution_bfyx_1x1)(
|
||||
#undef CONCAT_TOKEN
|
||||
#undef CONCAT_TOKEN_HANDLER1
|
||||
#undef MULTIPLY_BLOCKS_16x16
|
||||
#undef MAKE_VECTOR_TYPE
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/gemm_common.cl"
|
||||
|
||||
#define MULT(C_, A_, i_) \
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define SIMD_SIZE 8
|
||||
__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#if FP16_UNIT_USED
|
||||
#define ALIGNED_BLOCK_READ(ptr, byte_offset) as_half(intel_sub_group_block_read_us8((const __global ushort*)(ptr) + (byte_offset)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Direct Convolution
|
||||
@ -26,7 +26,7 @@ KERNEL(convolution_f16_10x12x16)(
|
||||
#endif
|
||||
uint split_idx)
|
||||
{
|
||||
#include "include/vec_typedefs.cl"
|
||||
#include "include/batch_headers/vec_typedefs.cl"
|
||||
|
||||
const unsigned global_x = (uint)get_global_id(0);
|
||||
const unsigned global_y = (uint)get_global_id(1);
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define INPUT_TYPE INPUT0_TYPE
|
||||
#define INPUT_TYPE2 MAKE_VECTOR_TYPE(INPUT0_TYPE, 2)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
#if X_BLOCK_SIZE > 1
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#if defined(cl_intel_subgroups_short)
|
||||
#define TILE_M 1
|
||||
@ -20,7 +20,7 @@ KERNEL(convolution_f16)(
|
||||
#endif
|
||||
uint split_idx)
|
||||
{
|
||||
#include "include/vec_typedefs.cl"
|
||||
#include "include/batch_headers/vec_typedefs.cl"
|
||||
|
||||
const unsigned group_x = get_group_id(0);
|
||||
const unsigned group_y = get_group_id(1);
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
#define TILE_M 2
|
||||
@ -20,7 +20,7 @@ KERNEL(convolution_f32)(
|
||||
#endif
|
||||
uint split_idx)
|
||||
{
|
||||
#include "include/vec_typedefs.cl"
|
||||
#include "include/batch_headers/vec_typedefs.cl"
|
||||
|
||||
const unsigned group_x = get_group_id(0);
|
||||
const unsigned group_y = get_group_id(1);
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
|
||||
__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define FEATURE_SLICE_SIZE 16
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
#define BATCH_SLICE_SIZE 16
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/unit_type.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "include/unit_type.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/unit_type.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#if QUANTIZATION_TERM
|
||||
#define ACCUMULATOR_TYPE int
|
||||
@ -20,7 +20,6 @@
|
||||
#define AS_TYPE_N_(type, n, x) as_##type##n(x)
|
||||
#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x)
|
||||
#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x)
|
||||
#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
|
||||
#define OUTPUT_TYPE16 MAKE_VECTOR_TYPE(OUTPUT_TYPE, 16)
|
||||
#define BATCH_SLICE_SIZE 16
|
||||
#define FEATURE_SLICE_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#if QUANTIZATION_TERM
|
||||
#define ACCUMULATOR_TYPE int
|
||||
@ -16,7 +16,6 @@
|
||||
#define ACTIVATION_TYPE INPUT0_TYPE
|
||||
#define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x)
|
||||
#endif
|
||||
#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size)
|
||||
#define OUTPUT_TYPE16 MAKE_VECTOR_TYPE(OUTPUT_TYPE, 16)
|
||||
#define BATCH_SLICE_SIZE 16
|
||||
#define FEATURE_SLICE_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/mmad.cl"
|
||||
|
||||
#define AS_TYPE(type, val) CAT(as_, type)(val)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(convolution_mmad_b_fs_yx_fsv32_dw)(
|
||||
__global INPUT0_TYPE* input,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/imad.cl"
|
||||
|
||||
#define CEIL_DIV(x, y) (1 + ((x) - 1) / (y))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/imad.cl"
|
||||
|
||||
#define CEIL_DIV(x, y) (1 + ((x) - 1) / (y))
|
||||
|
@ -8,7 +8,7 @@
|
||||
// Output matrix dimensions: M x N
|
||||
// --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
|
||||
|
||||
#define DOT4i0( _result, _A, _B, i) \
|
||||
|
@ -8,8 +8,8 @@
|
||||
// Output matrix dimensions: M x N
|
||||
// --------------------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
|
||||
#define DOT8i_0( _result, _A, _B, i) \
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(convolution_gpu_yxfb_ref)(
|
||||
const __global UNIT_TYPE* input,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
__attribute__((intel_reqd_sub_group_size(16)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
KERNEL(convolution_gpu_yxfb_yxio_b16)(
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
__attribute__((reqd_work_group_size(LOCAL_WORK_GROUP_SIZE, 1, 1)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
__attribute__((reqd_work_group_size(LOCAL_WORK_GROUP_SIZE, 1, 1)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(ctc_greedy_decoder_ref)(const __global INPUT0_TYPE* probabilities
|
||||
,const __global INPUT1_TYPE* sequence_indicators
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
///////////////////////// Input Index /////////////////////////
|
||||
inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
///////////////////////// Input Index /////////////////////////
|
||||
inline uint FUNC(get_input_index)(uint b, uint f, uint w, uint z, uint y, uint x)
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#include "deconvolution_gpu_imad_common.cl"
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define WORK_GROUP_GROUP_SIZE 16
|
||||
|
||||
|
@ -2,10 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#include "deconvolution_gpu_imad_common.cl"
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#define CEIL_DIV(a, b) (((a) + ((b) - 1)) / (b))
|
||||
#define ALIGN(a, b) (CEIL_DIV(a, b) * (b))
|
||||
|
@ -2,10 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/imad.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
#include "deconvolution_gpu_imad_common.cl"
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(deconvolution_gpu_yxfb_ref)(
|
||||
const __global INPUT0_TYPE* input,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
#define FEATURE_SLICE_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
__attribute__((intel_reqd_sub_group_size(16)))
|
||||
KERNEL(deformable_convolution_gpu_bfyx_interp)(
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
|
||||
KERNEL(deformable_convolution_gpu_bfyx_ref)(
|
||||
const __global INPUT0_TYPE* data,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(depth_to_space_block2_opt)(const __global half* input, __global half* output)
|
||||
{
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(depth_to_space_ref)(const __global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/detection_output_common.cl"
|
||||
|
||||
// DetectionOuput - performs non-maximuim suppression to generate the detection output
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define FEATURE_SLICE_SIZE 16
|
||||
#define unroll_for __attribute__((opencl_unroll_hint())) for
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define OUTPUT_TYPE_BLOCK MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)
|
||||
#define TO_TYPE(type, val) CAT(convert_, type)(val)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(eltwise_fs_b_yx_fsv32)(
|
||||
INPUTS_DECLS
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
// Kernel works only for sub_group size of 16 with 32 features slice size and process 2 features per WI
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(eltwise_gpu_vload8)(INPUTS_DECLS
|
||||
__global OUTPUT_TYPE* output)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#ifdef PACKED_SUM
|
||||
KERNEL(embedding_bag_ref)(
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
KERNEL(extract_image_patches_ref)(const __global INPUT0_TYPE* input,
|
||||
__global OUTPUT_TYPE* output)
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
#include "include/mmad.cl"
|
||||
|
||||
#define INPUT_PACKED_TYPE_8 CAT(INPUT_PACKED_TYPE, 8)
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#if defined(__fc_f16)
|
||||
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// Required JIT constants:
|
||||
// - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// Required JIT constants:
|
||||
// - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
|
||||
|
@ -2,9 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
// JIT Parameters:
|
||||
// SIMD - sub-group size/simd width, one of {8, 16};
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/fetch_weights.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_weights.cl"
|
||||
|
||||
KERNEL(fc)(
|
||||
const __global INPUT0_TYPE* input,
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
// Block read - currently block is 4 bytes aligned.
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
// Just-in-time macro definitions:
|
||||
@ -486,7 +486,6 @@ KERNEL (fully_connected_gpu_bx_bs_x_bsv16_b1)(
|
||||
|
||||
#undef CONCAT_TOKEN_HANDLER1
|
||||
#undef CONCAT_TOKEN
|
||||
#undef MAKE_VECTOR_TYPE
|
||||
#undef CVT_UNIT
|
||||
#undef CHUNK_UNITS_TYPE
|
||||
#undef AS_CHUNK
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
#if FP16_UNIT_USED
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
__attribute__((reqd_work_group_size(8, 1, 1)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/sub_group.cl"
|
||||
|
||||
#if FP16_UNIT_USED
|
||||
@ -224,7 +224,6 @@ KERNEL (fully_connected_gpu_xb_xb_b8_x8_vload)(
|
||||
|
||||
#undef SUB_GROUP_SIZE
|
||||
#undef ALIGNED_BLOCK_READ8
|
||||
#undef MAKE_VECTOR_TYPE
|
||||
#undef CONCAT_TOKEN
|
||||
#undef CONCAT_TOKEN_HANDLER1
|
||||
#undef MULTIPLY_BLOCKS_8x8
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
// Just-in-time macro definitions:
|
||||
@ -275,7 +275,6 @@ KERNEL (fully_connected_gpu_xb_xb_block_fp16)(
|
||||
|
||||
#undef CONCAT_TOKEN_HANDLER1
|
||||
#undef CONCAT_TOKEN
|
||||
#undef MAKE_VECTOR_TYPE
|
||||
#undef CVT_UNIT
|
||||
#undef CHUNK_UNITS_TYPE
|
||||
#undef AS_CHUNK
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// Required JIT constants:
|
||||
// - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
|
||||
__attribute__((reqd_work_group_size(8, 1, 1)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
// Required JIT constants:
|
||||
// - FP16_SUPPORTED - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
|
||||
|
@ -2,7 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/unit_type.cl"
|
||||
|
||||
#define unroll_for __attribute__((opencl_unroll_hint)) for
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/imad.cl"
|
||||
|
||||
#define SIMD_SIZE 16
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/reshape_dims.cl"
|
||||
|
||||
// Required JIT constants:
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
|
||||
#define SIMD_SIZE 8
|
||||
__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
|
||||
|
@ -2,8 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/data_types.cl"
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/image_data.cl"
|
||||
|
||||
__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
|
||||
__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/common.cl"
|
||||
#include "include/data_types.cl"
|
||||
#include "include/batch_headers/common.cl"
|
||||
#include "include/batch_headers/data_types.cl"
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
@ -2,7 +2,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "include/fetch_data.cl"
|
||||
#include "include/batch_headers/fetch_data.cl"
|
||||
#include "include/imad.cl"
|
||||
#if QUANTIZATION_TERM
|
||||
# define ACCUMULATOR_TYPE int
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user