[GPU] Parallelize node.choose_impl in compile_graph (#6261)

This commit is contained in:
Paul Youngsoo Ahn 2021-07-12 13:58:29 +09:00 committed by GitHub
parent 79a6ad0c0f
commit 60651fe8f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
86 changed files with 151 additions and 105 deletions

View File

@ -16,6 +16,10 @@
#include <utility>
#include <string>
#define CLDNN_THREADING_SEQ 0
#define CLDNN_THREADING_TBB 1
#define CLDNN_THREADING_THREADPOOL 2
namespace cldnn {
class stream;

View File

@ -127,7 +127,8 @@ struct kernel_string {
kernel_string() : str(""), jit(""), undefs(""), options(""), entry_point(""), batch_compilation(false) {}
std::string get_hash() { return str + jit + undefs + options + entry_point; }
std::string get_str() const { return str + jit + undefs + options + entry_point; }
size_t get_hash() const { return std::hash<std::string>()(get_str()); }
};
} // namespace cldnn

View File

@ -69,11 +69,10 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
KernelData kd = KernelData::Default<activation_params>(params);
activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
const std::string kernel_id = GetEntryPoint(kernelName, params.layerID, options);
auto dispatchData = SetDefault(newParams);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -86,7 +86,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
KernelData kd = KernelData::Default<arg_max_min_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -45,7 +45,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
KernelData kd = KernelData::Default<arg_max_min_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -47,7 +47,7 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio
auto& kernel = kd.kernels[i];
DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
dispatchData.gws = { Align(size, 16), orgParams.inputs[0].Batch().v, 1 };

View File

@ -65,7 +65,7 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param
KernelData kd = KernelData::Default<average_unpooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -79,7 +79,7 @@ KernelsData BatchToSpaceKernelBase::GetCommonKernelsData(const Params& params, c
}
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -130,7 +130,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para
auto finalKernelName = GetKernelName(newParams);
auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];

View File

@ -40,7 +40,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
KernelData k_data = KernelData::Default<border_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -37,7 +37,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
KernelData k_data = KernelData::Default<broadcast_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -108,7 +108,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
auto& kernel = kd.kernels[i];
DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);

View File

@ -105,7 +105,7 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para
auto& kernel = kd.kernels[i];
DispatchData dispatchData = SetDefault(newParams);
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, optParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, optParams, i);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);

View File

@ -214,7 +214,7 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
auto finalKernelName = GetKernelName(newParams);
auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];

View File

@ -81,7 +81,7 @@ KernelsData DeformableConvolutionKernel_bfyx_interp::GetKernelsData(const Params
convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
CommonDispatchData dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -56,7 +56,7 @@ KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& param
KernelData kd = KernelData::Default<ctc_greedy_decoder_params>(params);
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -90,7 +90,7 @@ KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params,
}
auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -40,7 +40,7 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param
// partial sum
auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("CUM_SUM_PARTIAL_SUM", 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, dispatchData.stage_1, params.engineInfo, kernelName, jit, entry_point);
@ -51,7 +51,7 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param
}
{
// Final
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, 1);
auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_final);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -122,7 +122,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const
}
auto cldnn_jit = GetJitConstants(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -61,7 +61,7 @@ KernelsData DepthToSpaceKernelBase::GetCommonKernelsData(const Params& params, c
}
auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -326,7 +326,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co
KernelData kd = KernelData::Default<eltwise_params>(params);
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -39,7 +39,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, con
KernelData kd = KernelData::Default<eltwise_params>(params);
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -613,7 +613,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
KernelData kd = KernelData::Default<eltwise_params>(params);
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -78,7 +78,7 @@ KernelsData EltwiseKernel_fs_b_yx_fsv32::GetKernelsData(const Params& params, co
std::pair<std::string, std::string> jit;
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
try {
auto cldnn_jit = GetJitConstants(newParams);

View File

@ -72,7 +72,7 @@ KernelsData EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32::GetKernelsData(const Par
std::pair<std::string, std::string> jit;
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
try {
auto cldnn_jit = GetJitConstants(newParams);

View File

@ -95,7 +95,7 @@ KernelsData EltwiseKernel_vload8::GetKernelsData(const Params& params, const opt
std::pair<std::string, std::string> jit;
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
try {
auto cldnn_jit = GetJitConstants(newParams);

View File

@ -50,7 +50,7 @@ KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const op
}
auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -63,7 +63,7 @@ KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& pa
KernelData kd = KernelData::Default<extract_image_patches_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -83,7 +83,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params &params,
kd.kernels.resize(1);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
const DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);

View File

@ -268,7 +268,7 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p
auto finalKernelName = GetKernelName(newParams);
auto cldnnJit = GetJitConstants(newParams, dispatchData);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
auto& kernel = kd.kernels[0];

View File

@ -209,7 +209,7 @@ KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional
gather_params& newParams = *static_cast<gather_params*>(kd.params.get());
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -187,7 +187,7 @@ KernelsData GatherNDKernelRef::GetKernelsData(const Params& params, const option
auto dispatchData = SetDefault(newParams, options);
auto cldnn_jit = GetJitConstants(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params));

View File

@ -34,7 +34,7 @@ KernelsData GatherTreeKernelBase::GetCommonKernelsData(const Params& params,
auto dispatchData = SetDefault(gt_params);
auto kernel_data = KernelData::Default<gather_tree_params>(params);
auto cldnn_jit = GetJitConstants(gt_params);
auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
FillCLKernelData(kernel_data.kernels[0],
dispatchData,

View File

@ -45,7 +45,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -137,7 +137,7 @@ KernelsData GemmKernelMMADint8::GetKernelsData(const Params& params, const optio
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -107,7 +107,7 @@ KernelsData GemmKernelMMADslmInt8::GetKernelsData(const Params& params, const op
KernelData k_data = KernelData::Default<gemm_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -48,6 +48,8 @@ GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& p
GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gemm_params& params) const {
const auto& output = params.output;
GemmKernelTiledOpt::GemmTuningData tuning_data;
auto m_size = output.Y().v;
auto n_size = output.X().v;
auto k_size = params.transpose_input0 ? params.inputs[0].Y().v : params.inputs[0].X().v;
@ -83,6 +85,7 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons
JitConstants jit = Parent::GetJitConstants(params);
const auto& output = params.output;
GemmTuningData tuning_data = SetTuningParams(params);
auto m_size = output.Y().v;
auto n_size = output.X().v;

View File

@ -12,12 +12,12 @@ class GemmKernelTiledOpt : public GemmKernelBase {
public:
using Parent = GemmKernelBase;
mutable struct GemmTuningData {
struct GemmTuningData {
size_t simd_size = 8;
size_t tile_m_size = 1;
size_t tile_k_size = 1;
size_t tile_n_size = 1;
} tuning_data;
};
GemmKernelTiledOpt() : GemmKernelBase("gemm_tiled_opt") {}

View File

@ -38,7 +38,7 @@ KernelsData GRNKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<grn_params>(params);
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -87,7 +87,7 @@ KernelsData LRNKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<lrn_params>(params);
auto cldnnJit = GetJitConstants(orgParams, dispatchData);
auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
auto fused_deps_total = GetFusedPrimitiveInputsCount(params);

View File

@ -77,7 +77,7 @@ KernelsData LSTMEltKernelBase::GetCommonKernelsData(const Params& params, const
auto& kernel = kd.kernels[0];
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1};

View File

@ -47,7 +47,7 @@ KernelsData LSTMGemmKernelBase::GetCommonKernelsData(const Params& params, const
// TODO: reorder weights if needed
auto& kernel = kd.kernels[0];
auto cldnnJit = GetJitConstants(newParams);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1};

View File

@ -85,7 +85,7 @@ KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params,
}
auto cldnn_jit = GetJitConstants(dlstm_params);
auto entry_point = GetEntryPoint(kernelName, dlstm_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, dlstm_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -57,7 +57,7 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para
KernelData k_data = KernelData::Default<lstm_dynamic_input_params>(params, 1);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -118,7 +118,7 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p
KernelData k_data = KernelData::Default<lstm_dynamic_timeloop_params>(params, 1);
auto cldnn_jit = GetJitConstants(org_params);
auto entry_point = GetEntryPoint(kernelName, org_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, org_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -65,7 +65,7 @@ KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<max_unpooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -182,11 +182,12 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
KernelData kd = KernelData::Default<mvn_params>(params, kernels_num);
auto finalKernelName = GetKernelName(orgParams);
size_t entry_part_id = 0;
{
// Mean first stage
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_1", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel,
@ -210,7 +211,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
// Mean second stage
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_2", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[1];
FillCLKernelData(kernel,
@ -234,7 +235,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
// Variance first stage
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_1", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[2];
FillCLKernelData(kernel,
@ -257,7 +258,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
// Variance second stage
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_2", 1));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[3];
FillCLKernelData(kernel,
@ -282,7 +283,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MAIN", 1));
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_MEAN", 1));
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_VARIANCE", params.mvnNormalizeVariance));
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[kernels_num - 1];
FillCLKernelData(kernel,

View File

@ -62,7 +62,7 @@ KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params,
auto finalKernelName = GetKernelName(orgParams);
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options);
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -56,7 +56,7 @@ KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<normalize_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -45,7 +45,7 @@ KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params,
KernelData k_data = KernelData::Default<one_hot_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -36,7 +36,7 @@ KernelsData PermuteKernelBase::GetKernelsData(const Params& params, const option
auto dispatchData = SetDefault(newParams);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
std::pair<std::string, std::string> jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 1, GetFusedPrimitiveInputsCount(params));

View File

@ -175,7 +175,7 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<pooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -36,7 +36,7 @@ KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params
auto dispatchData = SetDefault(prim_params);
KernelData k_data = KernelData::Default<PyramidROIAlign_params>(params);
auto cldnn_jit = GetJitConstants(prim_params);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = k_data.kernels[0];

View File

@ -62,7 +62,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio
}
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -231,7 +231,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
KernelData kd = KernelData::Default<reduce_params>(params);
auto cldnn_jit = GetJitConstants(params);
auto entry_point = GetEntryPoint(kernelName, params.layerID, options);
auto entry_point = GetEntryPoint(kernelName, params.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -80,7 +80,7 @@ KernelsData RegionYoloKernelRef::GetKernelsData(const Params& params, const opti
KernelData kd = KernelData::Default<region_yolo_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -198,7 +198,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params
dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
@ -222,7 +222,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params
DispatchData dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -52,7 +52,7 @@ KernelsData ReorgYoloKernelRef::GetKernelsData(const Params& params, const optio
KernelData kd = KernelData::Default<reorg_yolo_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -221,7 +221,7 @@ KernelsData ResampleKernelBase::GetCommonKernelsData(const Params& params, const
resample_params& newParams = *static_cast<resample_params*>(kd.params.get());
auto dispatchData = SetDefault(newParams);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -37,7 +37,7 @@ KernelsData ReshapeKernelRef::GetKernelsData(const Params& params, const optiona
KernelData kd = KernelData::Default<reshape_params>(params);
reshape_params& newParams = *static_cast<reshape_params*>(kd.params.get());
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = MakeBaseParamsJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -58,7 +58,7 @@ KernelsData ReverseSequenceKernelRef::GetKernelsData(const Params& params, const
assert(params.GetType() == KernelType::REVERSE_SEQUENCE);
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -50,7 +50,7 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params,
KernelData kd = KernelData::Default<roi_pooling_params>(params);
auto cldnn_jit = GetJitConstants(orgParams);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -145,7 +145,7 @@ KernelsData ScatterElementsUpdateKernelRef::GetKernelsData(const Params& params,
for (int i = 0; i < 2; i++) {
auto dispatchData = SetDefault(newParams, options, (i == 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
if (i == 1) {
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));

View File

@ -162,7 +162,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const
// Second iter - update values specified by updates at specific index position specified by indices
for (int i = 0; i < 2; i++) {
auto dispatchData = SetDefault(newParams, options, (i == 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
if (i == 1) {
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));

View File

@ -269,7 +269,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o
for (int i = start_with_iteration; i < 2; i++) {
auto dispatchData = SetDefault(newParams, options, (i == 1));
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
if (i == 1) {
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));

View File

@ -123,7 +123,7 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o
KernelData kd = KernelData::Default<select_params>(params);
select_params& newParams = *static_cast<select_params*>(kd.params.get());
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -89,7 +89,7 @@ KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const
assert(params.GetType() == KernelType::SHUFFLE_CHANNELS);
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -62,7 +62,7 @@ KernelsData SoftmaxKernelBase::GetCommonKernelsData(const Params& params, const
auto dispatchData = SetDefault(orgParams, options);
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
auto& kernel = kd.kernels[0];

View File

@ -79,7 +79,7 @@ KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, c
}
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -91,7 +91,7 @@ KernelsData SpaceToDepthKernelRef::GetKernelsData(const Params& params, const op
}
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -155,7 +155,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op
assert(params.GetType() == KernelType::STRIDED_SLICE);
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -55,7 +55,7 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p
tile_params& newParams = *static_cast<tile_params*>(kd.params.get());
auto dispatchData = SetDefault(newParams, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
auto cldnn_jit = GetJitConstants(newParams);
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

View File

@ -63,8 +63,10 @@ public:
} // namespace
std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName,
const std::string& layerID,
const optional_params& options) const {
const std::string& layerID,
const Params& params,
const optional_params& options,
const size_t partID) const {
std::string kernelID = layerID;
if (kernelID.empty() || !options.meaningfulKernelsNames) {
@ -74,7 +76,8 @@ std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName,
std::replace(kernelID.begin(), kernelID.end(), '.', '_');
std::replace(kernelID.begin(), kernelID.end(), '/', '_');
kernelID += "_" + toCodeString(UniqeID());
// UniqueID = program_id + processing_index + additional weight/reorder tag
kernelID += "_" + params.uniqueID + "_" + std::to_string(partID);
return kernelID;
}

View File

@ -24,7 +24,9 @@ protected:
const std::string& kernel_name) const;
std::string GetEntryPoint(const std::string& templateName,
const std::string& layerID,
const optional_params& options) const;
const Params& params,
const optional_params& options,
const size_t partID = 0) const;
Arguments GetArgsDesc(uint32_t num_of_input,
bool use_weights,
bool use_bias,

View File

@ -119,6 +119,7 @@ bool UpdateWeightsParams(weight_bias_params& newParams,
r_params.output = newParams.weights.TransformIgnorePadding(reqLayout, dtype, groups, false);
r_params.rotate_180 = rotate;
r_params.engineInfo = newParams.engineInfo;
r_params.uniqueID = newParams.uniqueID + "_weight";
reorder_optional_params op;
KernelsData kernels_data = reorderKS.GetBestKernels(r_params, op);

View File

@ -8,7 +8,6 @@
namespace kernel_selector {
const primitive_db KernelBase::db;
thread_local size_t KernelBase::counter = 0;
std::string toString(const kernel_selector::CommonDispatchData& dispatchData) {
auto gws = dispatchData.gws;

View File

@ -56,14 +56,12 @@ public:
virtual const std::string GetName() const { return kernelName; }
/// Gives read-only access to the static primitive database member `db`.
static const primitive_db& get_db() {
    const primitive_db& database = db;
    return database;
}
/// Restarts the id sequence produced by UniqeID() by setting the counter back to zero.
/// The counter is declared thread_local, so only the calling thread's counter is reset.
static void ResetCounter() {
    counter = size_t{0};
}
protected:
static const primitive_db db;
const std::string kernelName;
static void CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData);
/// Hands out the current counter value and then advances the counter by one.
/// The counter is declared thread_local, so every thread draws from its own sequence.
// TODO: use interlocked
static size_t UniqeID() {
    const size_t issued = counter;
    ++counter;
    return issued;
}
virtual Datatype GetUnitType(const base_params& params) const;
bool IsFusedPrimitiveSupported(const fused_operation_desc& fused_op) const;
@ -71,8 +69,5 @@ protected:
virtual std::vector<FusedOpType> GetSupportedFusedOps() const;
virtual JitConstants MakeFusedOpsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
virtual JitConstants MakeFusedOpsDeclsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
private:
static thread_local size_t counter;
};
} // namespace kernel_selector

View File

@ -404,6 +404,7 @@ public:
std::string layerID;
std::string forceImplementation;
EngineInfo engineInfo;
std::string uniqueID;
virtual std::string to_string() const;
virtual std::string to_cache_string_v2() const;

View File

@ -242,7 +242,7 @@ kernels_cache::kernels_cache(engine& engine) : _engine(engine) { }
kernel_id kernels_cache::set_kernel_source(
const std::shared_ptr<kernel_string>& kernel_string,
bool dump_custom_program) {
std::lock_guard<std::mutex> lock(_mutex);
// we need unique id in order to avoid conflict across topologies.
const auto kernel_num = _kernels.size() + _kernels_code.size();
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);

View File

@ -13,11 +13,7 @@
#include <memory>
#include <atomic>
#include <string>
#include <unordered_set>
#define CLDNN_THREADING_SEQ 0
#define CLDNN_THREADING_TBB 1
#define CLDNN_THREADING_THREADPOOL 2
#include <set>
#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
#include <tbb/task_arena.h>
@ -111,26 +107,28 @@ public:
std::shared_ptr<kernel_string> kernel_strings;
std::string id;
bool dump_custom_program;
size_t hash_value;
kernel_code(const std::shared_ptr<kernel_string>& _kernel_strings,
const std::string& _id,
bool _dump_custom_program)
: kernel_strings(_kernel_strings),
id(_id),
dump_custom_program(_dump_custom_program) {}
dump_custom_program(_dump_custom_program),
hash_value(_kernel_strings->get_hash()) {}
bool operator == (const kernel_code& c2) const {
return kernel_strings->get_hash() == c2.kernel_strings->get_hash();
bool operator == (const kernel_code& rhs) const {
return (hash_value == rhs.hash_value);
}
};
struct hash_kernel_code {
size_t operator()(const kernel_code& x) const {
return std::hash<std::string>()(x.kernel_strings->get_hash());
struct cmp_kernel_code {
bool operator()(const kernel_code& x1, const kernel_code& x2) const {
return (x1.hash_value < x2.hash_value);
}
};
using kernels_code = std::unordered_set<kernel_code, hash_kernel_code>;
using kernels_code = std::set<kernel_code, cmp_kernel_code>;
private:
static std::mutex _mutex;

View File

@ -10,17 +10,49 @@
#include "program_node.h"
#include "cldnn/runtime/engine.hpp"
#include "runtime/cldnn_itt.hpp"
#include <iostream>
#include <cmath>
#include <iomanip>
#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif
using namespace cldnn;
/// Graph-compilation pass.
///
/// Pass 1 (serial) gives every node in the processing order a unique id equal to its
/// position in that order and makes sure the output layout of every non-data node has
/// been computed. Pass 2 then selects an implementation for every node that is neither
/// `data` nor a `mutable_data` without dependencies. Pass 2 runs through
/// tbb::parallel_for when CLDNN_THREADING == CLDNN_THREADING_TBB and as a plain loop
/// otherwise.
///
/// @param p  program whose nodes are processed; each node's unique id and
///           selected_impl are updated in place.
void compile_graph::run(program_impl& p) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph");

    // Pass 1: ids and layouts only. Implementation selection is deliberately NOT done
    // here - it belongs exclusively to pass 2, otherwise choose_impl would be executed
    // twice for every node and the parallel pass would bring no benefit.
    size_t order_idx = 0;
    for (auto& node : p.get_processing_order()) {
        node->set_unique_id(std::to_string(order_idx++));
        if (!node->is_type<data>()) {
            node->get_output_layout();
        }
    }

#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
    const auto n_threads = p.get_engine().configuration().n_threads;
    // The arena only has to live for the duration of this pass, so automatic storage
    // is enough; it is destroyed when the function returns.
    tbb::task_arena arena;
    arena.initialize(n_threads);
    arena.execute([&p] {
        auto& proc_order = p.get_processing_order();
        tbb::parallel_for(tbb::blocked_range<size_t>(0, proc_order.size()),
                          [&proc_order, &p](const tbb::blocked_range<size_t>& r) {
            // Seek to the start of this chunk once and then step element by element.
            // Calling std::next from begin() for every index would be linear per
            // element if the processing order is not a random-access container.
            auto it = std::next(proc_order.begin(), r.begin());
            for (auto i = r.begin(); i != r.end(); ++i, ++it) {
                auto& node = *it;
                // The unique id was already set to this same index in pass 1.
                if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
                    node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
                }
            }
        });
    });
#else
    for (auto& node : p.get_processing_order()) {
        if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
            node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
        }
    }
#endif
}

View File

@ -59,6 +59,7 @@ void post_input_reorder::run(program_impl& p) {
input_layout.size,
input_layout.data_padding);
auto& reorder = add_reorder(p, input, node, current_layout);
reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
reorder.get_output_layout(false);
node->set_output_layout(previous_layout, false);
reorder.set_selected_impl(reorder.type()->choose_impl(p.get_engine(), reorder));

View File

@ -28,6 +28,7 @@ void remove_redundant_reorders::run(program_impl& p) {
return;
auto& eng = p.get_engine();
node.set_unique_id(node.get_unique_id() + "_reorder");
auto new_impl = node.type()->choose_impl(eng, node);
node.set_selected_impl(std::move(new_impl));
};

View File

@ -305,7 +305,12 @@ public:
bool need_lockable_memory() const;
std::string get_unique_id() const { return unique_id; }
void set_unique_id(std::string id) { unique_id = id; }
protected:
std::string unique_id;
std::shared_ptr<primitive> desc;
program_impl& myprog;

View File

@ -734,6 +734,7 @@ void set_params(const program_node& node, kernel_selector::params& params) {
const auto& program = node.get_program();
const auto& device_info = program.get_engine().get_device_info();
params.uniqueID = std::to_string(program.get_id()) + "_" + node.get_unique_id();
params.engineInfo.bSubGroupSupport = device_info.supports_subgroups;
params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short;
params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char;

View File

@ -94,7 +94,6 @@ program_impl::program_impl(engine& engine_ref,
tuning_cache(nullptr),
is_body_program(is_body_program) {
init_primitives();
kernel_selector::KernelBase::ResetCounter();
set_options();
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
prepare_nodes(topology);