[GPU] Parallelize node.choose_impl in compile_graph (#6261)
This commit is contained in:
parent
79a6ad0c0f
commit
60651fe8f5
@ -16,6 +16,10 @@
|
||||
#include <utility>
|
||||
#include <string>
|
||||
|
||||
// Integer tags 0..2 naming three threading modes (SEQ, TBB, THREADPOOL).
// NOTE(review): presumably compared against a build-time selection macro to
// pick the threading back-end; the selecting macro is not visible here - confirm.
#define CLDNN_THREADING_SEQ 0
|
||||
#define CLDNN_THREADING_TBB 1
|
||||
#define CLDNN_THREADING_THREADPOOL 2
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
class stream;
|
||||
|
@ -127,7 +127,8 @@ struct kernel_string {
|
||||
|
||||
// Default constructor: initialises str, jit, undefs, options and entry_point
// from empty string literals and sets batch_compilation to false.
kernel_string() : str(""), jit(""), undefs(""), options(""), entry_point(""), batch_compilation(false) {}
|
||||
|
||||
// Returns the plain concatenation str + jit + undefs + options + entry_point.
// Despite the name, no hashing is performed here: the result is the full string.
std::string get_hash() { return str + jit + undefs + options + entry_point; }
|
||||
// Returns str, jit, undefs, options and entry_point concatenated in that order.
// The parts are joined without any separator, so two objects whose fields are
// split differently but concatenate to the same text yield the same string.
std::string get_str() const { return str + jit + undefs + options + entry_point; }
|
||||
// Returns std::hash<std::string> applied to get_str().
// Each call rebuilds the concatenated string before hashing it.
// NOTE(review): std::hash values are only required to be consistent within a
// single program execution - confirm no caller persists this value.
size_t get_hash() const { return std::hash<std::string>()(get_str()); }
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -69,11 +69,10 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
|
||||
KernelData kd = KernelData::Default<activation_params>(params);
|
||||
|
||||
activation_params& newParams = *static_cast<activation_params*>(kd.params.get());
|
||||
const std::string kernel_id = GetEntryPoint(kernelName, params.layerID, options);
|
||||
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -86,7 +86,7 @@ KernelsData ArgMaxMinKernelAxis::GetKernelsData(const Params& params, const opti
|
||||
KernelData kd = KernelData::Default<arg_max_min_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -45,7 +45,7 @@ KernelsData ArgMaxMinKernelBase::GetCommonKernelsData(const Params& params, cons
|
||||
KernelData kd = KernelData::Default<arg_max_min_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -47,7 +47,7 @@ KernelsData ArgMaxMinKernelOpt::GetKernelsData(const Params& params, const optio
|
||||
auto& kernel = kd.kernels[i];
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
|
||||
dispatchData.gws = { Align(size, 16), orgParams.inputs[0].Batch().v, 1 };
|
||||
|
@ -65,7 +65,7 @@ KernelsData AverageUnpoolingKernelBase::GetCommonKernelsData(const Params& param
|
||||
KernelData kd = KernelData::Default<average_unpooling_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -79,7 +79,7 @@ KernelsData BatchToSpaceKernelBase::GetCommonKernelsData(const Params& params, c
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -130,7 +130,7 @@ KernelsData BinaryConvolutionKernelBase::GetCommonKernelsData(const Params& para
|
||||
|
||||
auto finalKernelName = GetKernelName(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams, dispatchData);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -40,7 +40,7 @@ KernelsData BorderKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData k_data = KernelData::Default<border_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -37,7 +37,7 @@ KernelsData BroadcastKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData k_data = KernelData::Default<broadcast_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -108,7 +108,7 @@ KernelsData ConcatenationKernelBase::GetCommonKernelsData(const Params& params,
|
||||
auto& kernel = kd.kernels[i];
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
|
||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
|
||||
|
@ -105,7 +105,7 @@ KernelsData ConcatenationKernel_fs_b_yx_fsv32::GetKernelsData(const Params& para
|
||||
auto& kernel = kd.kernels[i];
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, optParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, optParams, i);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
|
||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entryPoint, params.engineInfo);
|
||||
|
@ -214,7 +214,7 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
|
||||
|
||||
auto finalKernelName = GetKernelName(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams, dispatchData);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -81,7 +81,7 @@ KernelsData DeformableConvolutionKernel_bfyx_interp::GetKernelsData(const Params
|
||||
convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
|
||||
|
||||
CommonDispatchData dispatchData = SetDefault(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -56,7 +56,7 @@ KernelsData CTCGreedyDecoderKernelBase::GetCommonKernelsData(const Params& param
|
||||
KernelData kd = KernelData::Default<ctc_greedy_decoder_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -90,7 +90,7 @@ KernelsData CumSumKernelBase::GetCommonKernelsData(const Params& params,
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -40,7 +40,7 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param
|
||||
// partial sum
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_1);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("CUM_SUM_PARTIAL_SUM", 1));
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData.stage_1, params.engineInfo, kernelName, jit, entry_point);
|
||||
@ -51,7 +51,7 @@ KernelsData CumSumKernelPartialSum::GetMultiStageKernelsData(const Params& param
|
||||
}
|
||||
{
|
||||
// Final
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, 1);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData.stage_final);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -122,7 +122,7 @@ KernelsData DeconvolutionKernelBase::GetKernelsData(const Params& params, const
|
||||
}
|
||||
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -61,7 +61,7 @@ KernelsData DepthToSpaceKernelBase::GetCommonKernelsData(const Params& params, c
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -326,7 +326,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv16::GetKernelsData(const Params& params, co
|
||||
KernelData kd = KernelData::Default<eltwise_params>(params);
|
||||
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -39,7 +39,7 @@ KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, con
|
||||
KernelData kd = KernelData::Default<eltwise_params>(params);
|
||||
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -613,7 +613,7 @@ KernelsData EltwiseKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
KernelData kd = KernelData::Default<eltwise_params>(params);
|
||||
eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -78,7 +78,7 @@ KernelsData EltwiseKernel_fs_b_yx_fsv32::GetKernelsData(const Params& params, co
|
||||
|
||||
std::pair<std::string, std::string> jit;
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
|
||||
try {
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
|
@ -72,7 +72,7 @@ KernelsData EltwiseKernel_mixed_byxf_and_fs_b_yx_fsv32::GetKernelsData(const Par
|
||||
|
||||
std::pair<std::string, std::string> jit;
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
|
||||
try {
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
|
@ -95,7 +95,7 @@ KernelsData EltwiseKernel_vload8::GetKernelsData(const Params& params, const opt
|
||||
|
||||
std::pair<std::string, std::string> jit;
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
|
||||
try {
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
|
@ -50,7 +50,7 @@ KernelsData EmbeddingBagKernelRef::GetKernelsData(const Params& params, const op
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -63,7 +63,7 @@ KernelsData ExtractImagePatchesKernelBase::GetCommonKernelsData(const Params& pa
|
||||
KernelData kd = KernelData::Default<extract_image_patches_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -83,7 +83,7 @@ KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params ¶ms,
|
||||
|
||||
kd.kernels.resize(1);
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
|
||||
const DispatchData dispatchData = SetDefault(newParams, autoTuneIndex);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||
|
@ -268,7 +268,7 @@ KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& p
|
||||
|
||||
auto finalKernelName = GetKernelName(newParams);
|
||||
auto cldnnJit = GetJitConstants(newParams, dispatchData);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -209,7 +209,7 @@ KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional
|
||||
gather_params& newParams = *static_cast<gather_params*>(kd.params.get());
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -187,7 +187,7 @@ KernelsData GatherNDKernelRef::GetKernelsData(const Params& params, const option
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2, GetFusedPrimitiveInputsCount(params));
|
||||
|
@ -34,7 +34,7 @@ KernelsData GatherTreeKernelBase::GetCommonKernelsData(const Params& params,
|
||||
auto dispatchData = SetDefault(gt_params);
|
||||
auto kernel_data = KernelData::Default<gather_tree_params>(params);
|
||||
auto cldnn_jit = GetJitConstants(gt_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, gt_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
FillCLKernelData(kernel_data.kernels[0],
|
||||
dispatchData,
|
||||
|
@ -45,7 +45,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData k_data = KernelData::Default<gemm_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -137,7 +137,7 @@ KernelsData GemmKernelMMADint8::GetKernelsData(const Params& params, const optio
|
||||
KernelData k_data = KernelData::Default<gemm_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -107,7 +107,7 @@ KernelsData GemmKernelMMADslmInt8::GetKernelsData(const Params& params, const op
|
||||
KernelData k_data = KernelData::Default<gemm_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -48,6 +48,8 @@ GemmKernelBase::DispatchData GemmKernelTiledOpt::SetDefault(const gemm_params& p
|
||||
GemmKernelTiledOpt::GemmTuningData GemmKernelTiledOpt::SetTuningParams(const gemm_params& params) const {
|
||||
const auto& output = params.output;
|
||||
|
||||
GemmKernelTiledOpt::GemmTuningData tuning_data;
|
||||
|
||||
auto m_size = output.Y().v;
|
||||
auto n_size = output.X().v;
|
||||
auto k_size = params.transpose_input0 ? params.inputs[0].Y().v : params.inputs[0].X().v;
|
||||
@ -83,6 +85,7 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const gemm_params& params) cons
|
||||
JitConstants jit = Parent::GetJitConstants(params);
|
||||
|
||||
const auto& output = params.output;
|
||||
GemmTuningData tuning_data = SetTuningParams(params);
|
||||
|
||||
auto m_size = output.Y().v;
|
||||
auto n_size = output.X().v;
|
||||
|
@ -12,12 +12,12 @@ class GemmKernelTiledOpt : public GemmKernelBase {
|
||||
public:
|
||||
using Parent = GemmKernelBase;
|
||||
|
||||
// Bundle of tuning values for this kernel: simd_size (default 8) and the
// tile_m_size / tile_k_size / tile_n_size fields (default 1 each).
// NOTE(review): the tile fields presumably correspond to the M, K and N GEMM
// dimensions - confirm against the code that fills them in.
mutable struct GemmTuningData {
|
||||
struct GemmTuningData {
|
||||
size_t simd_size = 8;
|
||||
size_t tile_m_size = 1;
|
||||
size_t tile_k_size = 1;
|
||||
size_t tile_n_size = 1;
|
||||
} tuning_data;
|
||||
};
|
||||
|
||||
GemmKernelTiledOpt() : GemmKernelBase("gemm_tiled_opt") {}
|
||||
|
||||
|
@ -38,7 +38,7 @@ KernelsData GRNKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<grn_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -87,7 +87,7 @@ KernelsData LRNKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<lrn_params>(params);
|
||||
|
||||
auto cldnnJit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
auto fused_deps_total = GetFusedPrimitiveInputsCount(params);
|
||||
|
||||
|
@ -77,7 +77,7 @@ KernelsData LSTMEltKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
auto cldnnJit = GetJitConstants(newParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
|
||||
kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1};
|
||||
|
@ -47,7 +47,7 @@ KernelsData LSTMGemmKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
// TODO: reorder weights if needed
|
||||
auto& kernel = kd.kernels[0];
|
||||
auto cldnnJit = GetJitConstants(newParams);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entryPoint = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
|
||||
|
||||
kernel.params.workGroups.global = {out.X().v, out.Batch().v, 1};
|
||||
|
@ -85,7 +85,7 @@ KernelsData LSTM_DynamicInputKernelBfyxOpt::GetKernelsData(const Params& params,
|
||||
}
|
||||
|
||||
auto cldnn_jit = GetJitConstants(dlstm_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, dlstm_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, dlstm_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -57,7 +57,7 @@ KernelsData LSTM_DynamicInputKernelBase::GetCommonKernelsData(const Params& para
|
||||
KernelData k_data = KernelData::Default<lstm_dynamic_input_params>(params, 1);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -118,7 +118,7 @@ KernelsData LSTM_DynamicTimeloopKernelBase::GetCommonKernelsData(const Params& p
|
||||
KernelData k_data = KernelData::Default<lstm_dynamic_timeloop_params>(params, 1);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(org_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, org_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, org_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -65,7 +65,7 @@ KernelsData MaxUnpoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<max_unpooling_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -182,11 +182,12 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
|
||||
KernelData kd = KernelData::Default<mvn_params>(params, kernels_num);
|
||||
|
||||
auto finalKernelName = GetKernelName(orgParams);
|
||||
size_t entry_part_id = 0;
|
||||
{
|
||||
// Mean first stage
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_1", 1));
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel,
|
||||
@ -210,7 +211,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
|
||||
// Mean second stage
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MEAN_2", 1));
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[1];
|
||||
FillCLKernelData(kernel,
|
||||
@ -234,7 +235,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
|
||||
// Variance first stage
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_1);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_1", 1));
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[2];
|
||||
FillCLKernelData(kernel,
|
||||
@ -257,7 +258,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
|
||||
// Variance second stage
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData.stage_2);
|
||||
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_VAR_2", 1));
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id++);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[3];
|
||||
FillCLKernelData(kernel,
|
||||
@ -282,7 +283,7 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par
|
||||
cldnn_jit.AddConstant(MakeJitConstant("MVN_KERNEL_MAIN", 1));
|
||||
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_MEAN", 1));
|
||||
cldnn_jit.AddConstant(MakeJitConstant("PRECALC_VARIANCE", params.mvnNormalizeVariance));
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options, entry_part_id);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[kernels_num - 1];
|
||||
FillCLKernelData(kernel,
|
||||
|
@ -62,7 +62,7 @@ KernelsData MVNKernelBase::GetCommonKernelsData(const Params& params,
|
||||
|
||||
auto finalKernelName = GetKernelName(orgParams);
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(finalKernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(finalKernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -56,7 +56,7 @@ KernelsData NormalizeKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<normalize_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -45,7 +45,7 @@ KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData k_data = KernelData::Default<one_hot_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -36,7 +36,7 @@ KernelsData PermuteKernelBase::GetKernelsData(const Params& params, const option
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
std::pair<std::string, std::string> jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
auto& kernel = kd.kernels[0];
|
||||
FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point, "", false, false, 1, GetFusedPrimitiveInputsCount(params));
|
||||
|
@ -175,7 +175,7 @@ KernelsData PoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<pooling_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -36,7 +36,7 @@ KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params
|
||||
auto dispatchData = SetDefault(prim_params);
|
||||
KernelData k_data = KernelData::Default<PyramidROIAlign_params>(params);
|
||||
auto cldnn_jit = GetJitConstants(prim_params);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = k_data.kernels[0];
|
||||
|
@ -62,7 +62,7 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -231,7 +231,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
|
||||
KernelData kd = KernelData::Default<reduce_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(params);
|
||||
auto entry_point = GetEntryPoint(kernelName, params.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, params.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -80,7 +80,7 @@ KernelsData RegionYoloKernelRef::GetKernelsData(const Params& params, const opti
|
||||
KernelData kd = KernelData::Default<region_yolo_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -198,7 +198,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_weights_params
|
||||
|
||||
dispatchData = SetDefault(newParams);
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
@ -222,7 +222,7 @@ KernelsData ReorderKernelBase::GetCommonKernelsData(const reorder_params& params
|
||||
|
||||
DispatchData dispatchData = SetDefault(newParams);
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -52,7 +52,7 @@ KernelsData ReorgYoloKernelRef::GetKernelsData(const Params& params, const optio
|
||||
KernelData kd = KernelData::Default<reorg_yolo_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -221,7 +221,7 @@ KernelsData ResampleKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
resample_params& newParams = *static_cast<resample_params*>(kd.params.get());
|
||||
|
||||
auto dispatchData = SetDefault(newParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -37,7 +37,7 @@ KernelsData ReshapeKernelRef::GetKernelsData(const Params& params, const optiona
|
||||
KernelData kd = KernelData::Default<reshape_params>(params);
|
||||
reshape_params& newParams = *static_cast<reshape_params*>(kd.params.get());
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = MakeBaseParamsJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -58,7 +58,7 @@ KernelsData ReverseSequenceKernelRef::GetKernelsData(const Params& params, const
|
||||
assert(params.GetType() == KernelType::REVERSE_SEQUENCE);
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -50,7 +50,7 @@ KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params,
|
||||
KernelData kd = KernelData::Default<roi_pooling_params>(params);
|
||||
|
||||
auto cldnn_jit = GetJitConstants(orgParams);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -145,7 +145,7 @@ KernelsData ScatterElementsUpdateKernelRef::GetKernelsData(const Params& params,
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
auto dispatchData = SetDefault(newParams, options, (i == 1));
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
|
||||
|
||||
if (i == 1) {
|
||||
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));
|
||||
|
@ -162,7 +162,7 @@ KernelsData ScatterNDUpdateKernelRef::GetKernelsData(const Params& params, const
|
||||
// Second iter - update values specified by updates at specific index position specified by indices
|
||||
for (int i = 0; i < 2; i++) {
|
||||
auto dispatchData = SetDefault(newParams, options, (i == 1));
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
|
||||
|
||||
if (i == 1) {
|
||||
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));
|
||||
|
@ -269,7 +269,7 @@ KernelsData ScatterUpdateKernelRef::GetKernelsData(const Params& params, const o
|
||||
|
||||
for (int i = start_with_iteration; i < 2; i++) {
|
||||
auto dispatchData = SetDefault(newParams, options, (i == 1));
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options, i);
|
||||
|
||||
if (i == 1) {
|
||||
cldnn_jit.AddConstant(MakeJitConstant("IS_SECOND_ITER", "true"));
|
||||
|
@ -123,7 +123,7 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o
|
||||
KernelData kd = KernelData::Default<select_params>(params);
|
||||
select_params& newParams = *static_cast<select_params*>(kd.params.get());
|
||||
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -89,7 +89,7 @@ KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const
|
||||
assert(params.GetType() == KernelType::SHUFFLE_CHANNELS);
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -62,7 +62,7 @@ KernelsData SoftmaxKernelBase::GetCommonKernelsData(const Params& params, const
|
||||
|
||||
auto dispatchData = SetDefault(orgParams, options);
|
||||
auto cldnn_jit = GetJitConstants(orgParams, dispatchData);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, params, options);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
auto& kernel = kd.kernels[0];
|
||||
|
@ -79,7 +79,7 @@ KernelsData SpaceToBatchKernelBase::GetCommonKernelsData(const Params& params, c
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -91,7 +91,7 @@ KernelsData SpaceToDepthKernelRef::GetKernelsData(const Params& params, const op
|
||||
}
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -155,7 +155,7 @@ KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const op
|
||||
assert(params.GetType() == KernelType::STRIDED_SLICE);
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -55,7 +55,7 @@ KernelsData TileKernelRef::GetKernelsData(const Params& params, const optional_p
|
||||
tile_params& newParams = *static_cast<tile_params*>(kd.params.get());
|
||||
|
||||
auto dispatchData = SetDefault(newParams, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
|
||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||
auto cldnn_jit = GetJitConstants(newParams);
|
||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||
|
||||
|
@ -63,8 +63,10 @@ public:
|
||||
} // namespace
|
||||
|
||||
std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName,
|
||||
const std::string& layerID,
|
||||
const optional_params& options) const {
|
||||
const std::string& layerID,
|
||||
const Params& params,
|
||||
const optional_params& options,
|
||||
const size_t partID) const {
|
||||
std::string kernelID = layerID;
|
||||
|
||||
if (kernelID.empty() || !options.meaningfulKernelsNames) {
|
||||
@ -74,7 +76,8 @@ std::string KernelBaseOpenCL::GetEntryPoint(const std::string& templateName,
|
||||
std::replace(kernelID.begin(), kernelID.end(), '.', '_');
|
||||
std::replace(kernelID.begin(), kernelID.end(), '/', '_');
|
||||
|
||||
kernelID += "_" + toCodeString(UniqeID());
|
||||
// UniqueID = program_id + processing_index + additional weight/reorder tag
|
||||
kernelID += "_" + params.uniqueID + "_" + std::to_string(partID);
|
||||
|
||||
return kernelID;
|
||||
}
|
||||
|
@ -24,7 +24,9 @@ protected:
|
||||
const std::string& kernel_name) const;
|
||||
std::string GetEntryPoint(const std::string& templateName,
|
||||
const std::string& layerID,
|
||||
const optional_params& options) const;
|
||||
const Params& params,
|
||||
const optional_params& options,
|
||||
const size_t partID = 0) const;
|
||||
Arguments GetArgsDesc(uint32_t num_of_input,
|
||||
bool use_weights,
|
||||
bool use_bias,
|
||||
|
@ -119,6 +119,7 @@ bool UpdateWeightsParams(weight_bias_params& newParams,
|
||||
r_params.output = newParams.weights.TransformIgnorePadding(reqLayout, dtype, groups, false);
|
||||
r_params.rotate_180 = rotate;
|
||||
r_params.engineInfo = newParams.engineInfo;
|
||||
r_params.uniqueID = newParams.uniqueID + "_weight";
|
||||
|
||||
reorder_optional_params op;
|
||||
KernelsData kernels_data = reorderKS.GetBestKernels(r_params, op);
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
namespace kernel_selector {
|
||||
const primitive_db KernelBase::db;
|
||||
thread_local size_t KernelBase::counter = 0;
|
||||
|
||||
std::string toString(const kernel_selector::CommonDispatchData& dispatchData) {
|
||||
auto gws = dispatchData.gws;
|
||||
|
@ -56,14 +56,12 @@ public:
|
||||
virtual const std::string GetName() const { return kernelName; }
|
||||
|
||||
static const primitive_db& get_db() { return db; }
|
||||
static void ResetCounter() { counter = 0; }
|
||||
|
||||
protected:
|
||||
static const primitive_db db;
|
||||
const std::string kernelName;
|
||||
|
||||
static void CheckDispatchData(const std::string& kernelName, const kernel_selector::CommonDispatchData& dispatchData);
|
||||
static size_t UniqeID() { return counter++; } // TODO: use interlocked
|
||||
virtual Datatype GetUnitType(const base_params& params) const;
|
||||
|
||||
bool IsFusedPrimitiveSupported(const fused_operation_desc& fused_op) const;
|
||||
@ -71,8 +69,5 @@ protected:
|
||||
virtual std::vector<FusedOpType> GetSupportedFusedOps() const;
|
||||
virtual JitConstants MakeFusedOpsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
|
||||
virtual JitConstants MakeFusedOpsDeclsJitConstants(const base_params &params, const std::vector<FusedOpsConfiguration> &conf) const;
|
||||
|
||||
private:
|
||||
static thread_local size_t counter;
|
||||
};
|
||||
} // namespace kernel_selector
|
||||
|
@ -404,6 +404,7 @@ public:
|
||||
std::string layerID;
|
||||
std::string forceImplementation;
|
||||
EngineInfo engineInfo;
|
||||
std::string uniqueID;
|
||||
|
||||
virtual std::string to_string() const;
|
||||
virtual std::string to_cache_string_v2() const;
|
||||
|
@ -242,7 +242,7 @@ kernels_cache::kernels_cache(engine& engine) : _engine(engine) { }
|
||||
kernel_id kernels_cache::set_kernel_source(
|
||||
const std::shared_ptr<kernel_string>& kernel_string,
|
||||
bool dump_custom_program) {
|
||||
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
// we need unique id in order to avoid conflict across topologies.
|
||||
const auto kernel_num = _kernels.size() + _kernels_code.size();
|
||||
kernel_id id = kernel_string->entry_point + "_" + std::to_string(kernel_num);
|
||||
|
@ -13,11 +13,7 @@
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
#define CLDNN_THREADING_SEQ 0
|
||||
#define CLDNN_THREADING_TBB 1
|
||||
#define CLDNN_THREADING_THREADPOOL 2
|
||||
#include <set>
|
||||
|
||||
#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
|
||||
#include <tbb/task_arena.h>
|
||||
@ -111,26 +107,28 @@ public:
|
||||
std::shared_ptr<kernel_string> kernel_strings;
|
||||
std::string id;
|
||||
bool dump_custom_program;
|
||||
size_t hash_value;
|
||||
|
||||
kernel_code(const std::shared_ptr<kernel_string>& _kernel_strings,
|
||||
const std::string& _id,
|
||||
bool _dump_custom_program)
|
||||
: kernel_strings(_kernel_strings),
|
||||
id(_id),
|
||||
dump_custom_program(_dump_custom_program) {}
|
||||
dump_custom_program(_dump_custom_program),
|
||||
hash_value(_kernel_strings->get_hash()) {}
|
||||
|
||||
bool operator == (const kernel_code& c2) const {
|
||||
return kernel_strings->get_hash() == c2.kernel_strings->get_hash();
|
||||
bool operator == (const kernel_code& rhs) const {
|
||||
return (hash_value == rhs.hash_value);
|
||||
}
|
||||
};
|
||||
|
||||
struct hash_kernel_code {
|
||||
size_t operator()(const kernel_code& x) const {
|
||||
return std::hash<std::string>()(x.kernel_strings->get_hash());
|
||||
struct cmp_kernel_code {
|
||||
bool operator()(const kernel_code& x1, const kernel_code& x2) const {
|
||||
return (x1.hash_value < x2.hash_value);
|
||||
}
|
||||
};
|
||||
|
||||
using kernels_code = std::unordered_set<kernel_code, hash_kernel_code>;
|
||||
using kernels_code = std::set<kernel_code, cmp_kernel_code>;
|
||||
|
||||
private:
|
||||
static std::mutex _mutex;
|
||||
|
@ -10,17 +10,49 @@
|
||||
#include "program_node.h"
|
||||
#include "cldnn/runtime/engine.hpp"
|
||||
#include "runtime/cldnn_itt.hpp"
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
|
||||
#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/blocked_range.h>
|
||||
#endif
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
void compile_graph::run(program_impl& p) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph");
|
||||
size_t order_idx = 0;
|
||||
for (auto& node : p.get_processing_order()) {
|
||||
node->set_unique_id(std::to_string(order_idx++));
|
||||
if (!node->is_type<data>()) {
|
||||
node->get_output_layout();
|
||||
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
|
||||
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if (CLDNN_THREADING == CLDNN_THREADING_TBB)
|
||||
const auto n_threads = p.get_engine().configuration().n_threads;
|
||||
auto arena = std::unique_ptr<tbb::task_arena>(new tbb::task_arena());
|
||||
arena->initialize(n_threads);
|
||||
arena->execute([this, &p] {
|
||||
auto& proc_order = p.get_processing_order();
|
||||
tbb::parallel_for(tbb::blocked_range<size_t>(0, proc_order.size()), [&proc_order, &p](const tbb::blocked_range<size_t>& r) {
|
||||
for (auto i = r.begin(); i != r.end(); ++i) {
|
||||
auto& node = *(std::next(proc_order.begin(), i));
|
||||
node->set_unique_id(std::to_string(i));
|
||||
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
|
||||
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
arena.reset();
|
||||
#else
|
||||
for (auto& node : p.get_processing_order()) {
|
||||
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
|
||||
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -59,6 +59,7 @@ void post_input_reorder::run(program_impl& p) {
|
||||
input_layout.size,
|
||||
input_layout.data_padding);
|
||||
auto& reorder = add_reorder(p, input, node, current_layout);
|
||||
reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
|
||||
reorder.get_output_layout(false);
|
||||
node->set_output_layout(previous_layout, false);
|
||||
reorder.set_selected_impl(reorder.type()->choose_impl(p.get_engine(), reorder));
|
||||
|
@ -28,6 +28,7 @@ void remove_redundant_reorders::run(program_impl& p) {
|
||||
return;
|
||||
|
||||
auto& eng = p.get_engine();
|
||||
node.set_unique_id(node.get_unique_id() + "_reorder");
|
||||
auto new_impl = node.type()->choose_impl(eng, node);
|
||||
node.set_selected_impl(std::move(new_impl));
|
||||
};
|
||||
|
@ -305,7 +305,12 @@ public:
|
||||
|
||||
bool need_lockable_memory() const;
|
||||
|
||||
std::string get_unique_id() const { return unique_id; }
|
||||
void set_unique_id(std::string id) { unique_id = id; }
|
||||
|
||||
protected:
|
||||
std::string unique_id;
|
||||
|
||||
std::shared_ptr<primitive> desc;
|
||||
program_impl& myprog;
|
||||
|
||||
|
@ -734,6 +734,7 @@ void set_params(const program_node& node, kernel_selector::params& params) {
|
||||
const auto& program = node.get_program();
|
||||
const auto& device_info = program.get_engine().get_device_info();
|
||||
|
||||
params.uniqueID = std::to_string(program.get_id()) + "_" + node.get_unique_id();
|
||||
params.engineInfo.bSubGroupSupport = device_info.supports_subgroups;
|
||||
params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short;
|
||||
params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char;
|
||||
|
@ -94,7 +94,6 @@ program_impl::program_impl(engine& engine_ref,
|
||||
tuning_cache(nullptr),
|
||||
is_body_program(is_body_program) {
|
||||
init_primitives();
|
||||
kernel_selector::KernelBase::ResetCounter();
|
||||
set_options();
|
||||
pm = std::unique_ptr<pass_manager>(new pass_manager(*this));
|
||||
prepare_nodes(topology);
|
||||
|
Loading…
Reference in New Issue
Block a user