[GPU] Added shape agnostic ref kernel for Reduce and Quantize (#15183)
* [GPU] Added shape agnostic ref kernel for quantize * [GPU] Added shape agnostic ref kernel for Reduce
This commit is contained in:
parent
3bbaa16d90
commit
8e073819c3
@ -43,6 +43,8 @@ protected:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
args.outputs = { instance.output_memory_ptr() };
|
args.outputs = { instance.output_memory_ptr() };
|
||||||
|
args.shape_info = instance.shape_info_memory_ptr();
|
||||||
|
|
||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,122 +83,74 @@ public:
|
|||||||
for (size_t i = 1; i < arg.inputs_count(); i++) {
|
for (size_t i = 1; i < arg.inputs_count(); i++) {
|
||||||
quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
|
quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
|
||||||
}
|
}
|
||||||
const auto& output_layout = impl_param.get_output_layout();
|
|
||||||
quantize_params.outputs = { convert_data_tensor(output_layout) };
|
|
||||||
|
|
||||||
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
|
auto& kernel_selector = kernel_selector::quantize_kernel_selector::Instance();
|
||||||
auto best_kernel = kernel_selector.get_best_kernel(quantize_params, quantize_optional_params);
|
auto best_kernel = kernel_selector.get_best_kernel(quantize_params, quantize_optional_params);
|
||||||
|
|
||||||
return make_unique<quantize_impl>(best_kernel);
|
return make_unique<quantize_impl>(best_kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||||
|
auto quantize_params = get_default_params<kernel_selector::quantize_params>(impl_param);
|
||||||
|
const auto& output_layout = impl_param.get_output_layout();
|
||||||
|
quantize_params.packed_binary_output = output_layout.data_type == data_types::bin;
|
||||||
|
(_kernel_data.update_dispatch_data_func)(quantize_params, _kernel_data);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
attach_quantize_impl::attach_quantize_impl() {
|
attach_quantize_impl::attach_quantize_impl() {
|
||||||
implementation_map<quantize>::add(impl_types::ocl, quantize_impl::create, {
|
std::set<implementation_map<quantize>::key_type> keys;
|
||||||
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::fs_b_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::u8, format::fs_b_yx_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
auto types = {
|
||||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
data_types::f16,
|
||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
data_types::f32,
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
data_types::i8,
|
||||||
|
data_types::u8
|
||||||
|
};
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::byxf),
|
auto formats = {
|
||||||
std::make_tuple(data_types::f16, format::byxf),
|
format::bfyx,
|
||||||
std::make_tuple(data_types::u8, format::byxf),
|
format::byxf,
|
||||||
std::make_tuple(data_types::i8, format::byxf),
|
format::b_fs_yx_fsv4,
|
||||||
|
format::b_fs_yx_fsv16,
|
||||||
|
format::b_fs_yx_fsv32,
|
||||||
|
format::fs_b_yx_fsv32,
|
||||||
|
format::bs_fs_yx_bsv16_fsv16,
|
||||||
|
format::bs_fs_yx_bsv16_fsv32,
|
||||||
|
format::bs_fs_yx_bsv32_fsv16,
|
||||||
|
format::bs_fs_yx_bsv32_fsv32,
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
|
format::bfzyx,
|
||||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
|
format::b_fs_zyx_fsv16,
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
format::b_fs_zyx_fsv32,
|
||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
format::bs_fs_zyx_bsv16_fsv16,
|
||||||
|
format::bs_fs_zyx_bsv16_fsv32,
|
||||||
|
format::bs_fs_zyx_bsv32_fsv16,
|
||||||
|
format::bs_fs_zyx_bsv32_fsv32,
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
format::bfwzyx
|
||||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
};
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
|
auto dyn_formats = {
|
||||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
|
format::bfyx,
|
||||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
|
format::bfzyx,
|
||||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
|
format::bfwzyx
|
||||||
|
};
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
|
for (const auto type : types) {
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
|
for (const auto format : formats) {
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
|
keys.emplace(type, format);
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv32),
|
keys.emplace(data_types::f16, format::yxfb);
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv32),
|
keys.emplace(data_types::f32, format::yxfb);
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
implementation_map<quantize>::add(impl_types::ocl, shape_types::static_shape, quantize_impl::create, keys);
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv32),
|
implementation_map<quantize>::add(impl_types::ocl, shape_types::dynamic_shape, quantize_impl::create, types, dyn_formats);
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv32),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bfyx),
|
|
||||||
std::make_tuple(data_types::f16, format::bfyx),
|
|
||||||
std::make_tuple(data_types::i32, format::bfyx),
|
|
||||||
std::make_tuple(data_types::u8, format::bfyx),
|
|
||||||
std::make_tuple(data_types::i8, format::bfyx),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::byxf),
|
|
||||||
std::make_tuple(data_types::f16, format::byxf),
|
|
||||||
std::make_tuple(data_types::i32, format::byxf),
|
|
||||||
std::make_tuple(data_types::u8, format::byxf),
|
|
||||||
std::make_tuple(data_types::i8, format::byxf),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::yxfb),
|
|
||||||
std::make_tuple(data_types::f16, format::yxfb),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bfzyx),
|
|
||||||
std::make_tuple(data_types::f16, format::bfzyx),
|
|
||||||
std::make_tuple(data_types::i32, format::bfzyx),
|
|
||||||
std::make_tuple(data_types::u8, format::bfzyx),
|
|
||||||
std::make_tuple(data_types::i8, format::bfzyx),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
|
||||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
|
||||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
|
||||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
|
||||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
|
||||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
|
||||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
|
|
||||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv32_fsv32),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv32_fsv32),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
|
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv32_fsv16),
|
|
||||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv32_fsv16),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
@ -89,48 +89,50 @@ struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
|||||||
|
|
||||||
return {params, optional_params};
|
return {params, optional_params};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void update_dispatch_data(const kernel_impl_params& impl_param) override {
|
||||||
|
auto kernel_params = get_kernel_params(impl_param);
|
||||||
|
(_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
attach_reduce_impl::attach_reduce_impl() {
|
attach_reduce_impl::attach_reduce_impl() {
|
||||||
implementation_map<reduce>::add(impl_types::ocl, typed_primitive_impl_ocl<reduce>::create<reduce_impl>, {
|
auto types = {
|
||||||
std::make_tuple(data_types::f32, format::bfyx),
|
data_types::f32,
|
||||||
std::make_tuple(data_types::f16, format::bfyx),
|
data_types::f16,
|
||||||
std::make_tuple(data_types::i32, format::bfyx),
|
data_types::i32,
|
||||||
std::make_tuple(data_types::i8, format::bfyx),
|
data_types::i8,
|
||||||
std::make_tuple(data_types::u8, format::bfyx),
|
data_types::u8
|
||||||
|
};
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bfzyx),
|
auto static_formats = {
|
||||||
std::make_tuple(data_types::f16, format::bfzyx),
|
format::bfyx,
|
||||||
std::make_tuple(data_types::i32, format::bfzyx),
|
format::bfzyx,
|
||||||
std::make_tuple(data_types::i8, format::bfzyx),
|
format::bfwzyx,
|
||||||
std::make_tuple(data_types::u8, format::bfzyx),
|
format::b_fs_yx_fsv16,
|
||||||
|
format::b_fs_yx_fsv32,
|
||||||
|
format::b_fs_zyx_fsv16
|
||||||
|
};
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
implementation_map<reduce>::add(impl_types::ocl,
|
||||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
shape_types::static_shape,
|
||||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
typed_primitive_impl_ocl<reduce>::create<reduce_impl>,
|
||||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
types,
|
||||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
static_formats);
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
auto dyn_formats = {
|
||||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
format::bfyx,
|
||||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
|
format::bfzyx,
|
||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
format::bfwzyx
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
};
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
implementation_map<reduce>::add(impl_types::ocl,
|
||||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
shape_types::dynamic_shape,
|
||||||
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
|
typed_primitive_impl_ocl<reduce>::create<reduce_impl>,
|
||||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
types,
|
||||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
|
dyn_formats);
|
||||||
|
|
||||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
|
||||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
@ -8,12 +8,14 @@
|
|||||||
REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
|
REQD_SUB_GROUP_SIZE(SUB_GROUP_SIZE)
|
||||||
#endif
|
#endif
|
||||||
__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2)))
|
__attribute__((reqd_work_group_size(LWS_0, LWS_1, LWS_2)))
|
||||||
KERNEL(quantize_ref)(const __global INPUT0_TYPE* input,
|
KERNEL(quantize_ref)(
|
||||||
const __global INPUT1_TYPE* input_low,
|
OPTIONAL_SHAPE_INFO_ARG
|
||||||
const __global INPUT2_TYPE* input_high,
|
const __global INPUT0_TYPE* input,
|
||||||
const __global INPUT3_TYPE* output_low,
|
const __global INPUT1_TYPE* input_low,
|
||||||
const __global INPUT4_TYPE* output_high,
|
const __global INPUT2_TYPE* input_high,
|
||||||
__global OUTPUT_TYPE* output)
|
const __global INPUT3_TYPE* output_low,
|
||||||
|
const __global INPUT4_TYPE* output_high,
|
||||||
|
__global OUTPUT_TYPE* output)
|
||||||
{
|
{
|
||||||
const int b = get_global_id(0);
|
const int b = get_global_id(0);
|
||||||
const int of = get_global_id(1);
|
const int of = get_global_id(1);
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
#include "include/batch_headers/fetch_data.cl"
|
#include "include/batch_headers/fetch_data.cl"
|
||||||
|
|
||||||
inline uint FUNC(calc_linear_offset)(uint b, uint f, uint w, uint z, uint y, uint x)
|
inline uint FUNC(calc_linear_offset)(OPTIONAL_SHAPE_INFO_ARG uint b, uint f, uint w, uint z, uint y, uint x)
|
||||||
{
|
{
|
||||||
uint index = b * OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM +
|
uint index = b * OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W * OUTPUT_FEATURE_NUM +
|
||||||
f * OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W +
|
f * OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z * OUTPUT_SIZE_W +
|
||||||
@ -17,6 +17,7 @@ inline uint FUNC(calc_linear_offset)(uint b, uint f, uint w, uint z, uint y, uin
|
|||||||
}
|
}
|
||||||
|
|
||||||
KERNEL(reduce_ref)(
|
KERNEL(reduce_ref)(
|
||||||
|
OPTIONAL_SHAPE_INFO_ARG
|
||||||
const __global INPUT0_TYPE* data,
|
const __global INPUT0_TYPE* data,
|
||||||
__global OUTPUT_TYPE* output
|
__global OUTPUT_TYPE* output
|
||||||
#if HAS_FUSED_OPS_DECLS
|
#if HAS_FUSED_OPS_DECLS
|
||||||
@ -51,7 +52,7 @@ KERNEL(reduce_ref)(
|
|||||||
const uint out_idx = OUTPUT_GET_INDEX(b, f, w, z, y, x);
|
const uint out_idx = OUTPUT_GET_INDEX(b, f, w, z, y, x);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const uint linear_idx = FUNC_CALL(calc_linear_offset)(b, f, w, z, y, x);
|
const uint linear_idx = FUNC_CALL(calc_linear_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, w, z, y, x);
|
||||||
if (linear_idx >= COMPUTATIONAL_OPERATIONS_NUMBER)
|
if (linear_idx >= COMPUTATIONAL_OPERATIONS_NUMBER)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -61,17 +61,25 @@ KernelsData QuantizeKernelBase::GetKernelsData(const Params& params, const optio
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dispatchData = SetDefault(newParams, options);
|
auto dispatchData = SetDefault(newParams);
|
||||||
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
|
||||||
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
auto cldnn_jit = GetJitConstants(newParams, dispatchData);
|
||||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||||
|
|
||||||
|
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||||
|
const auto& prim_params = static_cast<const quantize_params&>(params);
|
||||||
|
auto dispatchData = SetDefault(prim_params);
|
||||||
|
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||||
|
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||||
|
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||||
|
};
|
||||||
|
|
||||||
auto& kernel = kd.kernels[0];
|
auto& kernel = kd.kernels[0];
|
||||||
|
|
||||||
kernel.params.workGroups.global = dispatchData.gws;
|
kernel.params.workGroups.global = dispatchData.gws;
|
||||||
kernel.params.workGroups.local = dispatchData.lws;
|
kernel.params.workGroups.local = dispatchData.lws;
|
||||||
kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, EXE_MODE_DEFAULT);
|
kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, EXE_MODE_DEFAULT);
|
||||||
kernel.params.arguments = GetArgsDesc(static_cast<int>(newParams.inputs.size()), false, false);
|
kernel.params.arguments = GetArgsDesc(static_cast<int>(newParams.inputs.size()), false, false, 0, 1, newParams.outputs[0].is_dynamic());
|
||||||
|
|
||||||
return {kd};
|
return {kd};
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,6 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const;
|
virtual JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const;
|
||||||
virtual CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const = 0;
|
virtual CommonDispatchData SetDefault(const quantize_params& params) const = 0;
|
||||||
};
|
};
|
||||||
} // namespace kernel_selector
|
} // namespace kernel_selector
|
||||||
|
@ -25,10 +25,11 @@ ParamsKey QuantizeKernelRef::GetSupportedKey() const {
|
|||||||
k.EnableBatching();
|
k.EnableBatching();
|
||||||
k.EnableDifferentTypes();
|
k.EnableDifferentTypes();
|
||||||
k.EnableQuantizePackedBinaryOutput();
|
k.EnableQuantizePackedBinaryOutput();
|
||||||
|
k.EnableDynamicShapesSupport();
|
||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params, const optional_params&) const {
|
CommonDispatchData QuantizeKernelRef::SetDefault(const quantize_params& params) const {
|
||||||
CommonDispatchData dispatchData;
|
CommonDispatchData dispatchData;
|
||||||
|
|
||||||
auto output = params.outputs[0];
|
auto output = params.outputs[0];
|
||||||
|
@ -16,7 +16,7 @@ public:
|
|||||||
virtual ~QuantizeKernelRef() {}
|
virtual ~QuantizeKernelRef() {}
|
||||||
|
|
||||||
JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
|
JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
|
||||||
CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override;
|
CommonDispatchData SetDefault(const quantize_params& params) const override;
|
||||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||||
bool Validate(const Params& p, const optional_params& o) const override;
|
bool Validate(const Params& p, const optional_params& o) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
|
@ -31,7 +31,7 @@ ParamsKey QuantizeKernelScaleShift::GetSupportedKey() const {
|
|||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& params, const optional_params&) const {
|
CommonDispatchData QuantizeKernelScaleShift::SetDefault(const quantize_params& params) const {
|
||||||
CommonDispatchData dispatchData;
|
CommonDispatchData dispatchData;
|
||||||
|
|
||||||
auto output = params.outputs[0];
|
auto output = params.outputs[0];
|
||||||
|
@ -16,7 +16,7 @@ public:
|
|||||||
virtual ~QuantizeKernelScaleShift() {}
|
virtual ~QuantizeKernelScaleShift() {}
|
||||||
|
|
||||||
JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
|
JitConstants GetJitConstants(const quantize_params& params, const CommonDispatchData& dispatchData) const override;
|
||||||
CommonDispatchData SetDefault(const quantize_params& params, const optional_params&) const override;
|
CommonDispatchData SetDefault(const quantize_params& params) const override;
|
||||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||||
bool Validate(const Params& p, const optional_params& o) const override;
|
bool Validate(const Params& p, const optional_params& o) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
|
@ -108,7 +108,7 @@ DeviceFeaturesKey ReduceKernel_b_fs_yx_fsv16::get_required_device_features_key(c
|
|||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommonDispatchData ReduceKernel_b_fs_yx_fsv16::SetDefault(const reduce_params& params, const optional_params&) const {
|
CommonDispatchData ReduceKernel_b_fs_yx_fsv16::SetDefault(const reduce_params& params) const {
|
||||||
CommonDispatchData dispatchData;
|
CommonDispatchData dispatchData;
|
||||||
|
|
||||||
auto in_dims = calc_in_dims(params);
|
auto in_dims = calc_in_dims(params);
|
||||||
|
@ -12,7 +12,7 @@ class ReduceKernel_b_fs_yx_fsv16 : public ReduceKernelBase {
|
|||||||
public:
|
public:
|
||||||
ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {}
|
ReduceKernel_b_fs_yx_fsv16() : ReduceKernelBase("reduce_gpu_b_fs_yx_fsv16") {}
|
||||||
virtual ~ReduceKernel_b_fs_yx_fsv16() {}
|
virtual ~ReduceKernel_b_fs_yx_fsv16() {}
|
||||||
CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
|
CommonDispatchData SetDefault(const reduce_params& params) const override;
|
||||||
JitConstants GetJitConstants(const reduce_params& params) const override;
|
JitConstants GetJitConstants(const reduce_params& params) const override;
|
||||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||||
|
@ -28,7 +28,29 @@ bool ReduceKernelBase::Validate(const Params& p, const optional_params&) const {
|
|||||||
JitConstants ReduceKernelBase::GetJitConstants(const reduce_params& params) const {
|
JitConstants ReduceKernelBase::GetJitConstants(const reduce_params& params) const {
|
||||||
JitConstants jit = MakeBaseParamsJitConstants(params);
|
JitConstants jit = MakeBaseParamsJitConstants(params);
|
||||||
|
|
||||||
jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", params.outputs[0].LogicalSize()));
|
const auto& output = params.outputs[0];
|
||||||
|
if (output.is_dynamic()) {
|
||||||
|
size_t output_offset = (1 + GetFusedPrimitiveInputsCount(params)) * 6;
|
||||||
|
auto x = toCodeString(output.X(), output_offset + 5);
|
||||||
|
auto y = toCodeString(output.Y(), output_offset + 4);
|
||||||
|
auto z = toCodeString(output.Z(), output_offset + 3);
|
||||||
|
auto w = toCodeString(output.W(), output_offset + 2);
|
||||||
|
auto f = toCodeString(output.Feature(), output_offset + 1);
|
||||||
|
auto b = toCodeString(output.Batch(), output_offset);
|
||||||
|
|
||||||
|
auto multiply = [](std::vector<std::string> dims) -> std::string {
|
||||||
|
std::string res = "(";
|
||||||
|
for (size_t i = 0; i < dims.size() - 1; ++i) {
|
||||||
|
res += dims[i] + "*";
|
||||||
|
}
|
||||||
|
res += dims.back() + ")";
|
||||||
|
return res;
|
||||||
|
};
|
||||||
|
jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", multiply({x, y, z, w, f, b})));
|
||||||
|
} else {
|
||||||
|
jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", params.outputs[0].LogicalSize()));
|
||||||
|
}
|
||||||
|
|
||||||
jit.AddConstant(MakeJitConstant("REDUCE_" + toString(params.reduceMode) + "_MODE", 1));
|
jit.AddConstant(MakeJitConstant("REDUCE_" + toString(params.reduceMode) + "_MODE", 1));
|
||||||
jit.AddConstant(MakeJitConstant("KEEP_DIMS", params.keepDims));
|
jit.AddConstant(MakeJitConstant("KEEP_DIMS", params.keepDims));
|
||||||
|
|
||||||
@ -226,7 +248,7 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
|
|||||||
}
|
}
|
||||||
|
|
||||||
const reduce_params& params = static_cast<const reduce_params&>(p);
|
const reduce_params& params = static_cast<const reduce_params&>(p);
|
||||||
DispatchData dispatchData = SetDefault(params, options);
|
DispatchData dispatchData = SetDefault(params);
|
||||||
|
|
||||||
KernelData kd = KernelData::Default<reduce_params>(params);
|
KernelData kd = KernelData::Default<reduce_params>(params);
|
||||||
|
|
||||||
@ -234,6 +256,14 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
|
|||||||
auto entry_point = GetEntryPoint(kernelName, params.layerID, params, options);
|
auto entry_point = GetEntryPoint(kernelName, params.layerID, params, options);
|
||||||
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
|
||||||
|
|
||||||
|
kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
|
||||||
|
const auto& prim_params = static_cast<const reduce_params&>(params);
|
||||||
|
auto dispatchData = SetDefault(prim_params);
|
||||||
|
OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
|
||||||
|
kd.kernels[0].params.workGroups.global = dispatchData.gws;
|
||||||
|
kd.kernels[0].params.workGroups.local = dispatchData.lws;
|
||||||
|
};
|
||||||
|
|
||||||
auto& kernel = kd.kernels[0];
|
auto& kernel = kd.kernels[0];
|
||||||
FillCLKernelData(kernel,
|
FillCLKernelData(kernel,
|
||||||
dispatchData,
|
dispatchData,
|
||||||
@ -245,7 +275,9 @@ KernelsData ReduceKernelBase::GetCommonKernelsData(const Params& p,
|
|||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
1,
|
1,
|
||||||
GetFusedPrimitiveInputsCount(params));
|
GetFusedPrimitiveInputsCount(params),
|
||||||
|
1,
|
||||||
|
params.inputs[0].is_dynamic());
|
||||||
|
|
||||||
return {kd};
|
return {kd};
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,7 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
bool Validate(const Params&, const optional_params&) const override;
|
bool Validate(const Params&, const optional_params&) const override;
|
||||||
virtual JitConstants GetJitConstants(const reduce_params& params) const;
|
virtual JitConstants GetJitConstants(const reduce_params& params) const;
|
||||||
virtual CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const = 0;
|
virtual CommonDispatchData SetDefault(const reduce_params& params) const = 0;
|
||||||
Datatype GetAccumulatorType(const reduce_params& p) const;
|
Datatype GetAccumulatorType(const reduce_params& p) const;
|
||||||
Datatype GetFinalAccumulatorType(const reduce_params& p) const;
|
Datatype GetFinalAccumulatorType(const reduce_params& p) const;
|
||||||
Datatype GetActivationType(const reduce_params& params) const;
|
Datatype GetActivationType(const reduce_params& params) const;
|
||||||
|
@ -27,10 +27,11 @@ ParamsKey ReduceKernelRef::GetSupportedKey() const {
|
|||||||
k.EnableTensorPitches();
|
k.EnableTensorPitches();
|
||||||
k.EnableBatching();
|
k.EnableBatching();
|
||||||
k.EnableDifferentTypes();
|
k.EnableDifferentTypes();
|
||||||
|
k.EnableDynamicShapesSupport();
|
||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommonDispatchData ReduceKernelRef::SetDefault(const reduce_params& params, const optional_params&) const {
|
CommonDispatchData ReduceKernelRef::SetDefault(const reduce_params& params) const {
|
||||||
CommonDispatchData dispatchData;
|
CommonDispatchData dispatchData;
|
||||||
auto in_layout = params.inputs[0].GetLayout();
|
auto in_layout = params.inputs[0].GetLayout();
|
||||||
auto out_layout = params.outputs[0].GetLayout();
|
auto out_layout = params.outputs[0].GetLayout();
|
||||||
|
@ -12,7 +12,7 @@ class ReduceKernelRef : public ReduceKernelBase {
|
|||||||
public:
|
public:
|
||||||
ReduceKernelRef() : ReduceKernelBase("reduce_ref") {}
|
ReduceKernelRef() : ReduceKernelBase("reduce_ref") {}
|
||||||
virtual ~ReduceKernelRef() {}
|
virtual ~ReduceKernelRef() {}
|
||||||
CommonDispatchData SetDefault(const reduce_params& params, const optional_params&) const override;
|
CommonDispatchData SetDefault(const reduce_params& params) const override;
|
||||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
|
@ -91,9 +91,7 @@ static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
|
|||||||
auto reorder_prim = cldnn::reorder(reorderLayerName,
|
auto reorder_prim = cldnn::reorder(reorderLayerName,
|
||||||
cldnn::input_info(resultLayerName),
|
cldnn::input_info(resultLayerName),
|
||||||
out_format,
|
out_format,
|
||||||
out_dt,
|
out_dt);
|
||||||
std::vector<float>(),
|
|
||||||
cldnn::reorder_mean_mode::subtract);
|
|
||||||
p.add_primitive(*op, reorder_prim);
|
p.add_primitive(*op, reorder_prim);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <intel_gpu/primitives/input_layout.hpp>
|
#include <intel_gpu/primitives/input_layout.hpp>
|
||||||
#include <intel_gpu/primitives/quantize.hpp>
|
#include <intel_gpu/primitives/quantize.hpp>
|
||||||
#include <intel_gpu/primitives/data.hpp>
|
#include <intel_gpu/primitives/data.hpp>
|
||||||
|
#include "quantize_inst.h"
|
||||||
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
@ -582,6 +583,107 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(quantize_gpu, dynamic) {
|
||||||
|
auto& engine = get_test_engine();
|
||||||
|
|
||||||
|
auto input = engine.allocate_memory({ { 1, 16, 2, 2 }, data_types::f32, format::bfyx });
|
||||||
|
auto input_low = engine.allocate_memory({ { 1, 16, 1, 1 }, data_types::f32, format::bfyx });
|
||||||
|
auto input_high = engine.allocate_memory({ { 1, 16, 1, 1 }, data_types::f32, format::bfyx });
|
||||||
|
auto output_low = engine.allocate_memory({ { 1, 1, 1, 1 }, data_types::f32, format::bfyx });
|
||||||
|
auto output_high = engine.allocate_memory({ { 1, 1, 1, 1 }, data_types::f32, format::bfyx });
|
||||||
|
|
||||||
|
layout in_dyn_layout { ov::PartialShape::dynamic(4), data_types::f32, format::bfyx };
|
||||||
|
|
||||||
|
set_values(input, { -1.0f, 2.0f, 3.0f, 4.0f,
|
||||||
|
5.0f, 2.0f, 2.0f, 3.0f,
|
||||||
|
4.0f, 6.0f, 3.0f, 3.0f,
|
||||||
|
3.0f, 5.0f, 1.0f, 1.0f,
|
||||||
|
|
||||||
|
1.0f, 1.0f, 1.0f, 1.0f,
|
||||||
|
4.0f, 6.0f, 3.0f, 3.0f,
|
||||||
|
3.0f, 5.0f, 1.0f, 1.0f,
|
||||||
|
1.0f, 1.0f, 1.0f, 1.0f,
|
||||||
|
|
||||||
|
-1.0f, 2.0f, 3.0f, 4.0f,
|
||||||
|
5.0f, 2.0f, 2.0f, 3.0f,
|
||||||
|
4.0f, 6.0f, 3.0f, 3.0f,
|
||||||
|
3.0f, 5.0f, 1.0f, 1.0f,
|
||||||
|
|
||||||
|
1.0f, 1.0f, 1.0f, 1.0f,
|
||||||
|
4.0f, 6.0f, 3.0f, 3.0f,
|
||||||
|
3.0f, 5.0f, 1.0f, 1.0f,
|
||||||
|
1.0f, 1.0f, 1.0f, 1.0f });
|
||||||
|
|
||||||
|
set_values(input_low, { 0.0f, 1.0f, 2.0f, 3.0f,
|
||||||
|
4.0f, 5.0f, 6.0f, 7.0f,
|
||||||
|
7.0f, 6.0f, 5.0f, 4.0f,
|
||||||
|
3.0f, 2.0f, 1.0f, 0.0f });
|
||||||
|
|
||||||
|
set_values(input_high, { 0.0f, 1.0f, 2.0f, 3.0f,
|
||||||
|
4.0f, 5.0f, 6.0f, 7.0f,
|
||||||
|
7.0f, 6.0f, 5.0f, 4.0f,
|
||||||
|
3.0f, 2.0f, 1.0f, 0.0f });
|
||||||
|
|
||||||
|
set_values(output_low, { -1.0f });
|
||||||
|
set_values(output_high, { 1.0f });
|
||||||
|
|
||||||
|
// 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1
|
||||||
|
// 1 1 1 1 0 1 0 0 0 0 1 1 0 1 1 1
|
||||||
|
// 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1
|
||||||
|
// 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 1
|
||||||
|
std::vector<float> ref_data = { -1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1,
|
||||||
|
-1, 1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
-1, 1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
-1, 1, -1, -1,
|
||||||
|
-1, 1, -1, -1,
|
||||||
|
-1, -1, -1, -1,
|
||||||
|
1, 1, 1, 1,
|
||||||
|
1, 1, -1, -1,
|
||||||
|
1, 1, 1, 1 };
|
||||||
|
|
||||||
|
topology topology;
|
||||||
|
topology.add(
|
||||||
|
input_layout("input", in_dyn_layout),
|
||||||
|
data("input_low", input_low),
|
||||||
|
data("input_high", input_high),
|
||||||
|
data("output_low", output_low),
|
||||||
|
data("output_high", output_high),
|
||||||
|
quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32)
|
||||||
|
);
|
||||||
|
|
||||||
|
ExecutionConfig config;
|
||||||
|
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||||
|
network network(engine, topology, config);
|
||||||
|
network.set_input_data("input", input);
|
||||||
|
|
||||||
|
auto inst = network.get_primitive("quantize");
|
||||||
|
auto impl = inst->get_impl();
|
||||||
|
ASSERT_TRUE(impl != nullptr);
|
||||||
|
ASSERT_TRUE(impl->is_dynamic());
|
||||||
|
|
||||||
|
auto outputs = network.execute();
|
||||||
|
|
||||||
|
auto output = outputs.at("quantize").get_memory();
|
||||||
|
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||||
|
|
||||||
|
// Check that layout and memory contains logical size of tensor
|
||||||
|
ASSERT_EQ(output->count(), (size_t)64);
|
||||||
|
ASSERT_EQ(output->get_layout().count(), (size_t)64);
|
||||||
|
|
||||||
|
ASSERT_EQ(output->size(), ref_data.size() * sizeof(uint32_t));
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ref_data.size(); ++i) {
|
||||||
|
ASSERT_EQ(output_ptr[i], ref_data[i]) << " index = " << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct quantize_random_test_params {
|
struct quantize_random_test_params {
|
||||||
data_types input_type;
|
data_types input_type;
|
||||||
data_types output_type;
|
data_types output_type;
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <intel_gpu/primitives/input_layout.hpp>
|
#include <intel_gpu/primitives/input_layout.hpp>
|
||||||
#include <intel_gpu/primitives/reduce.hpp>
|
#include <intel_gpu/primitives/reduce.hpp>
|
||||||
#include <intel_gpu/primitives/data.hpp>
|
#include <intel_gpu/primitives/data.hpp>
|
||||||
|
#include "reduce_inst.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -1641,6 +1642,43 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(reduce_gpu, dynamic) {
|
||||||
|
auto& engine = get_test_engine();
|
||||||
|
auto input = engine.allocate_memory({data_types::f32, format::bfwzyx, {2, 3, 1, 1, 1, 1}});
|
||||||
|
|
||||||
|
layout in_dyn_layout { ov::PartialShape::dynamic(6), data_types::f32, format::bfwzyx };
|
||||||
|
|
||||||
|
set_values(input, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
|
||||||
|
|
||||||
|
topology topology;
|
||||||
|
topology.add(input_layout("input", in_dyn_layout));
|
||||||
|
topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 1));
|
||||||
|
|
||||||
|
ExecutionConfig config;
|
||||||
|
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||||
|
network network(engine, topology, config);
|
||||||
|
network.set_input_data("input", input);
|
||||||
|
|
||||||
|
auto inst = network.get_primitive("reduce");
|
||||||
|
auto impl = inst->get_impl();
|
||||||
|
ASSERT_TRUE(impl != nullptr);
|
||||||
|
ASSERT_TRUE(impl->is_dynamic());
|
||||||
|
|
||||||
|
auto outputs = network.execute();
|
||||||
|
|
||||||
|
ASSERT_EQ(outputs.size(), size_t(1));
|
||||||
|
ASSERT_EQ(outputs.begin()->first, "reduce");
|
||||||
|
|
||||||
|
auto output = outputs.at("reduce").get_memory();
|
||||||
|
|
||||||
|
std::vector<float> ref_data = {0.0f, 60.0f};
|
||||||
|
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < ref_data.size(); ++i) {
|
||||||
|
ASSERT_TRUE(are_equal(ref_data[i], output_ptr[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <data_types InputT, data_types OutputT>
|
template <data_types InputT, data_types OutputT>
|
||||||
class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParam<TestParamType_general_reduce_gpu> {
|
class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParam<TestParamType_general_reduce_gpu> {
|
||||||
protected:
|
protected:
|
||||||
|
Loading…
Reference in New Issue
Block a user