[GPU] Added shape agnostic ref kernels for Select and Activation (#15016)
* [GPU] Added Select shape agnostic support
* [GPU] Added Activation shape agnostic support
parent 864b5075b7
commit 36df508baf
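The recurring pattern in this commit for making a reference kernel shape agnostic is to compile the kernel once and recompute only the dispatch sizes when the real shapes become known at execution time. Below is a minimal, self-contained C++ sketch of that idea; all type and member names here are illustrative stand-ins, not cldnn's actual API:

    #include <array>
    #include <cstddef>
    #include <functional>

    // Hypothetical stand-ins for the shapes and kernel metadata involved.
    struct Shape { std::size_t b, f, y, x; };

    struct DispatchData {
        std::array<std::size_t, 3> gws;  // global work size
        std::array<std::size_t, 3> lws;  // local work size
    };

    struct KernelData {
        DispatchData dispatch;
        // Stored callback that re-derives gws/lws from runtime shapes, so the
        // compiled kernel binary can be reused across input shapes.
        std::function<void(const Shape&, KernelData&)> update_dispatch_data_func;
    };

    int main() {
        KernelData kd;
        kd.update_dispatch_data_func = [](const Shape& s, KernelData& kdata) {
            kdata.dispatch.gws = {s.x, s.y, s.f * s.b};
            kdata.dispatch.lws = {1, 1, 1};
        };
        // At execute time, once the actual input shape is known:
        kd.update_dispatch_data_func(Shape{1, 3, 224, 224}, kd);
        return 0;
    }

In the diff below, activation_impl and select_impl gain an update_dispatch_data() override that invokes exactly such a stored callback (_kernel_data.update_dispatch_data_func), and the kernel-selector side installs it as kd.update_dispatch_data_func.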
@@ -911,7 +911,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
             return;

         std::vector<std::pair<program_node*, int32_t>> parents = node.get_dependencies();
-        std::list<cldnn::program_node*> users = node.get_users();

         std::vector<bool> can_fuse_parents = { false, false };

@@ -64,12 +64,37 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {

         return {params, optional_params};
     }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        auto kernel_params = get_kernel_params(impl_param);
+        (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
+    }
 };

 namespace detail {

 attach_activation_impl::attach_activation_impl() {
-    implementation_map<activation>::add(impl_types::ocl, typed_primitive_impl_ocl<activation>::create<activation_impl>, {
+    auto dyn_types = {
+        data_types::f32,
+        data_types::f16,
+        data_types::i8,
+        data_types::u8,
+        data_types::i32
+    };
+
+    auto dyn_formats = {
+        format::bfyx,
+        format::bfzyx,
+        format::bfwzyx
+    };
+
+    implementation_map<activation>::add(impl_types::ocl,
+                                        shape_types::dynamic_shape,
+                                        typed_primitive_impl_ocl<activation>::create<activation_impl>,
+                                        dyn_types,
+                                        dyn_formats);
+
+    implementation_map<activation>::add(impl_types::ocl, shape_types::static_shape, typed_primitive_impl_ocl<activation>::create<activation_impl>, {
         std::make_tuple(data_types::f32, format::yxfb),
         std::make_tuple(data_types::f16, format::yxfb),
         std::make_tuple(data_types::f32, format::bfyx),

@@ -168,7 +168,7 @@ public:
 namespace detail {

 attach_pooling_impl::attach_pooling_impl() {
-    std::set<implementation_map<resample>::key_type> keys;
+    std::set<implementation_map<pooling>::key_type> keys;

     auto types = { data_types::f16, data_types::f32, data_types::i8, data_types::u8 };
     auto formats = { format::bfyx,

@@ -30,15 +30,20 @@ struct select_impl : typed_primitive_impl_ocl<select> {
         auto params = get_default_params<kernel_selector::select_params>(impl_param);
         auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());

-        std::vector<layout> layouts = impl_param.input_layouts;
+        std::vector<layout> input_layouts = impl_param.input_layouts;
         auto o_layout = impl_param.get_output_layout();

-        auto broadcastable = [&](layout a, layout b) {
-            auto dims_a = a.get_dims();
-            auto dims_b = b.get_dims();
-            size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size(): dims_b.size();
+        auto broadcastable = [&](const layout& a, const layout& b) {
+            if (a.is_dynamic() || b.is_dynamic()) {
+                return false;
+            }

-            for (size_t i = 0; i < min_size; i++) {
+            auto dims_a = a.get_partial_shape();
+            auto dims_b = b.get_partial_shape();
+
+            size_t min_size = std::min(dims_a.size(), dims_b.size());
+
+            for (size_t i = 0; i < min_size; ++i) {
                 if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) {
                     return false;
                 }

@@ -46,41 +51,61 @@ struct select_impl : typed_primitive_impl_ocl<select> {
             return true;
         };

-        for (size_t i = 0; i < layouts.size(); i++) {
-            auto shape = layouts[i].get_shape();
-            auto shape_size = shape.size();
-            if (shape_size < 4 && !broadcastable(o_layout, layouts[i])) {
-                shape.insert(shape.begin(), 4 - shape_size, 1);
-                layout new_layout = layouts[i];
-                new_layout.set_partial_shape(shape);
-                layouts[i] = new_layout;
+        for (auto& l : input_layouts) {
+            auto pshape = l.get_partial_shape();
+            auto rank = pshape.size();
+
+            if (rank < 4 && !broadcastable(o_layout, l)) {
+                pshape.insert(pshape.begin(), 4 - rank, 1);
+                layout new_layout = l;
+                new_layout.set_partial_shape(pshape);
+                l = new_layout;
             }
         }

-        for (size_t i = 1; i < layouts.size(); i++) {
-            params.inputs.push_back(convert_data_tensor(layouts[i]));
+        for (size_t i = 1; i < input_layouts.size(); ++i) {
+            params.inputs.push_back(convert_data_tensor(input_layouts[i]));
         }
         return {params, optional_params};
     }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        auto kernel_params = get_kernel_params(impl_param);
+        (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
+    }
 };

 namespace detail {

 attach_select_impl::attach_select_impl() {
-    implementation_map<select>::add(impl_types::ocl, typed_primitive_impl_ocl<select>::create<select_impl>, {
-        std::make_tuple(data_types::f32, format::yxfb),
-        std::make_tuple(data_types::f16, format::yxfb),
-        std::make_tuple(data_types::i8, format::yxfb),
-        std::make_tuple(data_types::u8, format::yxfb),
-        std::make_tuple(data_types::f32, format::bfyx),
-        std::make_tuple(data_types::f16, format::bfyx),
-        std::make_tuple(data_types::i8, format::bfyx),
-        std::make_tuple(data_types::u8, format::bfyx),
-        std::make_tuple(data_types::f32, format::byxf),
-        std::make_tuple(data_types::f16, format::byxf),
-        std::make_tuple(data_types::i8, format::byxf),
-        std::make_tuple(data_types::u8, format::byxf),
-    });
+    auto types = {
+        data_types::f32,
+        data_types::f16,
+        data_types::i8,
+        data_types::u8
+    };
+
+    auto static_formats = {
+        format::bfyx,
+        format::byxf,
+        format::yxfb,
+    };
+
+    implementation_map<select>::add(impl_types::ocl,
+                                    shape_types::static_shape,
+                                    typed_primitive_impl_ocl<select>::create<select_impl>,
+                                    types,
+                                    static_formats);
+
+    auto dyn_formats = {
+        format::bfyx
+    };
+
+    implementation_map<select>::add(impl_types::ocl,
+                                    shape_types::dynamic_shape,
+                                    typed_primitive_impl_ocl<select>::create<select_impl>,
+                                    types,
+                                    dyn_formats);
 }

 } // namespace detail

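Note that the static-shape registration above keeps the fuller format list (bfyx, byxf, yxfb) while the dynamic-shape registration covers only bfyx; presumably the shape-agnostic reference kernel is limited to plain layouts whose pitches can be derived at runtime. A hedged sketch of such a registry, using hypothetical types rather than cldnn's implementation_map:

    #include <map>
    #include <string>
    #include <tuple>

    enum class shape_kind { static_shape, dynamic_shape };
    enum class dtype { f32, f16, i8, u8 };
    enum class fmt { bfyx, byxf, yxfb };

    using impl_key = std::tuple<shape_kind, dtype, fmt>;

    int main() {
        std::map<impl_key, std::string> registry;
        // Static shapes: every supported (type, format) pair.
        for (auto t : {dtype::f32, dtype::f16, dtype::i8, dtype::u8})
            for (auto f : {fmt::bfyx, fmt::byxf, fmt::yxfb})
                registry[{shape_kind::static_shape, t, f}] = "select_impl";
        // Dynamic shapes: same types, but only the plain bfyx format.
        for (auto t : {dtype::f32, dtype::f16, dtype::i8, dtype::u8})
            registry[{shape_kind::dynamic_shape, t, fmt::bfyx}] = "select_impl";
        return 0;
    }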
@@ -413,7 +413,7 @@ public:
     }

     static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
         ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape();

         if (ps.size() < 4) {
             ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));

@@ -32,6 +32,9 @@ public:
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(const select_node& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(select_node const& node, kernel_impl_params const& impl_param);
+    static std::vector<size_t> extend_shape_to_6d(ov::PartialShape ps);
+    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
+    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(select_node const& node);
     typed_primitive_inst(network& network, select_node const& node);
 };

@@ -55,6 +55,23 @@ std::vector<layout> select_inst::calc_output_layouts(const select_node& /*node*/
     return {{output_shapes[0], dt, format::get_default_format(output_shapes[0].size())}};
 }

+std::vector<size_t> select_inst::extend_shape_to_6d(ov::PartialShape ps) {
+    if (ps.size() < 4) {
+        ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1));
+    }
+
+    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
+    return l.transform(format::bfwzyx).to_shape();
+}
+
+std::vector<size_t> select_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
+    return extend_shape_to_6d(orig_impl_param.get_input_layout(input_idx).get_partial_shape());
+}
+
+std::vector<size_t> select_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
+    return extend_shape_to_6d(orig_impl_param.get_output_layout(output_idx).get_partial_shape());
+}
+
 std::string select_inst::to_string(select_node const& node) {
     auto node_info = node.desc_to_json();
     auto desc = node.get_primitive();

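The new extend_shape_to_6d helper pads a low-rank shape with leading 1s up to rank 4 and then lets layout::transform produce the 6-D bfwzyx shape the kernel selector works with. A standalone sketch of the equivalent arithmetic, under the assumption that bfwzyx adds the w/z axes between the feature and spatial dimensions:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Mirrors extend_shape_to_6d in spirit only; cldnn::layout::transform does
    // the real bfyx -> bfwzyx conversion.
    std::vector<std::size_t> extend_to_6d(std::vector<std::size_t> dims) {
        if (dims.size() < 4)
            dims.insert(dims.begin(), 4 - dims.size(), 1);   // {4, 2} -> {1, 1, 4, 2}
        dims.insert(dims.begin() + 2, 6 - dims.size(), 1);   // -> {1, 1, 1, 1, 4, 2}
        return dims;
    }

    int main() {
        for (auto d : extend_to_6d({4, 2}))
            std::cout << d << ' ';   // prints: 1 1 1 1 4 2
        std::cout << '\n';
    }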
@@ -12,6 +12,7 @@

 // TODO: move it from layout based to memory based
 KERNEL(activation)(
+    OPTIONAL_SHAPE_INFO_ARG
     __global INPUT0_TYPE* input,
     __global OUTPUT_TYPE* output
 #if HAS_FUSED_OPS_DECLS

@@ -32,7 +33,7 @@ KERNEL(activation)(
     const unsigned x = get_global_id(0);
     const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
     const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(2);
     const unsigned batch = 0;
 #else

@@ -44,7 +45,7 @@ KERNEL(activation)(
     const unsigned x = (uint)get_global_id(1);
     const unsigned y = (uint)get_global_id(2);
 #define z 0
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(0);
     const unsigned batch = 0;
 #else

@@ -60,7 +61,7 @@ KERNEL(activation)(
 #define z 0
     const unsigned x = (uint)get_global_id(0);
     const unsigned y = (uint)get_global_id(1);
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(2);
     const unsigned batch = 0;
 #else

@@ -72,11 +73,11 @@ KERNEL(activation)(

     // GWS.feature and GWS.batch is aligned to 16. Otherwise, there are some idling WIs.
 #if (defined(OUTPUT_LAYOUT_B_FS_YX_FSV16) || defined(OUTPUT_LAYOUT_B_FS_YX_FSV32)) \
-    && OUTPUT_FEATURE_NUM % 16 != 0
+    && (OUTPUT_FEATURE_NUM_CONST % 16 != 0 || IS_DYNAMIC)
     if (feature >= OUTPUT_FEATURE_NUM)
         return;
 #elif (defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV16) || defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32)) \
-    && (OUTPUT_FEATURE_NUM % 16 != 0 || OUTPUT_BATCH_NUM % 16 != 0)
+    && (OUTPUT_FEATURE_NUM_CONST % 16 != 0 || OUTPUT_BATCH_NUM_CONST % 16 != 0 || IS_DYNAMIC)
     if (batch >= OUTPUT_BATCH_NUM || feature >= OUTPUT_FEATURE_NUM)
         return;
 #endif

@@ -4,18 +4,23 @@

 #include "include/batch_headers/fetch_data.cl"

-#define GET_INDEX(prefix, num) \
-    CAT(CAT(prefix, num), _OFFSET) + \
-    (d1 % CAT(CAT(prefix, num), _SIZES)[0])*CAT(CAT(prefix, num), _PITCHES)[0] + \
-    (d2 % CAT(CAT(prefix, num), _SIZES)[1])*CAT(CAT(prefix, num), _PITCHES)[1] + \
-    (d3 % CAT(CAT(prefix, num), _SIZES)[2])*CAT(CAT(prefix, num), _PITCHES)[2] + \
-    (d4 % CAT(CAT(prefix, num), _SIZES)[3])*CAT(CAT(prefix, num), _PITCHES)[3]
+#ifdef IS_DYNAMIC
+#define GET_INDEX(prefix) GET_DATA_INDEX_SAFE(prefix, d4, d3, d2, d1)
+#else
+#define GET_INDEX(prefix) \
+    CAT(prefix, _OFFSET) + \
+    (d1 % CAT(prefix, _SIZES)[0])*CAT(prefix, _PITCHES)[0] + \
+    (d2 % CAT(prefix, _SIZES)[1])*CAT(prefix, _PITCHES)[1] + \
+    (d3 % CAT(prefix, _SIZES)[2])*CAT(prefix, _PITCHES)[2] + \
+    (d4 % CAT(prefix, _SIZES)[3])*CAT(prefix, _PITCHES)[3]
+#endif

-#define INPUT_0 input0[GET_INDEX(INPUT, 0)]
-#define INPUT_1 input1[GET_INDEX(INPUT, 1)]
-#define INPUT_2 input2[GET_INDEX(INPUT, 2)]
+#define INPUT_0 input0[GET_INDEX(INPUT0)]
+#define INPUT_1 input1[GET_INDEX(INPUT1)]
+#define INPUT_2 input2[GET_INDEX(INPUT2)]

 KERNEL(select)(
+    OPTIONAL_SHAPE_INFO_ARG
     INPUTS_DECLS
     __global OUTPUT_TYPE* output)
 {

@@ -24,14 +29,19 @@ const uint d1 = (uint) get_global_id(0);
 const uint d2 = (uint) get_global_id(1);
 const uint d34 = (uint) get_global_id(2);

-const uint d3 = d34 % OUTPUT_SIZES[2];
-const uint d4 = d34 / OUTPUT_SIZES[2];
+#ifdef IS_DYNAMIC
+const uint d3 = d34 % OUTPUT_FEATURE_NUM;
+const uint d4 = d34 / OUTPUT_FEATURE_NUM;
+#else
+const uint d3 = d34 % OUTPUT_SIZES[2];
+const uint d4 = d34 / OUTPUT_SIZES[2];
+#endif

-uint output_offset = OUTPUT_OFFSET +
-                     d1*OUTPUT_PITCHES[0] +
-                     d2*OUTPUT_PITCHES[1] +
-                     d3*OUTPUT_PITCHES[2] +
-                     d4*OUTPUT_PITCHES[3];
+#ifdef IS_DYNAMIC
+uint output_offset = OUTPUT_GET_INDEX(d4, d3, d2, d1);
+#else
+uint output_offset = GET_DATA_INDEX_RAW(OUTPUT, d1, d2, d3, d4);
+#endif

 const OUTPUT_TYPE res = select(INPUT_2, INPUT_1, MASK);

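In the dynamic branch the select kernel drops the hand-written pitch arithmetic in favor of GET_DATA_INDEX_SAFE and OUTPUT_GET_INDEX. The "safe" form wraps each coordinate by the corresponding dimension size, which is what makes numpy-style broadcasting of size-1 dimensions fall out for free; the static GET_INDEX macro above does the same with explicit modulos. A host-side sketch of that arithmetic (illustrative struct, not the OpenCL macros):

    #include <cstddef>
    #include <iostream>

    struct Tensor4D {
        std::size_t offset;
        std::size_t sizes[4];    // {x, y, f, b} extents
        std::size_t pitches[4];  // element stride per axis
    };

    // Each coordinate is wrapped by its extent, so a size-1 axis always maps to
    // index 0 and the same element is broadcast along that axis.
    std::size_t safe_index(const Tensor4D& t, std::size_t d1, std::size_t d2,
                           std::size_t d3, std::size_t d4) {
        return t.offset +
               (d1 % t.sizes[0]) * t.pitches[0] +
               (d2 % t.sizes[1]) * t.pitches[1] +
               (d3 % t.sizes[2]) * t.pitches[2] +
               (d4 % t.sizes[3]) * t.pitches[3];
    }

    int main() {
        Tensor4D mask{0, {1, 2, 2, 2}, {1, 1, 2, 4}};  // x extent 1 -> broadcast along x
        std::cout << safe_index(mask, 1, 0, 0, 0) << '\n';  // d1 wraps to 0; prints 0
    }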
@@ -54,6 +54,14 @@ JitConstants ActivationKernelBase::GetJitConstants(const activation_params& para
         });
     }

+    if (params.has_dynamic_outputs()) {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BATCH_NUM_CONST", 0));
+        jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_CONST", 0));
+    } else {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BATCH_NUM_CONST", params.outputs[0].Batch().v));
+        jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_CONST", params.outputs[0].Feature().v));
+    }
+
     return jit;
 }

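When the output is dynamic, the batch and feature extents are unknown at JIT time, so the new *_CONST constants are emitted as 0. That deliberately fails the #if OUTPUT_BATCH_NUM_CONST == 1 compile-time specializations seen earlier and leaves bounds checking to the runtime guards gated by IS_DYNAMIC. A small sketch of the decision, with make_define as a hypothetical stand-in for MakeJitConstant:

    #include <cstddef>
    #include <string>

    // Renders one preprocessor define for the generated kernel source.
    std::string make_define(const std::string& name, std::size_t value) {
        return "#define " + name + " " + std::to_string(value) + "\n";
    }

    // Dynamic outputs -> emit 0 so "#if OUTPUT_BATCH_NUM_CONST == 1" paths are
    // skipped; static outputs -> bake the real extent into the binary.
    std::string batch_num_const(bool dynamic_output, std::size_t batch) {
        return make_define("OUTPUT_BATCH_NUM_CONST", dynamic_output ? 0 : batch);
    }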
@@ -78,7 +86,6 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
     }

     KernelData kd = KernelData::Default<activation_params>(params);
-
     activation_params& newParams = *static_cast<activation_params*>(kd.params.get());

     auto dispatchData = SetDefault(newParams);

@@ -86,9 +93,18 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
     auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
     auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const activation_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+    };
+
     auto& kernel = kd.kernels[0];
     FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
-                     EXE_MODE_DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
+                     EXE_MODE_DEFAULT, false, false, 1,
+                     GetFusedPrimitiveInputsCount(params), 1, newParams.outputs[0].is_dynamic());

     if (!newParams.inputActivationParams.empty()) {
         kernel.params.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});

@@ -27,6 +27,7 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
+    k.EnableDynamicShapesSupport();
     return k;
 }

@@ -129,13 +129,21 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o

     DispatchData dispatchData = SetDefault(newParams);

+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const select_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+    };
+
     auto& kernel = kd.kernels[0];
-    kernel.params.workGroups.global = dispatchData.gws;
-    kernel.params.workGroups.local = dispatchData.lws;
-
-    kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, EXE_MODE_DEFAULT);
-    kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
+    FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
+                     "", false, false,
+                     (uint32_t)newParams.inputs.size(),
+                     0,
+                     1,
+                     newParams.outputs[0].is_dynamic());

     return {kd};
 }

@@ -36,6 +36,7 @@ ParamsKey SelectKernelRef::GetSupportedKey() const {
     k.EnableTensorPitches();
     k.EnableTensorOffset();
     k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();

     return k;
 }

@@ -23,9 +23,7 @@ static void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1
     auto reorderPrim = cldnn::reorder(layerName,
                                       inputs[0],
                                       cldnn::format::any,
-                                      outDataType,
-                                      std::vector<float>(),
-                                      cldnn::reorder_mean_mode::subtract);
+                                      outDataType);
     p.add_primitive(*op, reorderPrim);
 }

@@ -177,7 +177,7 @@ class fc_fp32_activation_dynamic : public FullyConnectedFusingTest {};
 TEST_P(fc_fp32_activation_dynamic, basic) {
     auto p = GetParam();
     auto test_input_layout = get_input_layout(p);
-    auto dynamic_input_layout = layout{ov::PartialShape::dynamic(test_input_layout.get_rank()), test_input_layout.data_type, test_input_layout.format};
+    auto dynamic_input_layout = layout{ov::PartialShape::dynamic(test_input_layout.get_partial_shape().size()), test_input_layout.data_type, test_input_layout.format};
     create_topologies(
         input_layout("input", dynamic_input_layout),
         data("weights", get_mem(get_weights_layout(p))),

@@ -8,6 +8,7 @@
 #include <intel_gpu/primitives/activation.hpp>
 #include <intel_gpu/primitives/data.hpp>
 #include <intel_gpu/primitives/reorder.hpp>
+#include "activation_inst.h"

 #include <cmath>
 #include <algorithm>

|
|||||||
using namespace cldnn;
|
using namespace cldnn;
|
||||||
using namespace ::tests;
|
using namespace ::tests;
|
||||||
|
|
||||||
|
TEST(activation_f32_fw_gpu, dynamic) {
|
||||||
|
auto& engine = get_test_engine();
|
||||||
|
|
||||||
|
ov::PartialShape in_shape = { 1, 1, 4, 2 };
|
||||||
|
layout in_layout { ov::PartialShape::dynamic(in_shape.size()), data_types::f32, format::bfyx };
|
||||||
|
|
||||||
|
auto input = engine.allocate_memory({ in_shape, data_types::f32, format::bfyx });
|
||||||
|
set_values(input, { -0.12f, 0.56f, 0.45f, -0.789f, 42.f, 0.999f, 0.7899f, 0.f});
|
||||||
|
|
||||||
|
std::vector<activation_func> funcs = {
|
||||||
|
activation_func::gelu,
|
||||||
|
activation_func::relu,
|
||||||
|
activation_func::hyperbolic_tan,
|
||||||
|
activation_func::sqrt
|
||||||
|
};
|
||||||
|
|
||||||
|
for (auto func : funcs) {
|
||||||
|
topology topology(input_layout("input", in_layout));
|
||||||
|
topology.add(activation("activation", input_info("input"), func));
|
||||||
|
|
||||||
|
ExecutionConfig config;
|
||||||
|
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||||
|
network network(engine, topology, config);
|
||||||
|
|
||||||
|
network.set_input_data("input", input);
|
||||||
|
|
||||||
|
auto inst = network.get_primitive("activation");
|
||||||
|
auto impl = inst->get_impl();
|
||||||
|
ASSERT_TRUE(impl != nullptr);
|
||||||
|
ASSERT_TRUE(impl->is_dynamic());
|
||||||
|
|
||||||
|
auto outputs = network.execute();
|
||||||
|
ASSERT_EQ(outputs.size(), size_t(1));
|
||||||
|
ASSERT_EQ(outputs.begin()->first, "activation");
|
||||||
|
|
||||||
|
auto output_memory = outputs.at("activation").get_memory();
|
||||||
|
auto output_layout = output_memory->get_layout();
|
||||||
|
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
|
||||||
|
cldnn::mem_lock<float> input_ptr(input, get_test_stream());
|
||||||
|
|
||||||
|
int y_size = output_layout.spatial(1);
|
||||||
|
int x_size = output_layout.spatial(0);
|
||||||
|
int f_size = output_layout.feature();
|
||||||
|
int b_size = output_layout.batch();
|
||||||
|
|
||||||
|
ASSERT_EQ(output_layout.format, format::bfyx);
|
||||||
|
ASSERT_EQ(y_size, 4);
|
||||||
|
ASSERT_EQ(x_size, 2);
|
||||||
|
ASSERT_EQ(f_size, 1);
|
||||||
|
ASSERT_EQ(b_size, 1);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < output_layout.get_linear_size(); ++i) {
|
||||||
|
switch (func) {
|
||||||
|
case activation_func::gelu:
|
||||||
|
ASSERT_NEAR(0.5f * static_cast<float>(input_ptr[i]) * (1.f + std::erf(static_cast<float>((input_ptr[i])) / std::sqrt(2.0f))),
|
||||||
|
output_ptr[i], 1e-5f);
|
||||||
|
break;
|
||||||
|
case activation_func::relu:
|
||||||
|
ASSERT_EQ(std::max(input_ptr[i], static_cast<float>(0)), output_ptr[i]);
|
||||||
|
break;
|
||||||
|
case activation_func::hyperbolic_tan:
|
||||||
|
ASSERT_FLOAT_EQ(std::tanh(static_cast<float>(input_ptr[i])), output_ptr[i]);
|
||||||
|
break;
|
||||||
|
case activation_func::sqrt:
|
||||||
|
if (input_ptr[i] >= 0) {
|
||||||
|
ASSERT_FLOAT_EQ(std::sqrt(static_cast<float>(input_ptr[i])), output_ptr[i]);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST(activation_f32_fw_gpu, not_basic_yxfb) {
|
TEST(activation_f32_fw_gpu, not_basic_yxfb) {
|
||||||
// Input:
|
// Input:
|
||||||
// 1 0 -3 4 5
|
// 1 0 -3 4 5
|
||||||
// 0 2 3 4 -6
|
// 0 2 3 4 -6
|
||||||
// 3 -3 3 0 1
|
// 3 -3 3 0 1
|
||||||
// 1 1 1 -1 0
|
// 1 1 1 -1 0
|
||||||
|
@@ -5,7 +5,8 @@
 #include "test_utils.h"

 #include <intel_gpu/primitives/input_layout.hpp>
-#include "intel_gpu/primitives/select.hpp"
+#include <intel_gpu/primitives/select.hpp>
+#include "select_inst.h"

 using namespace cldnn;
 using namespace ::tests;

@@ -2292,3 +2293,104 @@ TEST(select_gpu_fp32, select_numpy_broadcast_mask_u8_1x1x3) {
         ASSERT_EQ(answers[i], output_ptr[i]);
     }
 }
+
+TEST(select_gpu_f32, dynamic) {
+    auto& engine = get_test_engine();
+
+    ov::PartialShape in1_shape = { 2, 2, 2, 2 };
+    ov::PartialShape in2_shape = { 2, 2, 2, 2 };
+    ov::PartialShape mask_shape = { 2, 2, 2, 1 };
+
+    layout input1_layout { ov::PartialShape::dynamic(in1_shape.size()), data_types::f32, format::bfyx };
+    layout input2_layout { ov::PartialShape::dynamic(in2_shape.size()), data_types::f32, format::bfyx };
+    layout mask_layout { ov::PartialShape::dynamic(mask_shape.size()), data_types::f32, format::bfyx };
+
+    auto input1 = engine.allocate_memory({ in1_shape, data_types::f32, format::bfyx });
+    auto input2 = engine.allocate_memory({ in2_shape, data_types::f32, format::bfyx });
+    auto mask = engine.allocate_memory({ mask_shape, data_types::f32, format::bfyx });
+
+    set_values(input1, {
+        1.f, 0.f,
+        5.f, 1.5f,
+
+        2.f, 0.f,
+        6.f, 5.2f,
+
+        3.f, 0.5f,
+        7.f, 12.f,
+
+        4.f, -0.5f,
+        8.f, 8.f
+    });
+
+    set_values(input2, {
+        0.5f, 2.5f,
+        1.5f, 3.f,
+
+        5.f, 7.f,
+        2.f, 4.f,
+
+        15.f, 17.f,
+        8.f, 10.f,
+
+        -2.f, 6.5f,
+        -0.5f, -2.5f
+    });
+
+    set_values(mask, {
+        0.f,
+        0.f,
+
+        1.f,
+        1.f,
+
+        0.f,
+        1.f,
+
+        1.f,
+        0.f,
+    });
+
+    topology topology;
+    topology.add(input_layout("input1", input1_layout));
+    topology.add(input_layout("input2", input2_layout));
+    topology.add(input_layout("mask", mask_layout));
+    topology.add(cldnn::select("select", input_info("mask"), input_info("input1"), input_info("input2")));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    network network(engine, topology, config);
+
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+
+    auto inst = network.get_primitive("select");
+    auto impl = inst->get_impl();
+    ASSERT_TRUE(impl != nullptr);
+    ASSERT_TRUE(impl->is_dynamic());
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = {
+        0.5f, 2.5f,
+        1.5f, 3.f,
+
+        2.f, 0.f,
+        6.f, 5.2f,
+
+        15.f, 17.f,
+        7.f, 12.f,
+
+        4.f, -0.5f,
+        -0.5f, -2.5f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (int i = 0; i < 16; i++) {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}