[GPU] Added shape agnostic ref kernels for Select and Activation (#15016)

* [GPU] Added Select shape agnostic support

* [GPU] Added Activation shape agnostic support
Roman Lyamin 2023-02-02 10:08:36 +04:00 committed by GitHub
parent 864b5075b7
commit 36df508baf
17 changed files with 351 additions and 69 deletions
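
Note for readers: "shape agnostic" here means the reference kernels are compiled once against dynamic dimensions and reused for whatever concrete shapes arrive per inference. The pattern the new unit tests below exercise looks roughly like this condensed sketch of the activation test added in this commit (assumes the intel_gpu unit-test environment for get_test_engine/set_values; not additional API surface):

#include "test_utils.h"  // assumption: built inside the intel_gpu unit-test tree
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/activation.hpp>

using namespace cldnn;
using namespace ::tests;

int main() {
    auto& engine = get_test_engine();

    // The topology only knows the rank; the memory carries the real 1x1x4x2 shape.
    layout in_layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
    auto input = engine.allocate_memory({ov::PartialShape{1, 1, 4, 2}, data_types::f32, format::bfyx});
    set_values(input, {-0.12f, 0.56f, 0.45f, -0.789f, 42.f, 0.999f, 0.7899f, 0.f});

    topology topology(input_layout("input", in_layout));
    topology.add(activation("activation", input_info("input"), activation_func::relu));

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));  // opt in to dynamic shapes

    network network(engine, topology, config);
    network.set_input_data("input", input);
    auto outputs = network.execute();  // shapes are resolved here, per inference
    return outputs.count("activation") == 1 ? 0 : 1;
}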

View File

@@ -911,7 +911,6 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
             return;
         std::vector<std::pair<program_node*, int32_t>> parents = node.get_dependencies();
-        std::list<cldnn::program_node*> users = node.get_users();
         std::vector<bool> can_fuse_parents = { false, false };

View File

@@ -64,12 +64,37 @@ struct activation_impl : typed_primitive_impl_ocl<activation> {
         return {params, optional_params};
     }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        auto kernel_params = get_kernel_params(impl_param);
+        (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
+    }
 };

 namespace detail {

 attach_activation_impl::attach_activation_impl() {
-    implementation_map<activation>::add(impl_types::ocl, typed_primitive_impl_ocl<activation>::create<activation_impl>, {
+    auto dyn_types = {
+        data_types::f32,
+        data_types::f16,
+        data_types::i8,
+        data_types::u8,
+        data_types::i32
+    };
+
+    auto dyn_formats = {
+        format::bfyx,
+        format::bfzyx,
+        format::bfwzyx
+    };
+
+    implementation_map<activation>::add(impl_types::ocl,
+                                        shape_types::dynamic_shape,
+                                        typed_primitive_impl_ocl<activation>::create<activation_impl>,
+                                        dyn_types,
+                                        dyn_formats);
+
+    implementation_map<activation>::add(impl_types::ocl, shape_types::static_shape, typed_primitive_impl_ocl<activation>::create<activation_impl>, {
         std::make_tuple(data_types::f32, format::yxfb),
         std::make_tuple(data_types::f16, format::yxfb),
         std::make_tuple(data_types::f32, format::bfyx),
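
The update_dispatch_data override added above is the runtime half of the shape-agnostic scheme: the OpenCL binary is built once, and only the work-group sizes are recomputed when concrete shapes become known. A minimal standalone sketch of that contract (ShapeParams/KernelDataStub are illustrative stand-ins, not the real cldnn/kernel_selector types):

#include <array>
#include <cstddef>
#include <functional>

// Illustrative stand-ins for the real cldnn/kernel_selector types.
struct ShapeParams { std::array<std::size_t, 4> out_dims; };        // b, f, y, x
struct WorkGroups  { std::array<std::size_t, 3> global{}, local{}; };
struct KernelDataStub {
    WorkGroups wg;
    // Mirrors kd.update_dispatch_data_func: recompute GWS/LWS from real shapes.
    std::function<void(const ShapeParams&, KernelDataStub&)> update_dispatch_data_func;
};

int main() {
    KernelDataStub kd;
    // Installed once at kernel-creation time (compare GetCommonKernelsData below).
    kd.update_dispatch_data_func = [](const ShapeParams& p, KernelDataStub& data) {
        data.wg.global = { p.out_dims[3], p.out_dims[2], p.out_dims[1] * p.out_dims[0] };
        data.wg.local  = { 1, 1, 1 };   // a ref kernel can fall back to a trivial LWS
    };

    // Called per inference once shapes are known (compare update_dispatch_data above).
    ShapeParams actual{ {2, 8, 4, 4} };
    kd.update_dispatch_data_func(actual, kd);
    return kd.wg.global[2] == 16 ? 0 : 1;   // 8 features * 2 batches
}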

View File

@@ -168,7 +168,7 @@ public:
 namespace detail {

 attach_pooling_impl::attach_pooling_impl() {
-    std::set<implementation_map<resample>::key_type> keys;
+    std::set<implementation_map<pooling>::key_type> keys;
     auto types = { data_types::f16, data_types::f32, data_types::i8, data_types::u8 };
     auto formats = { format::bfyx,

View File

@@ -30,15 +30,20 @@ struct select_impl : typed_primitive_impl_ocl<select> {
         auto params = get_default_params<kernel_selector::select_params>(impl_param);
         auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());

-        std::vector<layout> layouts = impl_param.input_layouts;
+        std::vector<layout> input_layouts = impl_param.input_layouts;
         auto o_layout = impl_param.get_output_layout();

-        auto broadcastable = [&](layout a, layout b) {
-            auto dims_a = a.get_dims();
-            auto dims_b = b.get_dims();
-            size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size(): dims_b.size();
-            for (size_t i = 0; i < min_size; i++) {
+        auto broadcastable = [&](const layout& a, const layout& b) {
+            if (a.is_dynamic() || b.is_dynamic()) {
+                return false;
+            }
+            auto dims_a = a.get_partial_shape();
+            auto dims_b = b.get_partial_shape();
+            size_t min_size = std::min(dims_a.size(), dims_b.size());
+            for (size_t i = 0; i < min_size; ++i) {
                 if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) {
                     return false;
                 }
@@ -46,41 +51,61 @@ struct select_impl : typed_primitive_impl_ocl<select> {
             return true;
         };

-        for (size_t i = 0; i < layouts.size(); i++) {
-            auto shape = layouts[i].get_shape();
-            auto shape_size = shape.size();
-            if (shape_size < 4 && !broadcastable(o_layout, layouts[i])) {
-                shape.insert(shape.begin(), 4 - shape_size, 1);
-                layout new_layout = layouts[i];
-                new_layout.set_partial_shape(shape);
-                layouts[i] = new_layout;
+        for (auto& l : input_layouts) {
+            auto pshape = l.get_partial_shape();
+            auto rank = pshape.size();
+
+            if (rank < 4 && !broadcastable(o_layout, l)) {
+                pshape.insert(pshape.begin(), 4 - rank, 1);
+                layout new_layout = l;
+                new_layout.set_partial_shape(pshape);
+                l = new_layout;
             }
         }

-        for (size_t i = 1; i < layouts.size(); i++) {
-            params.inputs.push_back(convert_data_tensor(layouts[i]));
+        for (size_t i = 1; i < input_layouts.size(); ++i) {
+            params.inputs.push_back(convert_data_tensor(input_layouts[i]));
         }

         return {params, optional_params};
     }
+
+    void update_dispatch_data(const kernel_impl_params& impl_param) override {
+        auto kernel_params = get_kernel_params(impl_param);
+        (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
+    }
 };

 namespace detail {

 attach_select_impl::attach_select_impl() {
-    implementation_map<select>::add(impl_types::ocl, typed_primitive_impl_ocl<select>::create<select_impl>, {
-        std::make_tuple(data_types::f32, format::yxfb),
-        std::make_tuple(data_types::f16, format::yxfb),
-        std::make_tuple(data_types::i8, format::yxfb),
-        std::make_tuple(data_types::u8, format::yxfb),
-        std::make_tuple(data_types::f32, format::bfyx),
-        std::make_tuple(data_types::f16, format::bfyx),
-        std::make_tuple(data_types::i8, format::bfyx),
-        std::make_tuple(data_types::u8, format::bfyx),
-        std::make_tuple(data_types::f32, format::byxf),
-        std::make_tuple(data_types::f16, format::byxf),
-        std::make_tuple(data_types::i8, format::byxf),
-        std::make_tuple(data_types::u8, format::byxf),
-    });
+    auto types = {
+        data_types::f32,
+        data_types::f16,
+        data_types::i8,
+        data_types::u8
+    };
+
+    auto static_formats = {
+        format::bfyx,
+        format::byxf,
+        format::yxfb,
+    };
+
+    implementation_map<select>::add(impl_types::ocl,
+                                    shape_types::static_shape,
+                                    typed_primitive_impl_ocl<select>::create<select_impl>,
+                                    types,
+                                    static_formats);
+
+    auto dyn_formats = {
+        format::bfyx
+    };
+
+    implementation_map<select>::add(impl_types::ocl,
+                                    shape_types::dynamic_shape,
+                                    typed_primitive_impl_ocl<select>::create<select_impl>,
+                                    types,
+                                    dyn_formats);
 }
 }  // namespace detail
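
The reworked broadcastable check and padding loop above implement one rule: if an input's rank is below 4 and its dims do not line up with the output under leading-aligned comparison, prepend 1s until rank 4. A standalone sketch of just that rule, with plain vectors in place of cldnn::layout:

#include <algorithm>
#include <cstdint>
#include <vector>

using Shape = std::vector<int64_t>;

// Mirrors the broadcastable() lambda: compare dims pairwise over the shorter rank.
static bool broadcastable(const Shape& a, const Shape& b) {
    size_t min_size = std::min(a.size(), b.size());
    for (size_t i = 0; i < min_size; ++i) {
        if (!(a[i] == 1 || b[i] == 1 || a[i] == b[i]))
            return false;
    }
    return true;
}

// Mirrors the per-input loop: pad low-rank, non-broadcastable inputs to rank 4.
static Shape prepare_input(const Shape& out, Shape in) {
    if (in.size() < 4 && !broadcastable(out, in))
        in.insert(in.begin(), 4 - in.size(), 1);
    return in;
}

int main() {
    Shape out{2, 3, 4, 5};
    Shape in{3, 4, 5};                      // leading-aligned compare fails: 2 vs 3
    Shape padded = prepare_input(out, in);  // -> {1, 3, 4, 5}, now broadcastable
    return (padded == Shape{1, 3, 4, 5} && broadcastable(out, padded)) ? 0 : 1;
}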

View File

@@ -413,7 +413,7 @@
 }

 static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
     ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape();
     if (ps.size() < 4) {
         ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));

View File

@@ -32,6 +32,9 @@
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(const select_node& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(select_node const& node, kernel_impl_params const& impl_param);
+    static std::vector<size_t> extend_shape_to_6d(ov::PartialShape ps);
+    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
+    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(select_node const& node);

     typed_primitive_inst(network& network, select_node const& node);
 };

View File

@@ -55,6 +55,23 @@ std::vector<layout> select_inst::calc_output_layouts(const select_node& /*node*/
     return {{output_shapes[0], dt, format::get_default_format(output_shapes[0].size())}};
 }

+std::vector<size_t> select_inst::extend_shape_to_6d(ov::PartialShape ps) {
+    if (ps.size() < 4) {
+        ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1));
+    }
+    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
+    return l.transform(format::bfwzyx).to_shape();
+}
+
+std::vector<size_t> select_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
+    return extend_shape_to_6d(orig_impl_param.get_input_layout(input_idx).get_partial_shape());
+}
+
+std::vector<size_t> select_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
+    return extend_shape_to_6d(orig_impl_param.get_output_layout(output_idx).get_partial_shape());
+}
+
 std::string select_inst::to_string(select_node const& node) {
     auto node_info = node.desc_to_json();
     auto desc = node.get_primitive();
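
extend_shape_to_6d pads a low-rank shape with leading 1s up to rank 4 and then lets the bfwzyx layout transform supply the remaining w/z axes. A rough standalone equivalent of the resulting arithmetic (the real code routes through cldnn::layout::transform; the insert position of the size-1 axes here is an assumption matching bfyx -> bfwzyx):

#include <cstddef>
#include <vector>

using Dims = std::vector<size_t>;

// Rough equivalent of select_inst::extend_shape_to_6d: pad with leading 1s to
// rank 4, then widen toward bfwzyx by inserting w = z = 1 after the f axis.
static Dims extend_shape_to_6d(Dims ps) {
    if (ps.size() < 4)
        ps.insert(ps.begin(), 4 - ps.size(), 1);
    if (ps.size() < 6)
        ps.insert(ps.begin() + 2, 6 - ps.size(), 1);  // assumed bfyx/bfzyx -> bfwzyx
    return ps;
}

int main() {
    Dims d = extend_shape_to_6d({2, 3, 4});   // -> {1, 2, 1, 1, 3, 4}
    return d == Dims{1, 2, 1, 1, 3, 4} ? 0 : 1;
}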

View File

@@ -12,6 +12,7 @@
 // TODO: move it from layout based to memory based
 KERNEL(activation)(
+    OPTIONAL_SHAPE_INFO_ARG
     __global INPUT0_TYPE* input,
     __global OUTPUT_TYPE* output
 #if HAS_FUSED_OPS_DECLS
@@ -32,7 +33,7 @@ KERNEL(activation)(
     const unsigned x = get_global_id(0);
     const uint y = (uint)get_global_id(1) % OUTPUT_SIZE_Y;
     const uint z = (uint)get_global_id(1) / OUTPUT_SIZE_Y;
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(2);
     const unsigned batch = 0;
 #else
@@ -44,7 +45,7 @@ KERNEL(activation)(
     const unsigned x = (uint)get_global_id(1);
     const unsigned y = (uint)get_global_id(2);
 #define z 0
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(0);
     const unsigned batch = 0;
 #else
@@ -60,7 +61,7 @@ KERNEL(activation)(
 #define z 0
     const unsigned x = (uint)get_global_id(0);
     const unsigned y = (uint)get_global_id(1);
-#if OUTPUT_BATCH_NUM == 1
+#if OUTPUT_BATCH_NUM_CONST == 1
     const unsigned feature = (uint)get_global_id(2);
     const unsigned batch = 0;
 #else
@@ -72,11 +73,11 @@ KERNEL(activation)(
     // GWS.feature and GWS.batch is aligned to 16. Otherwise, there are some idling WIs.
 #if (defined(OUTPUT_LAYOUT_B_FS_YX_FSV16) || defined(OUTPUT_LAYOUT_B_FS_YX_FSV32)) \
-    && OUTPUT_FEATURE_NUM % 16 != 0
+    && (OUTPUT_FEATURE_NUM_CONST % 16 != 0 || IS_DYNAMIC)
     if (feature >= OUTPUT_FEATURE_NUM)
         return;
 #elif (defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV16) || defined(OUTPUT_LAYOUT_BS_FS_YX_BSV32_FSV32)) \
-    && (OUTPUT_FEATURE_NUM % 16 != 0 || OUTPUT_BATCH_NUM % 16 != 0)
+    && (OUTPUT_FEATURE_NUM_CONST % 16 != 0 || OUTPUT_BATCH_NUM_CONST % 16 != 0 || IS_DYNAMIC)
     if (batch >= OUTPUT_BATCH_NUM || feature >= OUTPUT_FEATURE_NUM)
         return;
 #endif
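
The IS_DYNAMIC additions to the guard conditions above exist because blocked layouts dispatch the feature/batch global sizes rounded up to 16, and with dynamic shapes the divisibility cannot be proven at JIT time, so the early-return bounds check must always be compiled in. A host-side illustration of why the guard matters:

#include <cstddef>

// Why the kernel needs "feature >= OUTPUT_FEATURE_NUM" under IS_DYNAMIC:
// blocked layouts dispatch GWS.feature rounded up to 16, and with dynamic
// shapes the compiler cannot prove the real feature count is a multiple of 16.
constexpr size_t align_up(size_t v, size_t a) { return (v + a - 1) / a * a; }

int main() {
    size_t feature_num = 20;                   // known only at runtime
    size_t gws_f = align_up(feature_num, 16);  // 32 work-items dispatched
    size_t processed = 0;
    for (size_t f = 0; f < gws_f; ++f) {
        if (f >= feature_num) continue;        // the early-return guard
        ++processed;
    }
    return processed == feature_num ? 0 : 1;
}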

View File

@@ -4,18 +4,23 @@
 #include "include/batch_headers/fetch_data.cl"

-#define GET_INDEX(prefix, num) \
-    CAT(CAT(prefix, num), _OFFSET) + \
-    (d1 % CAT(CAT(prefix, num), _SIZES)[0])*CAT(CAT(prefix, num), _PITCHES)[0] + \
-    (d2 % CAT(CAT(prefix, num), _SIZES)[1])*CAT(CAT(prefix, num), _PITCHES)[1] + \
-    (d3 % CAT(CAT(prefix, num), _SIZES)[2])*CAT(CAT(prefix, num), _PITCHES)[2] + \
-    (d4 % CAT(CAT(prefix, num), _SIZES)[3])*CAT(CAT(prefix, num), _PITCHES)[3]
+#ifdef IS_DYNAMIC
+#define GET_INDEX(prefix) GET_DATA_INDEX_SAFE(prefix, d4, d3, d2, d1)
+#else
+#define GET_INDEX(prefix) \
+    CAT(prefix, _OFFSET) + \
+    (d1 % CAT(prefix, _SIZES)[0])*CAT(prefix, _PITCHES)[0] + \
+    (d2 % CAT(prefix, _SIZES)[1])*CAT(prefix, _PITCHES)[1] + \
+    (d3 % CAT(prefix, _SIZES)[2])*CAT(prefix, _PITCHES)[2] + \
+    (d4 % CAT(prefix, _SIZES)[3])*CAT(prefix, _PITCHES)[3]
+#endif

-#define INPUT_0 input0[GET_INDEX(INPUT, 0)]
-#define INPUT_1 input1[GET_INDEX(INPUT, 1)]
-#define INPUT_2 input2[GET_INDEX(INPUT, 2)]
+#define INPUT_0 input0[GET_INDEX(INPUT0)]
+#define INPUT_1 input1[GET_INDEX(INPUT1)]
+#define INPUT_2 input2[GET_INDEX(INPUT2)]

 KERNEL(select)(
+    OPTIONAL_SHAPE_INFO_ARG
     INPUTS_DECLS
     __global OUTPUT_TYPE* output)
 {
@@ -24,14 +29,19 @@
     const uint d1 = (uint) get_global_id(0);
     const uint d2 = (uint) get_global_id(1);
     const uint d34 = (uint) get_global_id(2);
-    const uint d3 = d34 % OUTPUT_SIZES[2];
-    const uint d4 = d34 / OUTPUT_SIZES[2];
+#ifdef IS_DYNAMIC
+    const uint d3 = d34 % OUTPUT_FEATURE_NUM;
+    const uint d4 = d34 / OUTPUT_FEATURE_NUM;
+#else
+    const uint d3 = d34 % OUTPUT_SIZES[2];
+    const uint d4 = d34 / OUTPUT_SIZES[2];
+#endif

-    uint output_offset = OUTPUT_OFFSET +
-                         d1*OUTPUT_PITCHES[0] +
-                         d2*OUTPUT_PITCHES[1] +
-                         d3*OUTPUT_PITCHES[2] +
-                         d4*OUTPUT_PITCHES[3];
+#ifdef IS_DYNAMIC
+    uint output_offset = OUTPUT_GET_INDEX(d4, d3, d2, d1);
+#else
+    uint output_offset = GET_DATA_INDEX_RAW(OUTPUT, d1, d2, d3, d4);
+#endif

     const OUTPUT_TYPE res = select(INPUT_2, INPUT_1, MASK);
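
For reference, the static-path GET_INDEX macro above realizes implicit broadcasting by taking each coordinate modulo the input's size before scaling by its pitch, so any axis of size 1 maps every coordinate to the same element; the dynamic path gets the same semantics from the existing GET_DATA_INDEX_SAFE helper. A host-side rendering of the modulo trick:

#include <array>
#include <cstddef>

// Host-side rendering of the static GET_INDEX macro: each coordinate is taken
// modulo the input's size so an axis of size 1 broadcasts, then scaled by pitch.
static size_t get_index(std::array<size_t, 4> d,          // d1..d4
                        std::array<size_t, 4> sizes,
                        std::array<size_t, 4> pitches,
                        size_t offset) {
    size_t idx = offset;
    for (size_t i = 0; i < 4; ++i)
        idx += (d[i] % sizes[i]) * pitches[i];
    return idx;
}

int main() {
    // Mirrors the 2x2x2x1 mask in the dynamic test below: x (= d1) has size 1,
    // so neighbouring x coordinates resolve to the same mask element.
    std::array<size_t, 4> sizes{1, 2, 2, 2}, pitches{1, 1, 2, 4};  // d1..d4
    size_t a = get_index({0, 0, 0, 0}, sizes, pitches, 0);
    size_t b = get_index({1, 0, 0, 0}, sizes, pitches, 0);  // d1 % 1 == 0 -> same element
    return a == b ? 0 : 1;
}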

View File

@@ -54,6 +54,14 @@ JitConstants ActivationKernelBase::GetJitConstants(const activation_params& para
         });
     }

+    if (params.has_dynamic_outputs()) {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BATCH_NUM_CONST", 0));
+        jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_CONST", 0));
+    } else {
+        jit.AddConstant(MakeJitConstant("OUTPUT_BATCH_NUM_CONST", params.outputs[0].Batch().v));
+        jit.AddConstant(MakeJitConstant("OUTPUT_FEATURE_NUM_CONST", params.outputs[0].Feature().v));
+    }
+
     return jit;
 }

@@ -78,7 +86,6 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
     }

     KernelData kd = KernelData::Default<activation_params>(params);
-
     activation_params& newParams = *static_cast<activation_params*>(kd.params.get());

     auto dispatchData = SetDefault(newParams);

@@ -86,9 +93,18 @@ KernelsData ActivationKernelBase::GetCommonKernelsData(const Params& params, con
     auto entry_point = GetEntryPoint(kernelName, newParams.layerID, params, options);
     auto jit = CreateJit(kernelName, cldnn_jit, entry_point);

+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const activation_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+    };
+
     auto& kernel = kd.kernels[0];
     FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
-                     EXE_MODE_DEFAULT, false, false, 1, GetFusedPrimitiveInputsCount(params));
+                     EXE_MODE_DEFAULT, false, false, 1,
+                     GetFusedPrimitiveInputsCount(params), 1, newParams.outputs[0].is_dynamic());

     if (!newParams.inputActivationParams.empty()) {
         kernel.params.arguments.push_back({ArgumentDescriptor::Types::SLOPE, 0});
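
These *_CONST JIT constants pair with the kernel changes earlier: for dynamic outputs they are emitted as 0, so every "#if OUTPUT_BATCH_NUM_CONST == 1" specialization is skipped and the kernel keeps the generic batched indexing, while the true sizes flow in through the shape-info buffer at run time. A compressed illustration of the selection (hypothetical emit_jit helper, not the kernel_selector API):

#include <map>
#include <string>

// Hypothetical helper mirroring GetJitConstants above: compile-time constants
// become 0 for dynamic shapes so size-based #if specializations are disabled.
static std::map<std::string, long> emit_jit(bool dynamic, long batch, long feature) {
    std::map<std::string, long> jit;
    jit["OUTPUT_BATCH_NUM_CONST"]   = dynamic ? 0 : batch;
    jit["OUTPUT_FEATURE_NUM_CONST"] = dynamic ? 0 : feature;
    return jit;
}

int main() {
    auto dyn = emit_jit(true, 2, 8);
    // 0 never equals 1, so "#if OUTPUT_BATCH_NUM_CONST == 1" stays false and
    // the kernel always takes the general batched path.
    return dyn["OUTPUT_BATCH_NUM_CONST"] == 0 ? 0 : 1;
}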

View File

@@ -27,6 +27,7 @@ ParamsKey ActivationKernelRef::GetSupportedKey() const {
     k.EnableTensorOffset();
     k.EnableTensorPitches();
     k.EnableBatching();
+    k.EnableDynamicShapesSupport();
     return k;
 }

View File

@@ -129,13 +129,21 @@ KernelsData SelectKernelBase::GetCommonKernelsData(const Params& params, const o
     DispatchData dispatchData = SetDefault(newParams);

+    kd.update_dispatch_data_func = [this](const Params& params, KernelData& kd) {
+        const auto& prim_params = static_cast<const select_params&>(params);
+        auto dispatchData = SetDefault(prim_params);
+        OPENVINO_ASSERT(kd.kernels.size() == 1, "[GPU] Invalid kernels size for update dispatch data func");
+        kd.kernels[0].params.workGroups.global = dispatchData.gws;
+        kd.kernels[0].params.workGroups.local = dispatchData.lws;
+    };
+
     auto& kernel = kd.kernels[0];
-
-    kernel.params.workGroups.global = dispatchData.gws;
-    kernel.params.workGroups.local = dispatchData.lws;
-
-    kernel.code.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, EXE_MODE_DEFAULT);
-    kernel.params.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
+    FillCLKernelData(kernel, dispatchData, params.engineInfo, kernelName, jit, entry_point,
+                     "", false, false,
+                     (uint32_t)newParams.inputs.size(),
+                     0,
+                     1,
+                     newParams.outputs[0].is_dynamic());

     return {kd};
 }

View File

@@ -36,6 +36,7 @@ ParamsKey SelectKernelRef::GetSupportedKey() const {
     k.EnableTensorPitches();
     k.EnableTensorOffset();
     k.EnableDifferentTypes();
+    k.EnableDynamicShapesSupport();
     return k;
 }

View File

@@ -23,9 +23,7 @@ static void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1
     auto reorderPrim = cldnn::reorder(layerName,
                                       inputs[0],
                                       cldnn::format::any,
-                                      outDataType,
-                                      std::vector<float>(),
-                                      cldnn::reorder_mean_mode::subtract);
+                                      outDataType);

     p.add_primitive(*op, reorderPrim);
 }

View File

@@ -177,7 +177,7 @@ class fc_fp32_activation_dynamic : public FullyConnectedFusingTest {};
 TEST_P(fc_fp32_activation_dynamic, basic) {
     auto p = GetParam();
     auto test_input_layout = get_input_layout(p);
-    auto dynamic_input_layout = layout{ov::PartialShape::dynamic(test_input_layout.get_rank()), test_input_layout.data_type, test_input_layout.format};
+    auto dynamic_input_layout = layout{ov::PartialShape::dynamic(test_input_layout.get_partial_shape().size()), test_input_layout.data_type, test_input_layout.format};
     create_topologies(
         input_layout("input", dynamic_input_layout),
         data("weights", get_mem(get_weights_layout(p))),

View File

@@ -8,6 +8,7 @@
 #include <intel_gpu/primitives/activation.hpp>
 #include <intel_gpu/primitives/data.hpp>
 #include <intel_gpu/primitives/reorder.hpp>
+#include "activation_inst.h"

 #include <cmath>
 #include <algorithm>
@@ -15,9 +16,84 @@
 using namespace cldnn;
 using namespace ::tests;

+TEST(activation_f32_fw_gpu, dynamic) {
+    auto& engine = get_test_engine();
+
+    ov::PartialShape in_shape = { 1, 1, 4, 2 };
+    layout in_layout { ov::PartialShape::dynamic(in_shape.size()), data_types::f32, format::bfyx };
+    auto input = engine.allocate_memory({ in_shape, data_types::f32, format::bfyx });
+    set_values(input, { -0.12f, 0.56f, 0.45f, -0.789f, 42.f, 0.999f, 0.7899f, 0.f});
+
+    std::vector<activation_func> funcs = {
+        activation_func::gelu,
+        activation_func::relu,
+        activation_func::hyperbolic_tan,
+        activation_func::sqrt
+    };
+
+    for (auto func : funcs) {
+        topology topology(input_layout("input", in_layout));
+        topology.add(activation("activation", input_info("input"), func));
+
+        ExecutionConfig config;
+        config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+        network network(engine, topology, config);
+        network.set_input_data("input", input);
+
+        auto inst = network.get_primitive("activation");
+        auto impl = inst->get_impl();
+        ASSERT_TRUE(impl != nullptr);
+        ASSERT_TRUE(impl->is_dynamic());
+
+        auto outputs = network.execute();
+        ASSERT_EQ(outputs.size(), size_t(1));
+        ASSERT_EQ(outputs.begin()->first, "activation");
+
+        auto output_memory = outputs.at("activation").get_memory();
+        auto output_layout = output_memory->get_layout();
+        cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
+        cldnn::mem_lock<float> input_ptr(input, get_test_stream());
+
+        int y_size = output_layout.spatial(1);
+        int x_size = output_layout.spatial(0);
+        int f_size = output_layout.feature();
+        int b_size = output_layout.batch();
+        ASSERT_EQ(output_layout.format, format::bfyx);
+        ASSERT_EQ(y_size, 4);
+        ASSERT_EQ(x_size, 2);
+        ASSERT_EQ(f_size, 1);
+        ASSERT_EQ(b_size, 1);
+
+        for (size_t i = 0; i < output_layout.get_linear_size(); ++i) {
+            switch (func) {
+            case activation_func::gelu:
+                ASSERT_NEAR(0.5f * static_cast<float>(input_ptr[i]) * (1.f + std::erf(static_cast<float>((input_ptr[i])) / std::sqrt(2.0f))),
+                            output_ptr[i], 1e-5f);
+                break;
+            case activation_func::relu:
+                ASSERT_EQ(std::max(input_ptr[i], static_cast<float>(0)), output_ptr[i]);
+                break;
+            case activation_func::hyperbolic_tan:
+                ASSERT_FLOAT_EQ(std::tanh(static_cast<float>(input_ptr[i])), output_ptr[i]);
+                break;
+            case activation_func::sqrt:
+                if (input_ptr[i] >= 0) {
+                    ASSERT_FLOAT_EQ(std::sqrt(static_cast<float>(input_ptr[i])), output_ptr[i]);
+                }
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
 TEST(activation_f32_fw_gpu, not_basic_yxfb) {
     // Input:
     // 1 0 -3 4 5
     // 0 2 3 4 -6
     // 3 -3 3 0 1
     // 1 1 1 -1 0

View File

@@ -5,7 +5,8 @@
 #include "test_utils.h"

 #include <intel_gpu/primitives/input_layout.hpp>
-#include "intel_gpu/primitives/select.hpp"
+#include <intel_gpu/primitives/select.hpp>
+#include "select_inst.h"

 using namespace cldnn;
 using namespace ::tests;

@@ -2292,3 +2293,104 @@ TEST(select_gpu_fp32, select_numpy_broadcast_mask_u8_1x1x3) {
         ASSERT_EQ(answers[i], output_ptr[i]);
     }
 }
+
+TEST(select_gpu_f32, dynamic) {
+    auto& engine = get_test_engine();
+
+    ov::PartialShape in1_shape = { 2, 2, 2, 2 };
+    ov::PartialShape in2_shape = { 2, 2, 2, 2 };
+    ov::PartialShape mask_shape = { 2, 2, 2, 1 };
+
+    layout input1_layout { ov::PartialShape::dynamic(in1_shape.size()), data_types::f32, format::bfyx };
+    layout input2_layout { ov::PartialShape::dynamic(in2_shape.size()), data_types::f32, format::bfyx };
+    layout mask_layout { ov::PartialShape::dynamic(mask_shape.size()), data_types::f32, format::bfyx };
+
+    auto input1 = engine.allocate_memory({ in1_shape, data_types::f32, format::bfyx });
+    auto input2 = engine.allocate_memory({ in2_shape, data_types::f32, format::bfyx });
+    auto mask = engine.allocate_memory({ mask_shape, data_types::f32, format::bfyx });
+
+    set_values(input1, {
+        1.f, 0.f,
+        5.f, 1.5f,
+        2.f, 0.f,
+        6.f, 5.2f,
+        3.f, 0.5f,
+        7.f, 12.f,
+        4.f, -0.5f,
+        8.f, 8.f
+    });
+
+    set_values(input2, {
+        0.5f, 2.5f,
+        1.5f, 3.f,
+        5.f, 7.f,
+        2.f, 4.f,
+        15.f, 17.f,
+        8.f, 10.f,
+        -2.f, 6.5f,
+        -0.5f, -2.5f
+    });
+
+    set_values(mask, {
+        0.f,
+        0.f,
+        1.f,
+        1.f,
+        0.f,
+        1.f,
+        1.f,
+        0.f,
+    });
+
+    topology topology;
+    topology.add(input_layout("input1", input1_layout));
+    topology.add(input_layout("input2", input2_layout));
+    topology.add(input_layout("mask", mask_layout));
+    topology.add(cldnn::select("select", input_info("mask"), input_info("input1"), input_info("input2")));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    network network(engine, topology, config);
+
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+
+    auto inst = network.get_primitive("select");
+    auto impl = inst->get_impl();
+    ASSERT_TRUE(impl != nullptr);
+    ASSERT_TRUE(impl->is_dynamic());
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = {
+        0.5f, 2.5f,
+        1.5f, 3.f,
+        2.f, 0.f,
+        6.f, 5.2f,
+        15.f, 17.f,
+        7.f, 12.f,
+        4.f, -0.5f,
+        -0.5f, -2.5f
+    };
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+
+    for (int i = 0; i < 16; i++) {
+        ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}