[GPU] Dynamic FC unit test and several fixes (#12563)

Vladimir Paramuzov 2022-08-31 18:01:35 +04:00 committed by GitHub
parent 5a8f5b630b
commit 382028e9c2
25 changed files with 440 additions and 182 deletions


@ -101,6 +101,12 @@ layout fully_connected_inst::calc_output_layout(fully_connected_node const& node
output_type = impl_param.get_fused_output_layout().data_type;
}
if (input_layout.is_dynamic()) {
auto rank = input_layout.get_rank();
format output_format = format::get_default_format(rank);
return layout(ov::PartialShape::dynamic(rank), output_type, output_format);
}
auto output_size = tensor(input_layout.batch(), weights_layout.batch(), 1, 1);
if (desc->input_size == 3) {
output_size = tensor(input_layout.batch(), input_layout.feature(), 1, weights_layout.batch());
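For a dynamic input, the branch added above only fixes the rank of the fully connected output at build time; the concrete dimensions are filled in later by shape inference once real input memory is attached. As a rough illustration (assuming format::get_default_format(2) resolves to bfyx, the default for ranks up to 4), a rank-2 dynamic input yields:

    // input_layout.is_dynamic() == true, rank == 2  ->  returned layout is equivalent to
    layout(ov::PartialShape::dynamic(2), output_type, format::bfyx);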


@ -108,9 +108,9 @@ void add_required_reorders::run(program& p) {
ToDo: Here we should also handle the situation where primitive usr has data inputs in different
formats
*/
layout current_layout(original_layout.data_type,
node->get_output_layout().format,
original_layout.get_tensor());
layout current_layout(original_layout.get_partial_shape(),
original_layout.data_type,
node->get_output_layout().format);
usr->set_output_layout(current_layout, false);
if (usr->type()->does_possible_implementation_exist(*usr)) {
correct_layout_selected = true;


@ -57,9 +57,9 @@ void post_input_reorder::run(program& p) {
if (input_layout.format != layout_format) {
auto previous_layout = node->get_output_layout();
layout current_layout(input_layout.data_type,
layout current_layout(input_layout.get_partial_shape(),
input_layout.data_type,
layout_format,
input_layout.get_tensor(),
input_layout.data_padding);
auto& reorder = add_reorder(p, input, node, current_layout);
reorder.set_unique_id();


@ -540,9 +540,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
continue;
auto is_grouped_conv = [](convolution_node& node) -> bool {
auto in_size = node.get_dependency(0).get_output_layout().get_tensor();
return (node.get_split() > 1 && node.get_split() != in_size.feature[0]) ||
(node.get_groups() > 1 && node.get_groups() != static_cast<uint32_t>(in_size.feature[0]));
auto in_layout = node.get_dependency(0).get_output_layout();
return (node.get_split() > 1 && node.get_split() != in_layout.feature()) ||
(node.get_groups() > 1 && node.get_groups() != static_cast<uint32_t>(in_layout.feature()));
};
auto conv_supports_fusings = [&](convolution_node& node) -> bool {


@ -556,7 +556,7 @@ void remove_redundant_reorders::run(program& p) {
bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() &&
reshape_input_node.get_fused_activations_funcs().empty() && reshape_input_node.get_fused_primitives().empty();
bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() &&
reshape_input_node.get_dependency(0).get_output_layout().get_tensor() == reshape_node.get_output_layout().get_tensor() &&
reshape_input_node.get_dependency(0).get_output_layout() == reshape_node.get_output_layout() &&
reshape_node.get_fused_activations_funcs().empty() && reshape_node.get_fused_primitives().empty();
if (remove_dep) {


@ -274,7 +274,9 @@ reorder_cnt count_reorders_in_dir(const std::map<program_node*, format::type>& f
travel_direction_wrapper<dir>::first(sel_fmt, next_fmt),
travel_direction_wrapper<dir>::second(sel_fmt, next_fmt)))) {
cnt += 1;
size += travel_direction_wrapper<dir>::first(node, next)->get_output_layout().count();
auto l = travel_direction_wrapper<dir>::first(node, next)->get_output_layout();
if (l.is_static())
size += l.count();
}
}
@ -780,7 +782,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
}
// Change input data of fully-connected node from bx to bf
if (format::is_simple_data_format(input_layout.format) && weights.is_constant() && input_layout.format.dimension() == 4 &&
if (input_layout.is_static() && format::is_simple_data_format(input_layout.format) && weights.is_constant() && input_layout.format.dimension() == 4 &&
input_layout.feature() == 1 && input_layout.spatial(0) != 1 && input_layout.spatial(1) == 1) {
auto new_tensor = input_layout.get_tensor();
new_tensor.feature[0] = input_layout.spatial(0);


@ -19,7 +19,7 @@ void strided_slice_optimize::run(program& p) {
auto node_itr = p.get_processing_order().begin();
while (node_itr != p.get_processing_order().end()) {
auto& node = (*node_itr++);
if (node->is_type<strided_slice>()) {
if (node->is_type<strided_slice>() && node->get_output_layout().is_static()) {
auto& strided_slice_node = node->as<strided_slice>();
auto& new_axis_mask = strided_slice_node.get_primitive()->new_axis_mask;


@ -15,7 +15,8 @@ namespace common {
class wait_for_events_impl : public primitive_impl {
public:
explicit wait_for_events_impl(const program_node& /*node*/) {}
explicit wait_for_events_impl(const program_node& /*node*/)
: primitive_impl(kernel_selector::weights_reorder_params{}, "wait_for_events") { }
std::unique_ptr<primitive_impl> clone() const override {
return make_unique<wait_for_events_impl>(*this);


@ -33,7 +33,7 @@ public:
}
static std::string to_string(input_layout_node const& node);
public:
void update_shape() override;
typed_primitive_inst(network& network, input_layout_node const& node);
void set_data(memory::ptr mem);


@ -157,7 +157,7 @@ struct kernel_impl_params {
layout get_non_padded_input_layout(size_t idx = 0) const {
auto input_layout = get_input_layout(idx);
auto result = layout({input_layout.data_type, input_layout.format, input_layout.get_tensor()});
auto result = layout({input_layout.get_partial_shape(), input_layout.data_type, input_layout.format});
return result;
}
@ -218,7 +218,7 @@ inline params_t get_default_params(const kernel_impl_params& param_info, uint32_
set_params(param_info, params);
const auto& input_layout = param_info.input_layouts[0];
const auto& input_layout = param_info.get_input_layout(0);
const auto& output_layout = param_info.output_layout;
params.inputs[0] = convert_data_tensor(input_layout, split);
@ -244,7 +244,7 @@ inline params_t get_default_params(const kernel_impl_params& param_info, uint32_
prim_id_type_map[fused_prim.desc->id] = std::make_pair(desc.op_id, desc.output_tensor.GetDType());
for (size_t i = desc.dep_idx_start; i < desc.dep_idx_start + desc.dep_size; i++) {
desc.tensors.push_back(convert_data_tensor(param_info.input_layouts[i]));
desc.tensors.push_back(convert_data_tensor(param_info.get_input_layout(i)));
}
if (fused_prim.total_num_deps > 0) {


@ -156,7 +156,7 @@ public:
}
bool is_dynamic() const {
return _node.is_dynamic() || _node.generates_dynamic_output();
return _is_dynamic;
}
void allocate_internal_buffers();
@ -200,6 +200,7 @@ protected:
true; // by default all primitives have valid inputs; the exception is input_layout (see input_layout_inst)
bool _has_mutable_input = false;
bool _mem_allocated = false;
bool _is_dynamic = false;
size_t max_output_layout_size = 0;


@ -145,7 +145,7 @@ public:
auto params = std::unique_ptr<kernel_impl_params>(new kernel_impl_params(get_program(), get_primitive(), get_unique_id(), in_layouts, out_layout,
get_fused_primitives(),
get_fused_activations_funcs(), get_fused_activations_params()));
params->memory_deps = get_const_memory_deps();
return params;
}


@ -36,7 +36,7 @@ input_layout_node::typed_program_node(const std::shared_ptr<input_layout> dprim,
}
input_layout_inst::typed_primitive_inst(network& network, input_layout_node const& node)
: parent(network, node, !network.is_internal() || has_optimized_users(node)) {
: parent(network, node, !node.is_dynamic() && (!network.is_internal() || has_optimized_users(node))) {
_has_valid_input = false; // by default input for 'input_layout' is invalid as long as user doesn't call set_data
}
@ -57,6 +57,15 @@ void input_layout_inst::set_data(memory::ptr mem) {
_output_changed = true;
}
void input_layout_inst::update_shape() {
OPENVINO_ASSERT(_output != nullptr, "[GPU] input memory is not set");
auto mem_layout = _output->get_layout();
if (_impl_params->output_layout != mem_layout) {
set_shape_change();
}
_impl_params->output_layout = mem_layout;
}
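The new update_shape() override lets an input_layout instance pick up the layout of whatever memory the user attaches and flag a shape change when it differs from the currently recorded output layout. The flow is driven from the API side, as in the dynamic tests added below:

    network.set_input_data("input", input_data);  // attached memory's layout feeds input_layout_inst::update_shape()
    auto outputs = network.execute();             // downstream shapes are re-inferred on every inference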
std::string input_layout_inst::to_string(input_layout_node const& node) {
auto node_info = node.desc_to_json();


@ -320,11 +320,12 @@ void network::validate_primitives() {
}
void network::set_arguments() {
if (!_reset_arguments || is_dynamic())
if (!_reset_arguments)
return;
for (auto const& prim : _exec_order) {
prim->set_arguments();
if (!prim->is_dynamic())
prim->set_arguments();
}
_reset_arguments = false;
}


@ -11,6 +11,7 @@
#include "fully_connected_inst.h"
#include "convolution_inst.h"
#include "deconvolution_inst.h"
#include "shape_of_inst.h"
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
#include "intel_gpu/graph/network.hpp"
@ -148,6 +149,13 @@ void primitive_inst::update_shape() {
}
}
if (input_shape_changed)
set_shape_change();
// We assume that tensor ranks are static, thus shape_of doesn't need to update anything even if input shape is dynamic
if (_node.is_type<shape_of>())
return;
if (!input_shape_changed && !_node.generates_dynamic_output() && _impl_params->output_layout.is_static())
return;
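The shape_of early return relies on ranks being static: an input declared as ov::PartialShape::dynamic(4) may carry {1, 2, 3, 4} on one inference and {4, 3, 2, 1} on the next, but the shape_of output is a 4-element vector either way, so its layout never needs re-inference. The shape_of_gpu.dynamic test added below exercises exactly this case:

    layout in_layout      = {ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};  // rank known, dims unknown
    layout in_mem_layout0 = {ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx};  // shape_of result: {1, 2, 3, 4}
    layout in_mem_layout1 = {ov::PartialShape{4, 3, 2, 1}, data_types::f32, format::bfyx};  // shape_of result: {4, 3, 2, 1}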
@ -173,7 +181,8 @@ void primitive_inst::update_shape() {
_network.get_stream().wait_for_events(dependencies_events);
_impl_params->memory_deps = memory_deps;
layout new_layout = _node.type()->calc_output_layout(_node, *_impl_params);
auto new_layouts = _node.type()->calc_output_layouts(_node, *_impl_params);
auto new_layout = new_layouts.empty() ? _node.type()->calc_output_layout(_node, *_impl_params) : new_layouts[0];
new_layout.data_padding = padding::max(_node.get_primitive()->output_padding, new_layout.data_padding);
if (_impl_params->output_layout != new_layout) {
@ -192,17 +201,26 @@ void primitive_inst::realloc_if_needed() {
auto actual_layout = _impl_params->output_layout;
OPENVINO_ASSERT(actual_layout.is_static(), "[GPU] Can't realloc mem for dynamic layout");
if (!_output
|| ((_output->get_layout().count() < actual_layout.count())
&& (max_output_layout_size < actual_layout.count()))) {
// input_layout node is supposed to always use external memory in the dynamic case
if (_node.is_type<input_layout>())
return;
bool can_reuse_buffer = _output && actual_layout.count() <= max_output_layout_size;
if (can_reuse_buffer) {
GPU_DEBUG_IF(debug_config->verbose >= 4) {
GPU_DEBUG_COUT << id() << ": realloc output memory" << std::endl;
GPU_DEBUG_COUT << id() << ": reuse previously allocated output buffer" << std::endl;
}
_output = _network.get_engine().reinterpret_buffer(*_output, actual_layout);
} else {
GPU_DEBUG_IF(debug_config->verbose >= 4) {
GPU_DEBUG_COUT << id() << ": realloc output memory. "
<< " Current buffer_size=" << max_output_layout_size
<< " Requested buffer_size=" << actual_layout.count() << std::endl;
}
_output = allocate_output();
} else {
_output = _network.get_engine().reinterpret_buffer(*_output, actual_layout);
max_output_layout_size = _output->get_layout().count();
}
max_output_layout_size = std::max(_output->get_layout().count(), max_output_layout_size);
}
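Taken together, the rewritten realloc_if_needed() implements a simple growth policy: input_layout nodes are skipped because their memory always comes from the user, the existing buffer is reused via reinterpret_buffer() when the newly requested element count fits into the largest allocation seen so far (max_output_layout_size), and only otherwise is a fresh output buffer allocated. A standalone sketch of that decision, with simplified, hypothetical names rather than the plugin's types:

    #include <cstddef>

    enum class realloc_action { skip, reuse, allocate };

    // 'capacity' mirrors max_output_layout_size: the element count of the largest buffer allocated so far.
    realloc_action choose_realloc_action(bool is_input_layout_node, bool has_buffer,
                                         std::size_t required_count, std::size_t capacity) {
        if (is_input_layout_node)
            return realloc_action::skip;      // external memory, supplied via set_data()
        if (has_buffer && required_count <= capacity)
            return realloc_action::reuse;     // reinterpret the existing buffer with the new layout
        return realloc_action::allocate;      // grow; the tracked capacity is then bumped to the new size
    }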
void primitive_inst::update_impl() {
@ -214,10 +232,8 @@ void primitive_inst::update_impl() {
layout_key_str = id() + "_" + std::to_string(_node.get_unique_id());
layout_key_str += "_" + _impl_params->output_layout.to_string();
for (auto in : _node.get_dependencies()) {
if (!in->is_constant()) {
layout_key_str += "_" + in->get_output_layout().to_string();
}
for (auto in : _impl_params->input_layouts) {
layout_key_str += "_" + in.to_string();
}
}
return layout_key_str;
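The implementation-cache key is now built from the layouts actually being executed (_impl_params->input_layouts) rather than the node's dependency layouts, so each distinct input/output shape combination maps to its own cache entry (and, on a miss, its own kernel compilation). A simplified sketch of the key construction, using plain strings in place of the plugin's layout type:

    #include <string>
    #include <vector>

    std::string make_impl_cache_key(const std::string& prim_id,
                                    const std::string& output_layout_str,
                                    const std::vector<std::string>& input_layout_strs) {
        std::string key = prim_id + "_" + output_layout_str;
        for (const auto& in : input_layout_strs)
            key += "_" + in;   // every input layout participates, constant or not
        return key;
    }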
@ -301,12 +317,12 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
<< out_ptr << ")" << std::endl;
}
if (_exec_deps.empty())
if (_exec_deps.empty() && dependencies.empty())
return _impl->execute(events, *this);
auto queue_type = get_network().get_stream().get_queue_type();
if (queue_type == queue_types::out_of_order) {
dependencies.reserve(_exec_deps.size());
dependencies.reserve(dependencies.size() + _exec_deps.size());
for (auto& input : _exec_deps) {
auto id = input->id();
try {
@ -315,9 +331,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
auto ev = get_network().get_primitive_event(id);
dependencies.emplace_back(ev);
} catch (const std::out_of_range& oor) {
std::string temp = std::string("internal CLDNN error: execution order corrupted.") + std::string("\n") +
std::string(oor.what() + std::string("\n"));
CLDNN_ERROR_MESSAGE(id, temp);
OPENVINO_ASSERT(false, "[GPU] execution order corrupted: ", oor.what());
}
}
}
@ -343,7 +357,8 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool
, _impl(node.get_selected_impl() ? node.get_selected_impl()->clone() : nullptr)
, _output()
, _output_changed(false)
, _mem_allocated(allocate_memory) {
, _mem_allocated(allocate_memory)
, _is_dynamic(_node.is_dynamic() || _node.generates_dynamic_output()) {
if (allocate_memory) {
// In case the output is a mutable_data primitive, and the other users' dependencies are only used for
// synchronization, the output memory of such a primitive will be fused with mutable_data
@ -435,7 +450,6 @@ event::ptr primitive_inst::update_weights() {
if (!weightable_node)
return nullptr;
GPU_DEBUG_GET_INSTANCE(debug_config);
auto& weights_params = _impl->_weights_reorder_params;
@ -480,7 +494,12 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
auto layout = impl_params.output_layout;
OPENVINO_ASSERT(layout.is_static(), "[GPU] Can't allocate output for dynamic layout");
auto device_mem_acc = [&](size_t a, const cldnn::layout& l) {
return a + l.bytes_count();
// Input shape may be dynamic in some cases (shape_of). It means that the output shape of the node doesn't depend on the input shape
// and output memory can be allocated at program build stage.
if (l.is_static())
return a + l.bytes_count();
return a;
};
bool usm_device_allocatable = true;
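The device_mem_acc accumulator above sums layout byte sizes when estimating device-memory usage for output allocation; dynamic layouts now contribute nothing because their byte size is unknown at build time (the shape_of case mentioned in the comment). A minimal sketch of the same accumulation with simplified types:

    #include <cstddef>
    #include <numeric>
    #include <vector>

    struct layout_info { bool is_static; std::size_t bytes; };

    std::size_t estimate_input_bytes(const std::vector<layout_info>& inputs) {
        return std::accumulate(inputs.begin(), inputs.end(), std::size_t{0},
                               [](std::size_t acc, const layout_info& l) {
                                   return l.is_static ? acc + l.bytes : acc;   // skip dynamic layouts
                               });
    }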
@ -564,7 +583,7 @@ std::vector<std::shared_ptr<primitive_inst>> primitive_inst::build_exec_deps(
std::vector<std::shared_ptr<primitive_inst>> exec_deps;
exec_deps.reserve(deps.size());
for (auto& dep : deps)
if (dep->get_impl() != nullptr)
if (dep->get_impl() != nullptr || dep->is_dynamic())
exec_deps.push_back(dep);
return exec_deps;


@ -96,7 +96,7 @@ std::vector<layout> reshape_inst::calc_output_layouts(reshape_node const& /*node
}
};
if (!memory_deps.empty()) {
if (memory_deps.count(1) > 0) {
auto pattern_mem = memory_deps.at(1);
cldnn::mem_lock<uint8_t, mem_lock_type::read> pattern_lock(pattern_mem, impl_param.prog.get_stream());


@ -47,5 +47,5 @@ std::string shape_of_inst::to_string(shape_of_node const& node) {
return primitive_description.str();
}
shape_of_inst::typed_primitive_inst(network& network, shape_of_node const& node) : parent(network, node) { }
shape_of_inst::typed_primitive_inst(network& network, shape_of_node const& node) : parent(network, node, true) { }
} // namespace cldnn


@ -20,6 +20,10 @@ namespace {
std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2) {
const auto& l1_pad = l1.data_padding;
const auto& l2_pad = l2.data_padding;
if (l1.is_dynamic() || l2.is_dynamic())
return {false, false};
auto l1_size = l1.get_tensor();
auto l2_size = l2.get_tensor();
int64_t offset_last_element_l1 = l1.get_linear_offset(l1_size - tensor{1});


@ -19,13 +19,9 @@ using namespace ::tests;
namespace {
struct fully_connected_test_params {
tensor in_shape;
tensor out_shape;
tensor kernel;
tensor stride;
tensor pad;
tensor dilation;
uint32_t groups;
ov::PartialShape in_shape;
ov::PartialShape out_shape;
ov::PartialShape weights_shape;
data_types data_type;
format input_format;
data_types weights_type;
@ -50,45 +46,24 @@ public:
}
layout get_input_layout(fully_connected_test_params& p) {
auto pad = p.pad;
std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
return layout{ p.in_shape, p.data_type, p.input_format,};
}
layout get_per_channel_layout(fully_connected_test_params& p) {
return layout{ p.default_type, p.default_format, tensor{ 1, p.out_shape.feature[0], 1, 1 } };
return layout{ ov::PartialShape{1, p.out_shape[1]}, p.default_type, p.default_format };
}
size_t get_output_dim_size(fully_connected_test_params& p) {
size_t size = 2;
for (auto i : p.out_shape.spatial) {
if (i > 1)
size++;
}
return size;
return p.out_shape.size();
}
layout get_weights_layout(fully_connected_test_params& p) {
cldnn::tensor weights_tensor;
if (p.out_shape.spatial[1] > 1) {
// 3d case
weights_tensor = cldnn::tensor(p.kernel.batch[0], p.kernel.feature[0], 1, 1);
}
else {
weights_tensor = cldnn::tensor(batch(p.out_shape.feature[0]), feature(p.in_shape.feature[0]),
spatial(p.kernel.spatial[0], p.kernel.spatial[1], p.kernel.spatial[2]));
}
return layout{ p.weights_type, p.weights_format, weights_tensor };
return layout{ p.weights_shape, p.weights_type, p.weights_format };
}
layout get_bias_layout(fully_connected_test_params& p) {
if (p.out_shape.spatial[1] > 1) {
// 3d case
return layout{ p.default_type, format::bfyx, tensor{ 1, 1, 1, p.out_shape.spatial[1] } };
}
else {
return layout{ p.default_type, format::bfyx, tensor{ 1, p.out_shape.feature[0], 1, 1 } };
}
auto bias_shape = p.out_shape.size() == 3 ? ov::PartialShape{1, 1, p.out_shape[2]} : ov::PartialShape{1, p.out_shape[1]};
return layout{ bias_shape, p.default_type, p.default_format };
}
};
@ -103,75 +78,57 @@ public:
auto input_prim = p.data_type == data_types::u8 ? get_mem(get_input_layout(p), 0, 10) : get_mem(get_input_layout(p));
auto impl_forcing_bo = bo_fused.get<build_option_type::force_implementations>();
const auto& impl_forcing = impl_forcing_bo->forcing;
auto forcing_format = p.input_format;
for (auto& forcing : impl_forcing) {
if (forcing.first == "conv_prim") {
forcing_format = forcing.second.output_format;
}
}
implementation_desc conv_impl = { forcing_format, "", impl_types::onednn };
bo_fused.set_option(build_option::force_implementations({ { "conv_prim", conv_impl } }));
network network_not_fused(this->engine, this->topology_non_fused, bo_not_fused);
network network_fused(this->engine, this->topology_fused, bo_fused);
network_fused.set_input_data("input", input_prim);
network_not_fused.set_input_data("input", input_prim);
compare(network_not_fused, network_fused, p);
auto find_conv = [](primitive_info& p) -> bool {
if (p.original_id == "conv_prim")
return true;
return false;
};
auto pi_fused = network_fused.get_primitives_info();
auto info_fused = std::find_if(pi_fused.begin(), pi_fused.end(), find_conv);
if (info_fused != pi_fused.end())
std::cout << "kernel: " << info_fused->kernel_id << std::endl;
}
layout get_input_layout(fully_connected_test_params& p) {
auto pad = p.pad;
std::vector<int> pad_ = { 0, 0, pad.spatial[0], pad.spatial[1] };
return layout{ p.data_type, p.input_format, p.in_shape, padding{ pad_ } };
return layout{ p.in_shape, p.data_type, p.input_format,};
}
layout get_per_channel_layout(fully_connected_test_params& p) {
return layout{ p.default_type, p.default_format, tensor{1, p.out_shape.feature[0], 1, 1} };
return layout{ ov::PartialShape{1, p.out_shape[1]}, p.default_type, p.default_format };
}
size_t get_output_dim_size(fully_connected_test_params& p) {
size_t size = 2;
for (auto i : p.out_shape.spatial) {
if (i > 1)
size++;
}
return size;
return p.out_shape.size();
}
layout get_weights_layout(fully_connected_test_params& p) {
return layout{ p.weights_shape, p.weights_type, p.weights_format };
}
layout get_bias_layout(fully_connected_test_params& p) {
return get_per_channel_layout(p);
}
layout get_output_layout(fully_connected_test_params& p) {
return layout{ p.out_shape, p.data_type, p.input_format };
}
};
#endif // ENABLE_ONEDNN_FOR_GPU
} // namespace
// in_shape; out_shape; kernel; stride; pad; dilation; groups; data_type; input_format; weights_type; weights_format; default_type; default_format;
#define CASE_FC_FP32_1 { 1, 1, 3, 1 }, { 1, 4, 1, 1 }, { 4, 1, 3, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_2 { 2, 1, 3, 1 }, { 2, 4, 1, 1 }, { 4, 1, 3, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::yxfb, data_types::f32, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_3 { 2, 32, 1, 1 }, { 2, 16, 1, 1 }, { 16, 32, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_1 { 5, 3, 1, 3 }, { 5, 3, 1, 5 }, { 5, 3, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_2 { 2, 1, 1, 1 }, { 2, 1, 1, 32 }, { 32, 1, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_3 { 2, 32, 1, 32 }, { 2, 32, 1, 16 }, { 16, 32, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
// in_shape; out_shape; weights_shape; data_type; input_format; weights_type; weights_format; default_type; default_format;
#define CASE_FC_FP32_1 { 1, 3 }, { 1, 4 }, { 4, 3 }, data_types::f32, format::bfyx, data_types::f32, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_2 { 2, 3 }, { 2, 4 }, { 4, 3 }, data_types::f32, format::yxfb, data_types::f32, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_3 { 2, 32 }, { 2, 16 }, { 16, 32 }, data_types::f32, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_1 { 5, 3, 3 }, { 5, 3, 5 }, { 5, 3, 1 }, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_2 { 2, 1, 1 }, { 2, 1, 32 }, { 32, 1, 1 }, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_FC_FP32_3D_3 { 2, 32, 32 }, { 2, 32, 16 }, { 16, 32, 1 }, data_types::f32, format::bfyx, data_types::f32, format::os_iyx_osv16, data_types::f32, format::bfyx
#define CASE_FC_U8S8_1 { 1, 1, 3, 1 }, { 1, 4, 1, 1 }, { 4, 1, 3, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_2 { 2, 1, 3, 1 }, { 2, 4, 1, 1 }, { 4, 1, 3, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::b_fs_yx_fsv4, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3 { 2, 32, 1, 1 }, { 2, 16, 1, 1 }, { 16, 32, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::b_fs_yx_fsv4, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_1 { 2, 32, 1, 3 }, { 2, 32, 1, 16 }, { 16, 3, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_2 { 1, 1, 1, 3 }, { 1, 1, 1, 32 }, { 32, 3, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_3 { 2, 3, 1, 1 }, { 2, 3, 1, 15 }, { 15, 1, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_4 { 1, 512, 1, 1024 }, { 1, 384, 1, 1024 }, { 1024, 1024, 1, 1 }, tensor{ 1 }, tensor{ 0 }, tensor{ 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_1 { 1, 3 }, { 1, 4 }, { 4, 3 }, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_2 { 2, 3 }, { 2, 4 }, { 4, 3 }, data_types::u8, format::b_fs_yx_fsv4, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3 { 2, 32 }, { 2, 16 }, { 16, 32 }, data_types::u8, format::b_fs_yx_fsv4, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_1 { 2, 32, 3 }, { 2, 32, 16 }, { 16, 3, 1 }, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_2 { 1, 1, 3 }, { 1, 1, 32 }, { 32, 3, 1 }, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_3 { 2, 3, 1 }, { 2, 3, 15 }, { 15, 1, 1 }, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
#define CASE_FC_U8S8_3D_4 { 1, 512, 1024 }, { 1, 384, 1024 }, { 1024, 1024, 1 }, data_types::u8, format::bfyx, data_types::i8, format::oiyx, data_types::f32, format::bfyx
/* ----------------------------------------------------------------------------------------------------- */
/* ---------------------------------------- FC cases --------------------------------------------------- */
@ -225,48 +182,6 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_fp32_bias, ::testing::ValuesIn(std::vec
fully_connected_test_params{ CASE_FC_FP32_3D_3, 2, 3 },
}));
class fc_int8_scale : public FullyConnectedFusingTest {};
TEST_P(fc_int8_scale, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count())),
fully_connected("fc_prim", "input", "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
scale("scale", "fc_prim", "scale_data"),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
TEST_P(fc_int8_scale, fp16_scale_out) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count())),
fully_connected("fc_prim", "input", "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
scale("scale", "fc_prim", "scale_data", optional_data_type{ data_types::f16 }),
reorder("reorder_bfyx", "scale", p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
fully_connected_test_params{ CASE_FC_U8S8_1, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_2, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_1, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_2, 2, 3 },
fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 3 },
}));
class fc_int8_quantize_u8 : public FullyConnectedFusingTest {};
TEST_P(fc_int8_quantize_u8, basic) {
auto p = GetParam();
@ -296,8 +211,8 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu_fc, fc_int8_quantize_u8, ::testing::ValuesI
fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 3 },
}));
class fc_int8_scale_quantize_i8 : public FullyConnectedFusingTest {};
TEST_P(fc_int8_scale_quantize_i8, basic) {
class fc_int8_eltwise_quantize_i8 : public FullyConnectedFusingTest {};
TEST_P(fc_int8_eltwise_quantize_i8, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
@ -307,10 +222,10 @@ TEST_P(fc_int8_scale_quantize_i8, basic) {
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
data("out_lo", get_mem(get_single_element_layout(p), -127)),
data("out_hi", get_mem(get_single_element_layout(p), 127)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count() / 255)),
data("eltwise_data", get_mem(get_per_channel_layout(p), 1.0f / get_weights_layout(p).count() / 255)),
fully_connected("fc_prim", "input", "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
scale("scale", "fc_prim", "scale_data"),
quantize("quantize", "scale", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
eltwise("eltwise", {"fc_prim", "eltwise_data"}, eltwise_mode::prod),
quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
);
@ -318,7 +233,7 @@ TEST_P(fc_int8_scale_quantize_i8, basic) {
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale_quantize_i8, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_eltwise_quantize_i8, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
fully_connected_test_params{ CASE_FC_U8S8_1, 2, 4 },
fully_connected_test_params{ CASE_FC_U8S8_2, 2, 4 },
fully_connected_test_params{ CASE_FC_U8S8_3, 2, 4 },
@ -327,8 +242,8 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale_quantize_i8, ::testing::Valu
fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 4 },
}));
class fc_int8_scale_activation_quantize_i8 : public FullyConnectedFusingTest {};
TEST_P(fc_int8_scale_activation_quantize_i8, basic) {
class fc_int8_eltwise_activation_quantize_i8 : public FullyConnectedFusingTest {};
TEST_P(fc_int8_eltwise_activation_quantize_i8, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p)),
@ -338,11 +253,11 @@ TEST_P(fc_int8_scale_activation_quantize_i8, basic) {
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
data("out_lo", get_mem(get_single_element_layout(p), -127)),
data("out_hi", get_mem(get_single_element_layout(p), 127)),
data("scale_data", get_mem(get_per_channel_layout(p), 1.0f / p.kernel.count() / 255)),
data("eltwise_data", get_mem(get_per_channel_layout(p), 1.0f / get_weights_layout(p).count() / 255)),
fully_connected("fc_prim", "input", "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
scale("scale", "fc_prim", "scale_data"),
activation("activation_scale", "scale", activation_func::exp),
quantize("quantize", "activation_scale", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
eltwise("eltwise", {"fc_prim", "eltwise_data"}, eltwise_mode::prod),
activation("activation_eltwise", "eltwise", activation_func::exp),
quantize("quantize", "activation_eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 255, data_types::i8),
reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32)
);
@ -350,7 +265,7 @@ TEST_P(fc_int8_scale_activation_quantize_i8, basic) {
execute(p);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale_activation_quantize_i8, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_eltwise_activation_quantize_i8, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
fully_connected_test_params{ CASE_FC_U8S8_1, 2, 5 },
fully_connected_test_params{ CASE_FC_U8S8_2, 2, 5 },
fully_connected_test_params{ CASE_FC_U8S8_3, 2, 5 },
@ -370,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale_activation_quantize_i8, ::te
class fc_int8_inputs_fused_fp32_sum : public FullyConnectedFusingTestOneDNN {};
TEST_P(fc_int8_inputs_fused_fp32_sum, basic) {
auto p = GetParam();
auto shift_layout = layout{ p.default_type, p.default_format, tensor{ 1, 1, 1, p.kernel.batch[0] } };
auto shift_layout = layout{ ov::PartialShape{p.weights_shape[0]}, p.default_type, p.default_format };
create_topologies(
input_layout("input", get_input_layout(p)),


@ -0,0 +1,44 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/graph/program.hpp"
#include "data_inst.h"
#include "eltwise_inst.h"
#include "intel_gpu/graph/network.hpp"
#include "pass_manager.h"
#include "to_string_utils.h"
#include "program_wrapper.h"
#include <memory>
using namespace cldnn;
using namespace ::tests;
TEST(prepare_primitive_fusing, fuse_to_fc_dyn) {
auto& engine = get_test_engine();
auto weights = engine.allocate_memory({ ov::PartialShape{ 16, 32 }, data_types::u8, format::bfyx });
auto in_layout = layout{ ov::PartialShape::dynamic(2), data_types::u8, format::bfyx };
topology topology;
topology.add(data("weights", weights));
topology.add(input_layout("input", in_layout));
topology.add(fully_connected("fc", "input", { "weights" }));
topology.add(activation("act", "fc", activation_func::relu));
topology.add(reorder("reorder", "act", format::bfyx, data_types::f32));
build_options build_opts;
auto prog = program::build_program(engine, topology, build_opts, false, true);
layout_optimizer lo(true);
program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
ASSERT_NE(prog, nullptr);
ASSERT_FALSE(has_node_with_type<activation>(*prog));
}


@ -1770,3 +1770,202 @@ TEST(fully_connected_3d_onednn_gpu, no_biases_int8) {
}
}
#endif
TEST(fully_connected_gpu, dynamic) {
auto& engine = get_test_engine();
const int32_t input_f = 3, input_b = 1, weight_b = 4;
auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f, 1, 1}, data_types::f32,format::bfyx };
auto input_data = engine.allocate_memory(layout{ ov::PartialShape{ input_b, input_f, 1, 1}, data_types::f32,format::bfyx });
auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f, 1, 1 }, data_types::f32,format::bfyx });
set_values(input_data, { -0.5f, 2.0f, 0.5f });
set_values(weights_data, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
cldnn::topology topology{
input_layout("input", input_dyn_layout),
data("weights", weights_data),
fully_connected("fc", "input", "weights")
};
build_options options;
options.set_option(build_option::optimize_data(true));
network network(engine, topology, options);
network.set_input_data("input", input_data);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "fc");
auto output_prim = outputs.begin()->second.get_memory();
auto out_l = output_prim->get_layout();
ASSERT_EQ(out_l.batch(), input_b);
ASSERT_EQ(out_l.feature(), weight_b);
ASSERT_EQ(out_l.spatial(0), 1);
ASSERT_EQ(out_l.spatial(1), 1);
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
ASSERT_EQ(1.5f, output_ptr[0]);
ASSERT_EQ(0.75f, output_ptr[1]);
ASSERT_EQ(-2.25f, output_ptr[2]);
ASSERT_EQ(3.0f, output_ptr[3]);
}
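The expected values are dot products of the input row with each 3-element weight row: for example, the first output is (-0.5)*1.5 + 2.0*1.0 + 0.5*0.5 = -0.75 + 2.0 + 0.25 = 1.5, and the last one is (-0.5)*(-0.5) + 2.0*1.0 + 0.5*1.5 = 0.25 + 2.0 + 0.75 = 3.0.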
TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) {
auto& engine = get_test_engine();
const int32_t input_f = 3, input_b = 1, weight_b = 4;
auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f, 1, 1}, data_types::f32,format::bfyx };
auto input_actual_layout = layout{ ov::PartialShape{ input_b, input_f, 1, 1}, data_types::f32,format::bfyx };
auto input_data1 = engine.allocate_memory(input_actual_layout);
auto input_data2 = engine.allocate_memory(input_actual_layout);
auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f, 1, 1 }, data_types::f32,format::bfyx });
set_values(input_data1, { 0.5f, -2.0f, -0.5f });
set_values(input_data2, { -0.5f, 2.0f, 0.5f });
set_values(weights_data, { 1.5f, 1.0f, 0.5f,
-1.0f, 0.0f, 0.5f,
0.5f, -0.5f, -2.0f,
-0.5f, 1.0f, 1.5f });
cldnn::topology topology{
input_layout("input", input_dyn_layout),
data("weights", weights_data),
fully_connected("fc", "input", "weights")
};
build_options options;
options.set_option(build_option::optimize_data(true));
network network(engine, topology, options);
{
network.set_input_data("input", input_data1);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "fc");
auto output_prim = outputs.begin()->second.get_memory();
auto out_l = output_prim->get_layout();
ASSERT_EQ(out_l.batch(), input_b);
ASSERT_EQ(out_l.feature(), weight_b);
ASSERT_EQ(out_l.spatial(0), 1);
ASSERT_EQ(out_l.spatial(1), 1);
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
ASSERT_EQ(-1.5f, output_ptr[0]);
ASSERT_EQ(-0.75f, output_ptr[1]);
ASSERT_EQ(2.25f, output_ptr[2]);
ASSERT_EQ(-3.0f, output_ptr[3]);
}
{
network.set_input_data("input", input_data2);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "fc");
auto output_prim = outputs.begin()->second.get_memory();
auto out_l = output_prim->get_layout();
ASSERT_EQ(out_l.batch(), input_b);
ASSERT_EQ(out_l.feature(), weight_b);
ASSERT_EQ(out_l.spatial(0), 1);
ASSERT_EQ(out_l.spatial(1), 1);
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
ASSERT_EQ(1.5f, output_ptr[0]);
ASSERT_EQ(0.75f, output_ptr[1]);
ASSERT_EQ(-2.25f, output_ptr[2]);
ASSERT_EQ(3.0f, output_ptr[3]);
}
}
TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) {
auto& engine = get_test_engine();
const int32_t input_f = 3, weight_b = 4;
auto input_dyn_layout = layout{ ov::PartialShape{ ov::Dimension(1, 10), input_f, 1, 1}, data_types::f32,format::bfyx };
auto input_actual_layout1 = layout{ ov::PartialShape{ 2, input_f, 1, 1}, data_types::f32,format::bfyx};
auto input_actual_layout2 = layout{ ov::PartialShape{ 1, input_f, 1, 1}, data_types::f32,format::bfyx};
auto input_data1 = engine.allocate_memory(input_actual_layout1);
auto input_data2 = engine.allocate_memory(input_actual_layout2);
auto weights_data = engine.allocate_memory({ ov::PartialShape{ weight_b, input_f, 1, 1 }, data_types::f32,format::bfyx});
set_values(input_data1, { 0.5f, -2.0f, -0.5f,
-0.5f, 2.0f, 0.5f });
set_values(input_data2, { -0.5f, 2.0f, 0.5f });
set_values(weights_data, { 1.5f, 1.0f, 0.5f,
-1.0f, 0.0f, 0.5f,
0.5f, -0.5f, -2.0f,
-0.5f, 1.0f, 1.5f });
cldnn::topology topology{
input_layout("input", input_dyn_layout),
data("weights", weights_data),
fully_connected("fc", "input", "weights")
};
build_options options;
options.set_option(build_option::optimize_data(true));
network network(engine, topology, options);
{
network.set_input_data("input", input_data1);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "fc");
auto output_prim = outputs.begin()->second.get_memory();
auto out_l = output_prim->get_layout();
ASSERT_EQ(out_l.batch(), 2);
ASSERT_EQ(out_l.feature(), weight_b);
ASSERT_EQ(out_l.spatial(0), 1);
ASSERT_EQ(out_l.spatial(1), 1);
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
ASSERT_EQ(-1.5f, output_ptr[0]);
ASSERT_EQ(-0.75f, output_ptr[1]);
ASSERT_EQ(2.25f, output_ptr[2]);
ASSERT_EQ(-3.0f, output_ptr[3]);
ASSERT_EQ(1.5f, output_ptr[4]);
ASSERT_EQ(0.75f, output_ptr[5]);
ASSERT_EQ(-2.25f, output_ptr[6]);
ASSERT_EQ(3.0f, output_ptr[7]);
}
{
network.set_input_data("input", input_data2);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "fc");
auto output_prim = outputs.begin()->second.get_memory();
auto out_l = output_prim->get_layout();
ASSERT_EQ(out_l.batch(), 1);
ASSERT_EQ(out_l.feature(), weight_b);
ASSERT_EQ(out_l.spatial(0), 1);
ASSERT_EQ(out_l.spatial(1), 1);
cldnn::mem_lock<float> output_ptr (output_prim, get_test_stream());
ASSERT_EQ(1.5f, output_ptr[0]);
ASSERT_EQ(0.75f, output_ptr[1]);
ASSERT_EQ(-2.25f, output_ptr[2]);
ASSERT_EQ(3.0f, output_ptr[3]);
}
}


@ -116,3 +116,49 @@ TEST(shape_of_gpu, bfzyx) {
EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
TEST(shape_of_gpu, dynamic) {
auto& engine = get_test_engine();
layout in_layout = {ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
layout in_mem_layout0 = {ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx};
layout in_mem_layout1 = {ov::PartialShape{4, 3, 2, 1}, data_types::f32, format::bfyx};
auto input_mem0 = engine.allocate_memory(in_mem_layout0);
auto input_mem1 = engine.allocate_memory(in_mem_layout1);
cldnn::topology topology;
topology.add(input_layout("input", in_layout));
topology.add(shape_of("shape_of", "input", 5, data_types::i32));
network network(engine, topology);
{
network.set_input_data("input", input_mem0);
auto outputs = network.execute();
auto output = outputs.at("shape_of").get_memory();
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
std::vector<int32_t> expected_results = {1, 2, 3, 4};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
{
network.set_input_data("input", input_mem1);
auto outputs = network.execute();
auto output = outputs.at("shape_of").get_memory();
cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
std::vector<int32_t> expected_results = {4, 3, 2, 1};
for (size_t i = 0; i < expected_results.size(); ++i) {
EXPECT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
}


@ -33,6 +33,7 @@
#include "random_gen.h"
#include "uniform_quantized_real_distribution.hpp"
#include "to_string_utils.h"
#include "program_node.h"
#include <iostream>
#include <limits>
@ -59,6 +60,16 @@ cldnn::engine& get_onednn_test_engine();
#endif
cldnn::stream& get_test_stream();
template<typename T>
bool has_node_with_type(cldnn::program& prog) {
for (auto node : prog.get_processing_order()) {
if (node->is_type<T>())
return true;
}
return false;
}
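Usage example, taken from the prepare_primitive_fusing.fuse_to_fc_dyn test added in this commit (after the fusing pass the activation node should have been folded into the fully connected node):

    ASSERT_FALSE(has_node_with_type<activation>(*prog));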
#define USE_RANDOM_SEED 0
#if USE_RANDOM_SEED
std::random_device rnd_device;